# Secure your code as it's written. Use Snyk Code to scan source code in minutes (no build needed) and fix issues immediately.
# -*- coding: utf-8 -*-
import logging
import time
from threading import current_thread
from six.moves import queue
from six.moves.urllib.parse import urlsplit
from icrawler.utils import ThreadPool
class Parser(ThreadPool):
    """Base class for parsers.

    Essentially a thread manager, in charge of downloading pages, parsing
    pages, extracting image urls and putting them into ``task_queue``.

    Attributes:
        url_queue: A queue storing page urls, connecting Feeder and Parser.
        task_queue: A queue storing image downloading tasks, connecting
            Parser and Downloader.
        global_signal: A Signal object for cross-module communication.
        session: A requests.Session object.
        logger: A logging.Logger object used for logging.
        threads: A list storing all the threading.Thread objects of the
            parser.
        thread_num: An integer indicating the number of threads.
        lock: A threading.Lock object.
    """

    # NOTE(review): none of the attributes listed above are assigned in this
    # excerpt; presumably they are set by ThreadPool or by methods outside
    # this view -- confirm against the full class.
# -*- coding: utf-8 -*-
import os.path as osp
from threading import current_thread
from icrawler.utils import ThreadPool
class Feeder(ThreadPool):
    """Base class for feeders.

    A thread pool of feeder threads, in charge of feeding urls to parsers.

    Attributes:
        thread_num (int): An integer indicating the number of threads.
        global_signal (Signal): A :class:`Signal` object for communication
            among all threads.
        out_queue (Queue): A queue connected with parsers' inputs,
            storing page urls.
        session (Session): A session object.
        logger (Logger): A logging.Logger object used for logging.
        workers (list): A list storing all the threading.Thread objects
            of the feeder.
        lock (Lock): A :class:`Lock` instance shared by all feeder threads.
    """

    # NOTE(review): none of the attributes listed above are assigned in this
    # excerpt; presumably they are set by ThreadPool or by methods outside
    # this view -- confirm against the full class.
# -*- coding: utf-8 -*-
from threading import current_thread
from PIL import Image
from six import BytesIO
from six.moves import queue
from six.moves.urllib.parse import urlparse
from icrawler.utils import ThreadPool
class Downloader(ThreadPool):
    """Base class for downloaders.

    A thread pool of downloader threads, in charge of downloading files and
    saving them in the corresponding paths.

    Attributes:
        task_queue (CachedQueue): A queue storing image downloading tasks,
            connecting :class:`Parser` and :class:`Downloader`.
        signal (Signal): A Signal object shared by all components.
        session (Session): A session object.
        logger (Logger): A logging.Logger object used for logging.
        workers (list): A list of downloader threads.
        thread_num (int): The number of downloader threads.
        lock (Lock): A threading.Lock object.
        storage (BaseStorage): storage backend.
    """

    # NOTE(review): none of the attributes listed above are assigned in this
    # excerpt; presumably they are set by ThreadPool or by methods outside
    # this view -- confirm against the full class.