How to use the icrawler.downloader.Downloader function in icrawler

To help you get started, we’ve selected a few icrawler examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github hellock / icrawler / icrawler / downloader.py View on Github external
self.logger.info('%s is waiting for new download tasks',
                                     current_thread().name)
            except:
                self.logger.error('exception in thread %s',
                                  current_thread().name)
            else:
                self.download(task, default_ext, req_timeout, **kwargs)
                self.process_meta(task)
                self.in_queue.task_done()
        self.logger.info('thread {} exit'.format(current_thread().name))

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.logger.info('all downloader threads exited')


class ImageDownloader(Downloader):
    """Downloader specified for images.
    """

    def _size_lt(self, sz1, sz2):
        return max(sz1) <= max(sz2) and min(sz1) <= min(sz2)

    def _size_gt(self, sz1, sz2):
        return max(sz1) >= max(sz2) and min(sz1) >= min(sz2)

    def keep_file(self, task, response, min_size=None, max_size=None):
        """Decide whether to keep the image

        Compare image size with ``min_size`` and ``max_size`` to decide.

        Args:
            response (Response): response of requests.
github hellock / icrawler / icrawler / downloader.py View on Github external
def __init__(self, thread_num, signal, session, storage):
        """Init Parser with some shared variables."""
        super(Downloader, self).__init__(
            thread_num, out_queue=None, name='downloader')
        self.signal = signal
        self.session = session
        self.storage = storage
        self.file_idx_offset = 0
        self.clear_status()