How to use the icrawler.builtin.BaiduImageCrawler class in icrawler

To help you get started, we’ve selected a few icrawler examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github cwerner / fastclass / fastclass / fc_download.py View on Github external
downloader_threads=4,
                storage={'root_dir': folder})

            google_crawler.crawl(keyword=search, offset=0, max_num=maxnum,
                                min_size=(200,200), max_size=None, file_idx_offset=0)

        if c == 'BING':
            bing_crawler = BingImageCrawler(downloader_cls=CustomDownloader,
                                            log_level=logging.CRITICAL,
                                            downloader_threads=4,
                                            storage={'root_dir': folder})
            bing_crawler.crawl(keyword=search, filters=None, offset=0, max_num=maxnum, file_idx_offset='auto')


        if c == 'BAIDU':
            baidu_crawler = BaiduImageCrawler(downloader_cls=CustomDownloader,
                                    log_level=logging.CRITICAL,
                                    storage={'root_dir': folder})
            baidu_crawler.crawl(keyword=search, offset=0, max_num=maxnum,
                                min_size=(200,200), max_size=None, file_idx_offset='auto')


    return {k: v for k, v in CustomDownloader.registry.items() if k is not None}
github mzollin / qr-pirate / qrcrawler.py View on Github external
#!/usr/bin/env python3

# Copyright (c) 2018 Marco Zollinger
# Licensed under MIT, the license file shall be included in all copies

from icrawler.builtin import GoogleImageCrawler, BingImageCrawler, BaiduImageCrawler
import sys
import time

# The search phrase is the single required command-line argument. Exit with a
# usage message instead of an IndexError traceback when it is missing.
if len(sys.argv) < 2:
    sys.exit('usage: %s <keywords>' % sys.argv[0])

keywords = sys.argv[1]
print('crawling search engines for images with description %s...' %keywords)
# Brief pause before the crawl starts; presumably so the message above can be
# read (or the run aborted) before crawler output begins -- confirm intent.
time.sleep(2)

# One crawler per search engine, each storing into its own directory under
# qrbooty/, all configured with the same thread counts.
google_crawler = GoogleImageCrawler(parser_threads=4, downloader_threads=8, storage={'root_dir': 'qrbooty/google'})
bing_crawler = BingImageCrawler(parser_threads=4, downloader_threads=8, storage={'root_dir': 'qrbooty/bing'})
baidu_crawler = BaiduImageCrawler(parser_threads=4, downloader_threads=8, storage={'root_dir': 'qrbooty/baidu'})

# Run the three crawls one after another, asking each engine for up to 1000
# results starting at the first one.
google_crawler.crawl(keyword=keywords, offset=0, max_num=1000)
bing_crawler.crawl(keyword=keywords, offset=0, max_num=1000)
baidu_crawler.crawl(keyword=keywords, offset=0, max_num=1000)

print('qrcrawler done.\n')
github Sanaxen / cpp_torch / cpp_torch / tool / image_collector / image_collector2.py View on Github external
# Command line: argv[1] is the output root directory, argv[2] the search
# keyword. Each search engine stores into its own sub-directory of the root.
argv = sys.argv

# Both positional arguments are read below; exit with a usage message rather
# than an IndexError traceback when one is missing.
if len(argv) < 3:
    sys.exit(f'usage: {argv[0]} <output_dir> <keyword>')

# exist_ok=True creates the directory only when needed, without the
# check-then-create race of a separate isdir() test.
os.makedirs(argv[1], exist_ok=True)

# Every crawler is run with the same search arguments.
crawl_kwargs = {
    'keyword': argv[2],
    'max_num': 10000,
    'min_size': (200, 200),
    'max_size': None,
}

crawler = GoogleImageCrawler(storage={'root_dir': f'{argv[1]}/google'})
crawler.crawl(**crawl_kwargs)

bing_crawler = BingImageCrawler(storage={'root_dir': f'{argv[1]}/bing'})
bing_crawler.crawl(**crawl_kwargs)

baidu_crawler = BaiduImageCrawler(storage={'root_dir': f'{argv[1]}/baidu'})
baidu_crawler.crawl(**crawl_kwargs)

# NOTE(review): icrawler's documented Flickr usage passes an API key to the
# constructor and Flickr search parameters (e.g. tags/text) to crawl(); no key
# is given here and the search term is passed as `keyword` -- confirm this
# crawl actually returns results.
flickr_crawler = FlickrImageCrawler(storage={'root_dir': f'{argv[1]}/flickr'})
flickr_crawler.crawl(**crawl_kwargs)