Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
#!/usr/bin/env python3
# Copyright (c) 2018 Marco Zollinger
# Licensed under MIT, the license file shall be included in all copies
from icrawler.builtin import GoogleImageCrawler, BingImageCrawler, BaiduImageCrawler
import sys
import time
# Crawl Google, Bing and Baidu image search for the keywords given as the
# first command-line argument, saving results under qrbooty/<engine>.
if len(sys.argv) < 2:
    # Exit with a usage hint instead of an IndexError traceback.
    sys.exit(f'usage: {sys.argv[0]} <keywords>')
keywords = sys.argv[1]
print(f'crawling search engines for images with description {keywords}...')
# Short pause after the announcement before the crawlers start.
time.sleep(2)

# All three crawlers are built up front, each with its own output directory.
google_crawler = GoogleImageCrawler(
    parser_threads=4,
    downloader_threads=8,
    storage={'root_dir': 'qrbooty/google'},
)
bing_crawler = BingImageCrawler(
    parser_threads=4,
    downloader_threads=8,
    storage={'root_dir': 'qrbooty/bing'},
)
baidu_crawler = BaiduImageCrawler(
    parser_threads=4,
    downloader_threads=8,
    storage={'root_dir': 'qrbooty/baidu'},
)

# Run the engines one after another with the same keyword and limit.
google_crawler.crawl(keyword=keywords, offset=0, max_num=1000)
bing_crawler.crawl(keyword=keywords, offset=0, max_num=1000)
baidu_crawler.crawl(keyword=keywords, offset=0, max_num=1000)
print('qrcrawler done.\n')
# NOTE(review): this is an excerpt from the body of a function whose `def`
# line is not visible here, and its indentation has been stripped. The names
# `crawlers`, `search`, `folder`, `maxnum`, `CustomDownloader` and `logging`
# are defined outside this excerpt.
#
# For each entry in `crawlers`, print it and, when it equals 'GOOGLE', 'BING'
# or 'BAIDU', build the matching icrawler crawler (using CustomDownloader as
# the downloader class and `folder` as the storage root) and crawl for
# `search`, limited to `maxnum` results.
for c in crawlers:
print(f' -> {c}')
if c == 'GOOGLE':
# Google is the only engine configured with explicit feeder/parser thread
# counts; logging is restricted to CRITICAL for every engine.
google_crawler = GoogleImageCrawler(
downloader_cls=CustomDownloader,
log_level=logging.CRITICAL,
feeder_threads=1,
parser_threads=1,
downloader_threads=4,
storage={'root_dir': folder})
# Google passes file_idx_offset=0, while Bing and Baidu below pass 'auto'.
# NOTE(review): presumably 'auto' continues numbering after files already in
# `folder` -- confirm against the icrawler documentation.
google_crawler.crawl(keyword=search, offset=0, max_num=maxnum,
min_size=(200,200), max_size=None, file_idx_offset=0)
if c == 'BING':
bing_crawler = BingImageCrawler(downloader_cls=CustomDownloader,
log_level=logging.CRITICAL,
downloader_threads=4,
storage={'root_dir': folder})
# Unlike Google and Baidu, no min_size/max_size is passed for Bing.
bing_crawler.crawl(keyword=search, filters=None, offset=0, max_num=maxnum, file_idx_offset='auto')
if c == 'BAIDU':
# Baidu uses the crawler's default thread counts (none are passed).
baidu_crawler = BaiduImageCrawler(downloader_cls=CustomDownloader,
log_level=logging.CRITICAL,
storage={'root_dir': folder})
baidu_crawler.crawl(keyword=search, offset=0, max_num=maxnum,
min_size=(200,200), max_size=None, file_idx_offset='auto')
# Return the entries of CustomDownloader.registry, dropping any entry whose
# key is None.
return {k: v for k, v in CustomDownloader.registry.items() if k is not None}
from icrawler.builtin import FlickrImageCrawler
import sys
import os
# Crawl Google, Bing, Baidu and Flickr for images matching a keyword.
# Command line: <output_dir> <keyword>. Each engine writes into its own
# sub-directory of <output_dir>.
argv = sys.argv
if len(argv) < 3:
    # Exit with a usage hint instead of an IndexError traceback.
    sys.exit(f'usage: {argv[0]} <output_dir> <keyword>')

# exist_ok=True replaces the isdir()-then-makedirs() check, which could fail
# if the directory appeared between the two calls.
os.makedirs(argv[1], exist_ok=True)

crawler = GoogleImageCrawler(storage={'root_dir': f'{argv[1]}/google'})
crawler.crawl(keyword=argv[2], max_num=10000, min_size=(200, 200), max_size=None)

bing_crawler = BingImageCrawler(storage={'root_dir': f'{argv[1]}/bing'})
bing_crawler.crawl(keyword=argv[2], max_num=10000, min_size=(200, 200), max_size=None)

baidu_crawler = BaiduImageCrawler(storage={'root_dir': f'{argv[1]}/baidu'})
baidu_crawler.crawl(keyword=argv[2], max_num=10000, min_size=(200, 200), max_size=None)

# NOTE(review): no API key is passed here and the search term is given as
# `keyword`; confirm against the icrawler docs that FlickrImageCrawler works
# this way (its documented examples pass an apikey and `text`/`tags`).
flickr_crawler = FlickrImageCrawler(storage={'root_dir': f'{argv[1]}/flickr'})
flickr_crawler.crawl(keyword=argv[2], max_num=10000, min_size=(200, 200), max_size=None)