Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def crawl(
    folder: str,
    search: str,
    maxnum: int,
    # The default list is only iterated, never mutated, so sharing it is safe.
    crawlers: List[str] = ['GOOGLE', 'BING', 'BAIDU'],
) -> Dict[str, str]:
    """Crawl image search engines and store the downloads in ``folder``.

    Args:
        folder: Directory the crawlers write into; created if it is missing.
        search: Keyword handed to every crawler.
        maxnum: Requested number of images per crawler. Values above 1000
            are reduced to 1000.
        crawlers: Engine names to run, in order. Only ``'GOOGLE'`` and
            ``'BING'`` are handled here; any other name (including the
            default ``'BAIDU'``) is printed and then skipped.

    Returns:
        The ``sources`` dictionary. NOTE(review): nothing in this function
        adds entries to it, so it is always empty here -- presumably the
        downloader class is meant to fill it; confirm against
        ``CustomDownloader``.
    """
    print('(1) Crawling ...')
    # prepare folders
    os.makedirs(folder, exist_ok=True)

    sources = {}

    if maxnum > 1000:
        print("Max num limited to 1000")
        maxnum = 1000

    for name in crawlers:
        print(f' -> {name}')
        if name == 'GOOGLE':
            google_crawler = GoogleImageCrawler(
                downloader_cls=CustomDownloader,
                log_level=logging.CRITICAL,
                feeder_threads=1,
                parser_threads=1,
                downloader_threads=4,
                storage={'root_dir': folder},
            )
            google_crawler.crawl(
                keyword=search,
                offset=0,
                max_num=maxnum,
                min_size=(200, 200),
                max_size=None,
                file_idx_offset=0,
            )
        elif name == 'BING':
            bing_crawler = BingImageCrawler(
                downloader_cls=CustomDownloader,
                log_level=logging.CRITICAL,
                downloader_threads=4,
                storage={'root_dir': folder},
            )
            # NOTE(review): 'auto' presumably continues the file numbering
            # after what is already in ``folder`` so earlier downloads are
            # not overwritten -- confirm in the icrawler documentation.
            bing_crawler.crawl(
                keyword=search,
                filters=None,
                offset=0,
                max_num=maxnum,
                file_idx_offset='auto',
            )

    # Honour the declared return type instead of implicitly returning None.
    return sources
# Crawl images from Google
# `keyword` is the search term
from icrawler.builtin import GoogleImageCrawler
# Build a Google image crawler whose storage root is the directory "111",
# then run it for the keyword 'cat' with max_num=10.
google_crawler = GoogleImageCrawler(
    storage={
        'root_dir': '111',
    },
)
google_crawler.crawl(
    keyword='cat',
    max_num=10,
)
#!/usr/bin/env python3
# Copyright (c) 2018 Marco Zollinger
# Licensed under MIT, the license file shall be included in all copies
from icrawler.builtin import GoogleImageCrawler, BingImageCrawler, BaiduImageCrawler
import sys
import time
# The search phrase is the first command-line argument. Exit with a usage
# message instead of an IndexError traceback when it is missing.
if len(sys.argv) < 2:
    sys.exit(f'usage: {sys.argv[0]} <keywords>')
keywords = sys.argv[1]

print(f'crawling search engines for images with description {keywords}...')
# NOTE(review): the two-second pause looks like it is there so the banner
# can be read before crawler output starts -- confirm before removing.
time.sleep(2)

# All three crawlers are constructed first, each with its own storage
# directory under qrbooty/, and only then run one after another.
google_crawler = GoogleImageCrawler(
    parser_threads=4,
    downloader_threads=8,
    storage={'root_dir': 'qrbooty/google'},
)
bing_crawler = BingImageCrawler(
    parser_threads=4,
    downloader_threads=8,
    storage={'root_dir': 'qrbooty/bing'},
)
baidu_crawler = BaiduImageCrawler(
    parser_threads=4,
    downloader_threads=8,
    storage={'root_dir': 'qrbooty/baidu'},
)

google_crawler.crawl(keyword=keywords, offset=0, max_num=1000)
bing_crawler.crawl(keyword=keywords, offset=0, max_num=1000)
baidu_crawler.crawl(keyword=keywords, offset=0, max_num=1000)

print('qrcrawler done.\n')
from icrawler.builtin import GoogleImageCrawler
from icrawler.builtin import BingImageCrawler
from icrawler.builtin import BaiduImageCrawler
from icrawler.builtin import FlickrImageCrawler
import sys
import os
# Command line: <output_dir> <keyword>. Exit with a usage message instead of
# an IndexError traceback when either argument is missing.
argv = sys.argv
if len(argv) < 3:
    sys.exit(f'usage: {argv[0]} <output_dir> <keyword>')

# exist_ok=True replaces the isdir() check and avoids the check-then-create
# race between the test and the mkdir.
os.makedirs(argv[1], exist_ok=True)

# Each engine stores its downloads in its own sub-directory of <output_dir>.
crawler = GoogleImageCrawler(storage={'root_dir': f'{argv[1]}/google'})
crawler.crawl(keyword=argv[2], max_num=10000, min_size=(200, 200), max_size=None)

bing_crawler = BingImageCrawler(storage={'root_dir': f'{argv[1]}/bing'})
bing_crawler.crawl(keyword=argv[2], max_num=10000, min_size=(200, 200), max_size=None)

baidu_crawler = BaiduImageCrawler(storage={'root_dir': f'{argv[1]}/baidu'})
baidu_crawler.crawl(keyword=argv[2], max_num=10000, min_size=(200, 200), max_size=None)

# NOTE(review): FlickrImageCrawler needs a Flickr API key and none is passed
# here -- confirm it is supplied some other way (e.g. environment) or add it.
flickr_crawler = FlickrImageCrawler(storage={'root_dir': f'{argv[1]}/flickr'})
# The Flickr crawler forwards its extra keyword arguments as Flickr search
# parameters; the free-text query parameter is `text`, not `keyword`.
flickr_crawler.crawl(text=argv[2], max_num=10000, min_size=(200, 200), max_size=None)