How to use the icrawler.builtin.GoogleImageCrawler class in icrawler

To help you get started, we've selected a few icrawler examples based on popular ways it is used in public projects.

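Before the project examples, here is a minimal sketch of the basic pattern, adapted from the icrawler documentation; the keyword, thread counts, and storage directory below are placeholders:

from icrawler.builtin import GoogleImageCrawler

# One feeder, two parser, and four downloader threads; images land in 'your_image_dir'.
google_crawler = GoogleImageCrawler(
    feeder_threads=1,
    parser_threads=2,
    downloader_threads=4,
    storage={'root_dir': 'your_image_dir'})

# Download up to 100 images matching the query 'cat'.
google_crawler.crawl(keyword='cat', max_num=100)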


github cwerner / fastclass / fastclass / fc_download.py
import logging
import os
from typing import Dict, List

from icrawler.builtin import BingImageCrawler, GoogleImageCrawler

# CustomDownloader is a project-specific downloader class defined elsewhere in fastclass.
def crawl(folder: str, search: str, maxnum: int, crawlers: List[str] = ['GOOGLE', 'BING', 'BAIDU']) -> Dict[str, str]:
    """Crawl web sites for images"""
    print('(1) Crawling ...')
    # prepare folders
    os.makedirs(folder, exist_ok=True)

    sources = {}
    if maxnum > 1000:
        print("Max num limited to 1000")
        maxnum = 1000

    for c in crawlers:
        print(f'    -> {c}')
        if c == 'GOOGLE':
            google_crawler = GoogleImageCrawler(
                downloader_cls=CustomDownloader,
                log_level=logging.CRITICAL,
                feeder_threads=1,
                parser_threads=1,
                downloader_threads=4,
                storage={'root_dir': folder})

            google_crawler.crawl(keyword=search, offset=0, max_num=maxnum,
                                 min_size=(200, 200), max_size=None, file_idx_offset=0)

        if c == 'BING':
            bing_crawler = BingImageCrawler(downloader_cls=CustomDownloader,
                                            log_level=logging.CRITICAL,
                                            downloader_threads=4,
                                            storage={'root_dir': folder})
            bing_crawler.crawl(keyword=search, filters=None, offset=0, max_num=maxnum, file_idx_offset='auto')
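
A call to this helper might look like the following; the folder name and search term are illustrative:

crawl('out/cats', 'cat', maxnum=100, crawlers=['GOOGLE', 'BING'])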

github yzy1996 / Python-Code / Reptile / reptile-google.py
# Crawl images from Google
# `keyword` is the search term

from icrawler.builtin import GoogleImageCrawler

google_crawler = GoogleImageCrawler(storage={'root_dir': '111'})
google_crawler.crawl(keyword='cat', max_num=10)
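
GoogleImageCrawler also accepts a filters argument to narrow results by size, color, license, or date, following the pattern in the icrawler documentation; the filter values below are placeholders:

filters = dict(
    size='large',
    license='commercial,modify',
    date=((2019, 1, 1), (2019, 12, 31)))
google_crawler.crawl(keyword='cat', filters=filters, max_num=10)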

github mzollin / qr-pirate / qrcrawler.py
#!/usr/bin/env python3

# Copyright (c) 2018 Marco Zollinger
# Licensed under MIT, the license file shall be included in all copies

from icrawler.builtin import GoogleImageCrawler, BingImageCrawler, BaiduImageCrawler
import sys
import time

keywords = sys.argv[1]
print('crawling search engines for images with description %s...' % keywords)
time.sleep(2)

google_crawler = GoogleImageCrawler(parser_threads=4, downloader_threads=8, storage={'root_dir': 'qrbooty/google'})
bing_crawler = BingImageCrawler(parser_threads=4, downloader_threads=8, storage={'root_dir': 'qrbooty/bing'})
baidu_crawler = BaiduImageCrawler(parser_threads=4, downloader_threads=8, storage={'root_dir': 'qrbooty/baidu'})

google_crawler.crawl(keyword=keywords, offset=0, max_num=1000)
bing_crawler.crawl(keyword=keywords, offset=0, max_num=1000)
baidu_crawler.crawl(keyword=keywords, offset=0, max_num=1000)

print('qrcrawler done.\n')

github Sanaxen / cpp_torch / cpp_torch / tool / image_collector / image_collector2.py
from icrawler.builtin import GoogleImageCrawler
from icrawler.builtin import BingImageCrawler
from icrawler.builtin import BaiduImageCrawler
from icrawler.builtin import FlickrImageCrawler
import sys
import os

argv = sys.argv

os.makedirs(argv[1], exist_ok=True)


#crawler = GoogleImageCrawler(storage = {"root_dir" : argv[1]})
crawler = GoogleImageCrawler(storage={'root_dir': f'{argv[1]}/google'})
crawler.crawl(keyword=argv[2], max_num=10000, min_size=(200, 200), max_size=None)

#bing_crawler = BingImageCrawler(storage = {"root_dir" : argv[1]})
bing_crawler = BingImageCrawler(storage={'root_dir': f'{argv[1]}/bing'})
bing_crawler.crawl(keyword=argv[2], max_num=10000, min_size=(200, 200), max_size=None)

#baidu_crawler = BaiduImageCrawler(storage = {"root_dir" : argv[1]})
baidu_crawler = BaiduImageCrawler(storage={'root_dir': f'{argv[1]}/baidu'})
baidu_crawler.crawl(keyword=argv[2], max_num=10000, min_size=(200, 200), max_size=None)

# Note: FlickrImageCrawler requires a Flickr API key, and its crawl() takes
# Flickr search arguments (e.g. text, tags) rather than keyword/min_size.
# 'YOUR_FLICKR_APIKEY' is a placeholder.
flickr_crawler = FlickrImageCrawler('YOUR_FLICKR_APIKEY',
                                    storage={'root_dir': f'{argv[1]}/flickr'})
flickr_crawler.crawl(text=argv[2], max_num=10000)
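
Because FlickrImageCrawler queries the Flickr API directly, crawl() also accepts other Flickr search parameters; a sketch based on the icrawler documentation, where the tag and date values are placeholders:

from datetime import date

# Restrict results to photos tagged 'cat' or 'kitten' uploaded since 2019.
flickr_crawler.crawl(max_num=100, tags='cat,kitten',
                     min_upload_date=date(2019, 1, 1))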