How to use the scrapy.settings.Settings class in Scrapy

To help you get started, we’ve selected a few Scrapy examples showing popular ways scrapy.settings.Settings is used in public projects.

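Before diving into the examples, here is a minimal sketch of the Settings API they all rely on: a Settings object can be built empty or from a dict, and values are read back with type-aware getters such as get(), getint() and getbool().

from scrapy.settings import Settings

settings = Settings({'FILES_EXPIRES': '90', 'ROBOTSTXT_OBEY': 'True'})
print(settings.getint('FILES_EXPIRES'))    # 90, converted from the string value
print(settings.getbool('ROBOTSTXT_OBEY'))  # True
print(settings.get('SOME_MISSING_KEY'))    # None for keys that were never set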

github scrapy-plugins / scrapy-pagestorage / tests / test_pagestorage.py
def test_from_crawler(self, mocked_hsref):
        crawler_mock = mock.Mock()
        crawler_mock.settings = Settings()
        self.assertRaises(NotConfigured,
                          PageStorageMiddleware.from_crawler,
                          crawler_mock)
        # test creating an instance for all other cases
        crawler_mock.settings = mock.Mock()
        mocked_values = [(True, False), (False, True), (True, True)]
        crawler_mock.settings.side_effect = mocked_values
        for _ in range(len(mocked_values)):
            assert isinstance(PageStorageMiddleware.from_crawler(crawler_mock),
                              PageStorageMiddleware)
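
The test above exercises Scrapy's from_crawler pattern: a component reads crawler.settings and raises NotConfigured when it should not be enabled. A hedged sketch of that pattern follows; the middleware name and setting keys are illustrative, not part of scrapy-pagestorage.

from scrapy.exceptions import NotConfigured


class ExampleMiddleware:
    def __init__(self, endpoint):
        self.endpoint = endpoint

    @classmethod
    def from_crawler(cls, crawler):
        # Refuse to load unless the (hypothetical) feature flag is set.
        if not crawler.settings.getbool('EXAMPLE_ENABLED'):
            raise NotConfigured('EXAMPLE_ENABLED is not set')
        return cls(endpoint=crawler.settings.get('EXAMPLE_ENDPOINT'))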

github scrapy / scrapy / tests / test_feedexport.py
def run_and_export(self, spider_cls, settings=None):
        """ Run spider with specified settings; return exported data. """
        tmpdir = tempfile.mkdtemp()
        res_name = tmpdir + '/res'
        defaults = {
            'FEED_URI': 'file://' + res_name,
            'FEED_FORMAT': 'csv',
        }
        defaults.update(settings or {})
        try:
            with MockServer() as s:
                runner = CrawlerRunner(Settings(defaults))
                yield runner.crawl(spider_cls)

            with open(res_name, 'rb') as f:
                defer.returnValue(f.read())

        finally:
            shutil.rmtree(tmpdir)

github scrapy / scrapy / tests / test_pipeline_files.py
def test_expires(self):
        another_pipeline = FilesPipeline.from_settings(Settings({'FILES_STORE': self.tempdir,
                                                                'FILES_EXPIRES': 42}))
        self.assertEqual(self.pipeline.expires, self.default_settings.getint('FILES_EXPIRES'))
        self.assertEqual(another_pipeline.expires, 42)
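
The same call works without FILES_EXPIRES, in which case the pipeline falls back to Scrapy's built-in default, read through Settings.getint(). A small sketch mirroring the call in the test; the store path is illustrative.

from scrapy.pipelines.files import FilesPipeline
from scrapy.settings import Settings

pipeline = FilesPipeline.from_settings(Settings({'FILES_STORE': '/tmp/files-store'}))
print(pipeline.expires)  # falls back to the default FILES_EXPIRES value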

github ejulio / spider-feeder / tests / store / file_handler / test_local.py
def test_open_local_file(mocker):
    mock = mocker.patch('spider_feeder.store.file_handler.local.builtins.open')
    local.open('/tmp/input_urls.txt', encoding='utf-8', settings=Settings())
    mock.assert_called_once_with('/tmp/input_urls.txt', encoding='utf-8')

github xiaowangwindow / scrapy-rotated-proxy / tests / test_downloadermiddleware_httpproxy.py
def test_multi_scheme(self):
        settings = Settings({'HTTP_PROXIES': ['https://proxy1.for.http:3128'],
                             'HTTPS_PROXIES': ['https://proxy2.for.http:3128']})
        with self._middleware(spider, settings) as mw:
            req_http = Request('http://scrapytest.org')
            req_https = Request('https://scrapytest.org')
            res_http = yield mw.process_request(req_http, spider)
            res_https = yield mw.process_request(req_https, spider)
            assert res_http is None
            assert res_https is None
            self.assertEqual(req_http.meta,
                             {'proxy': 'https://proxy1.for.http:3128'})
            self.assertEqual(req_https.meta,
                             {'proxy': 'https://proxy2.for.http:3128'})

github scrapy / scrapy / tests / test_pipeline_files.py
def test_files_urls_field(self):
        another_pipeline = FilesPipeline.from_settings(Settings({'FILES_STORE': self.tempdir,
                                                                'FILES_URLS_FIELD': 'funny_field'}))
        self.assertEqual(self.pipeline.files_urls_field, self.default_settings.get('FILES_URLS_FIELD'))
        self.assertEqual(another_pipeline.files_urls_field, 'funny_field')

github scrapinghub / portia / portia_server / portia_api / utils / spiders.py
def load_spider(model):
    name, spider, items, extractors = load_spider_data(model)
    return IblSpider(name, spider, items, extractors, Settings())

github Code4HR / open-health-inspection-scraper / scraper / helpers / scoring.py
def __init__(self, settings=None):
        if isinstance(settings, dict) or settings is None:
            settings = Settings(settings)
        self.settings = settings

        self.score_logger = logging.getLogger(__name__)
        self.score_logger.setLevel(logging.INFO)

        ### Set up database connection (pulled from settings)
        connection = pymongo.MongoClient(
            host=settings['MONGODB_SERVER'],
            port=int(settings['MONGODB_PORT'])
        )

        db = connection[settings['MONGODB_DB']]
        self.collection = db[settings['MONGODB_COLLECTION']]
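
A detail the constructor above relies on: Settings accepts a plain dict (or None) and still supports dict-style reads, so the rest of the class can index it exactly like the original dict. A quick sketch; the keys are illustrative.

from scrapy.settings import Settings

s = Settings({'MONGODB_PORT': 27017})
assert s['MONGODB_PORT'] == 27017
assert s['MONGODB_SERVER'] is None  # keys that were never set read back as None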

github amol9 / imagebot / imagebot / main.py
def start_spider(args):
	settings.LOG_LEVEL = args.log_level
	project_settings = Settings()
	project_settings.setmodule(settings)
	
	process = CrawlerProcess(project_settings)
	
	process.crawl(ImageSpider, domains=args.domains, start_urls=args.start_urls, jobname=args.jobname, stay_under=args.stay_under,
			monitor=args.monitor, user_agent=args.user_agent, minsize=args.min_size, no_cache=args.no_cache,
			images_store=args.images_store, depth_limit=args.depth_limit, url_regex=args.url_regex,
			no_cdns=args.no_cdns, auto_throttle=args.auto_throttle, log_level=args.log_level)

	process.start()
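
setmodule() is what lets a plain settings module (LOG_LEVEL, IMAGES_STORE and so on as module-level constants) be layered onto a Settings object before the crawl starts. A minimal sketch using a stand-in module object, since the real imagebot settings module is not shown here.

import types

from scrapy.settings import Settings

# Stand-in for a real project settings.py; only uppercase names are picked up.
fake_module = types.ModuleType('fake_settings')
fake_module.LOG_LEVEL = 'INFO'
fake_module.IMAGES_STORE = '/tmp/images'

project_settings = Settings()
project_settings.setmodule(fake_module)
assert project_settings.get('LOG_LEVEL') == 'INFO'
assert project_settings.get('IMAGES_STORE') == '/tmp/images'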

github scrapinghub / scrapyrt / scrapyrt / conf / spider_settings.py
def get_project_settings(module=None, custom_settings=None):
    crawler_settings = Settings()
    if module is None:
        module = settings.PROJECT_SETTINGS
    crawler_settings.setmodule(module, priority='project')
    if custom_settings:
        assert isinstance(custom_settings, dict)
        crawler_settings.setdict(custom_settings, priority='cmdline')
    return crawler_settings
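
The priority arguments above are what make the layering work: 'cmdline' outranks 'project', which outranks Scrapy's built-in 'default' values, and a later write at a lower priority does not clobber a higher-priority one. A short sketch of that behaviour:

from scrapy.settings import Settings

s = Settings()
s.set('DOWNLOAD_DELAY', 1.0, priority='project')
s.set('DOWNLOAD_DELAY', 5.0, priority='cmdline')
assert s.getfloat('DOWNLOAD_DELAY') == 5.0

# Writing again at the lower 'project' priority leaves the cmdline value in place.
s.set('DOWNLOAD_DELAY', 0.5, priority='project')
assert s.getfloat('DOWNLOAD_DELAY') == 5.0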