def test_from_crawler(self, mocked_hsref):
    crawler_mock = mock.Mock()
    crawler_mock.settings = Settings()
    self.assertRaises(NotConfigured,
                      PageStorageMiddleware.from_crawler,
                      crawler_mock)
    # test creating an instance for all other cases
    crawler_mock.settings = mock.Mock()
    mocked_values = [(True, False), (False, True), (True, True)]
    crawler_mock.settings.side_effect = mocked_values
    for _ in range(len(mocked_values)):
        assert isinstance(PageStorageMiddleware.from_crawler(crawler_mock),
                          PageStorageMiddleware)
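
# For context, a minimal sketch of the from_crawler pattern the test above
# exercises. This is an assumption about PageStorageMiddleware's shape, not
# its actual code, and the two setting names are illustrative guesses: the
# middleware raises NotConfigured unless at least one enabling flag is true.
from scrapy.exceptions import NotConfigured

class SketchPageStorageMiddleware:
    @classmethod
    def from_crawler(cls, crawler):
        enabled = crawler.settings.getbool('PAGE_STORAGE_ENABLED')
        on_error = crawler.settings.getbool('PAGE_STORAGE_ON_ERROR_ENABLED')
        if not (enabled or on_error):
            raise NotConfigured  # matches the plain Settings() (all-false) case above
        return cls()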

# The yield/returnValue style requires Twisted's inlineCallbacks decorator.
@defer.inlineCallbacks
def run_and_export(self, spider_cls, settings=None):
    """Run spider with specified settings; return exported data."""
    tmpdir = tempfile.mkdtemp()
    res_name = tmpdir + '/res'
    defaults = {
        'FEED_URI': 'file://' + res_name,
        'FEED_FORMAT': 'csv',
    }
    defaults.update(settings or {})
    try:
        with MockServer() as s:
            runner = CrawlerRunner(Settings(defaults))
            yield runner.crawl(spider_cls)
        with open(res_name, 'rb') as f:
            defer.returnValue(f.read())
    finally:
        shutil.rmtree(tmpdir)
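
# A sketch of how a test might consume run_and_export; ItemSpider is a
# placeholder spider class, not one defined in the snippets above.
@defer.inlineCallbacks
def test_export_csv(self):
    data = yield self.run_and_export(ItemSpider, {'FEED_FORMAT': 'csv'})
    self.assertTrue(data)  # exporter wrote a non-empty CSV file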

def test_expires(self):
    another_pipeline = FilesPipeline.from_settings(
        Settings({'FILES_STORE': self.tempdir,
                  'FILES_EXPIRES': 42}))
    self.assertEqual(self.pipeline.expires,
                     self.default_settings.getint('FILES_EXPIRES'))
    self.assertEqual(another_pipeline.expires, 42)

def test_open_local_file(mocker):
    mock = mocker.patch('spider_feeder.store.file_handler.local.builtins.open')
    local.open('/tmp/input_urls.txt', encoding='utf-8', settings=Settings())
    mock.assert_called_once_with('/tmp/input_urls.txt', encoding='utf-8')
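
# Note: the patch target is the name as resolved inside the module under test
# ('...file_handler.local.builtins.open'), not 'builtins.open' globally.
# Patching where a name is looked up is the standard unittest.mock guidance.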

def test_multi_scheme(self):
    # 'spider' and the _middleware() helper are defined elsewhere in the suite.
    settings = Settings({'HTTP_PROXIES': ['https://proxy1.for.http:3128'],
                         'HTTPS_PROXIES': ['https://proxy2.for.http:3128']})
    with self._middleware(spider, settings) as mw:
        req_http = Request('http://scrapytest.org')
        req_https = Request('https://scrapytest.org')
        res_http = yield mw.process_request(req_http, spider)
        res_https = yield mw.process_request(req_https, spider)
        assert res_http is None
        assert res_https is None
        self.assertEqual(req_http.meta,
                         {'proxy': 'https://proxy1.for.http:3128'})
        self.assertEqual(req_https.meta,
                         {'proxy': 'https://proxy2.for.http:3128'})

def test_files_urls_field(self):
    another_pipeline = FilesPipeline.from_settings(
        Settings({'FILES_STORE': self.tempdir,
                  'FILES_URLS_FIELD': 'funny_field'}))
    self.assertEqual(self.pipeline.files_urls_field,
                     self.default_settings.get('FILES_URLS_FIELD'))
    self.assertEqual(another_pipeline.files_urls_field, 'funny_field')

def load_spider(model):
    name, spider, items, extractors = load_spider_data(model)
    return IblSpider(name, spider, items, extractors, Settings())

def __init__(self, settings=None):
    if isinstance(settings, dict) or settings is None:
        settings = Settings(settings)
    self.settings = settings
    self.score_logger = logging.getLogger(__name__)
    self.score_logger.setLevel(logging.INFO)
    # Set up database connection (pulled from settings)
    connection = pymongo.MongoClient(
        host=settings['MONGODB_SERVER'],
        port=int(settings['MONGODB_PORT'])
    )
    db = connection[settings['MONGODB_DB']]
    self.collection = db[settings['MONGODB_COLLECTION']]
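
# Settings wraps a plain dict and supports both raw indexing (as above) and
# typed accessors; a small sketch reusing the same MONGODB_* key with a
# placeholder value:
s = Settings({'MONGODB_PORT': '27017'})
assert int(s['MONGODB_PORT']) == 27017
assert s.getint('MONGODB_PORT') == 27017  # Scrapy's built-in coercion helper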

def start_spider(args):
    settings.LOG_LEVEL = args.log_level
    project_settings = Settings()
    project_settings.setmodule(settings)
    process = CrawlerProcess(project_settings)
    process.crawl(ImageSpider, domains=args.domains, start_urls=args.start_urls,
                  jobname=args.jobname, stay_under=args.stay_under,
                  monitor=args.monitor, user_agent=args.user_agent,
                  minsize=args.min_size, no_cache=args.no_cache,
                  images_store=args.images_store, depth_limit=args.depth_limit,
                  url_regex=args.url_regex, no_cdns=args.no_cdns,
                  auto_throttle=args.auto_throttle, log_level=args.log_level)
    process.start()

def get_project_settings(module=None, custom_settings=None):
    crawler_settings = Settings()
    if module is None:
        module = settings.PROJECT_SETTINGS
    crawler_settings.setmodule(module, priority='project')
    if custom_settings:
        assert isinstance(custom_settings, dict)
        crawler_settings.setdict(custom_settings, priority='cmdline')
    return crawler_settings
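
# Example use (a sketch; 'myproject.settings' is a placeholder module path).
# Scrapy ranks the 'cmdline' priority above 'project', so per-call overrides
# passed via custom_settings win over values from the settings module:
s = get_project_settings('myproject.settings', {'LOG_LEVEL': 'DEBUG'})
assert s.get('LOG_LEVEL') == 'DEBUG'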