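# NOTE: The snippets below look like pytest fixtures and helpers exercising an
# OAI-PMH "BASE" spider, followed by the spider's constructor. They are not
# self-contained as excerpted; roughly the following imports are needed. The
# module paths for the spider and for the test helpers are assumptions, not
# taken from the excerpts.
import requests_mock
import responses
from scrapy.selector import Selector

from hepcrawl.spiders import base_spider  # assumed location of the spider
from hepcrawl.testlib.fixtures import (   # assumed location of the helpers
    fake_response_from_file,
    fake_response_from_string,
    get_node,
)
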
def parsed_node_without_link():
    """Call parse_node function without a direct link"""
    spider = base_spider.BaseSpider()
    # The OAI-PMH record XML is omitted in the excerpt.
    body = """
    """
    response = fake_response_from_string(text=body)
    node = get_node(spider, 'OAI-PMH:record', text=body)
    response.meta["record"] = node.extract()
    with requests_mock.Mocker() as mock:
        # The excerpt is truncated here; presumably the HEAD request is
        # mocked and spider.parse_node(response, node) is returned, as in
        # parsed_node() below.
        pass

def parsed_node():
    """Call parse_node function with a direct link"""
    url = "http://www.example.com/bitstream/1885/10005/1/Butt_R.D._2003.pdf"
    # responses.add() only takes effect while the responses mock is active
    # (e.g. under @responses.activate), which the excerpt does not show.
    responses.add(responses.HEAD, url, status=200,
                  content_type='application/pdf')
    spider = base_spider.BaseSpider()
    # The OAI-PMH record XML is omitted in the excerpt.
    body = """
    """
    response = fake_response_from_string(text=body)
    node = get_node(spider, 'OAI-PMH:record', text=body)
    response.meta["record"] = node[0].extract()
    parsed_item = spider.parse_node(response, node[0])
    # The excerpt ends here; presumably the built record is asserted on and
    # returned, as in record() below.

def record():
    """Return built HEPRecord from the BASE spider."""
    spider = base_spider.BaseSpider()
    response = fake_response_from_file('base/test_1.xml')
    selector = Selector(response, type='xml')
    spider._register_namespaces(selector)
    nodes = selector.xpath('.//%s' % spider.itertag)
    response.meta["record"] = nodes[0].extract()
    response.meta["urls"] = ["http://hdl.handle.net/1885/10005"]
    parsed_item = spider.build_item(response)
    assert parsed_item
    assert parsed_item.record
    return parsed_item.record

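# A minimal usage sketch, not part of the original excerpts: if record() is
# registered as a pytest fixture (the decorators are not shown above), pytest
# injects the built HEPRecord by name and a test can assert on it.
def test_record_is_built(record):
    assert record
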
def urls():
    """Return the URLs found in the first record of the test XML."""
    spider = base_spider.BaseSpider()
    response = fake_response_from_file('base/test_1.xml')
    selector = Selector(response, type='xml')
    spider._register_namespaces(selector)
    nodes = selector.xpath('.//%s' % spider.itertag)
    return spider.get_urls_in_record(nodes[0])

def splash():
    """Call web scraper function, return final HEPRecord."""
    spider = base_spider.BaseSpider()
    splash_response = fake_response_from_file('base/test_1_splash.htm')
    response = fake_response_from_file('base/test_1.xml')
    selector = Selector(response, type='xml')
    spider._register_namespaces(selector)
    nodes = selector.xpath('.//%s' % spider.itertag)
    splash_response.meta["record"] = nodes[0].extract()
    with requests_mock.Mocker() as mock:
        mock.head(
            'http://www.example.com/bitstream/1885/10005/1/Butt_R.D._2003.pdf',
            headers={
                'Content-Type': 'text/html',
            },
        )
        parsed_item = spider.scrape_for_pdf(splash_response)
        # The excerpt ends here; presumably the scraped record is asserted on
        # and returned, as in record() above.

def parsed_node_missing_scheme():
    """Call parse_node function with a link missing the http:// scheme."""
    spider = base_spider.BaseSpider()
    # The OAI-PMH record XML is omitted in the excerpt.
    body = """
    """
    response = fake_response_from_string(text=body)
    node = get_node(spider, 'OAI-PMH:record', text=body)
    response.meta["record"] = node.extract_first()
    with requests_mock.Mocker() as mock:
        # The excerpt is truncated here; presumably the HEAD request is
        # mocked and spider.parse_node(response, node) is returned, as in
        # parsed_node() above.
        pass

def direct_links():
    """Return the direct links found for the handle URL."""
    spider = base_spider.BaseSpider()
    urls = ["http://hdl.handle.net/1885/10005"]
    return spider.find_direct_links(urls)

def __init__(self, source_file=None, *args, **kwargs):
    """Construct BASE spider."""
    super(BaseSpider, self).__init__(*args, **kwargs)
    self.source_file = source_file
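
# Usage note, not from the original excerpts: source_file is a spider
# argument, so with Scrapy's command line it would typically be passed via
# the -a flag; the registered spider name is an assumption here.
#
#   scrapy crawl BASE -a source_file=file://path/to/base/test_1.xml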