Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
'odp',
# other
'css', 'pdf', 'exe', 'bin', 'rss', 'dmg', 'iso', 'apk'
]
# Concrete type of a compiled regular expression, used for isinstance checks.
_re_type = type(re.compile("", 0))


def _matches(url, regexs):
    """Return True if at least one pattern in *regexs* is found in *url*.

    *regexs* is an iterable of compiled patterns (anything exposing
    ``search``).  An empty iterable yields False.
    """
    return any(r.search(url) for r in regexs)


def _is_valid_url(url):
    """Return True if *url* uses one of the supported schemes.

    The text before the first ``'://'`` (or the whole string when no
    ``'://'`` is present) must be one of ``http``, ``https``, ``file`` or
    ``ftp``.  The comparison is case-sensitive.
    """
    return url.split('://', 1)[0] in {'http', 'https', 'file', 'ftp'}
class FilteringLinkExtractor(object):
_csstranslator = HTMLTranslator()
def __new__(cls, *args, **kwargs):
    """Allocate the instance, warning when a deprecated class is used.

    A ``ScrapyDeprecationWarning`` is emitted unless ``cls`` is
    ``LxmlLinkExtractor`` or one of its subclasses.

    ``*args`` and ``**kwargs`` are accepted only so that the constructor
    arguments passed at instantiation do not fail here; they are not used
    for allocation.
    """
    # Imported at call time rather than module level
    # (presumably to avoid a circular import -- confirm before moving it).
    from scrapy.linkextractors.lxmlhtml import LxmlLinkExtractor
    if (issubclass(cls, FilteringLinkExtractor) and
            not issubclass(cls, LxmlLinkExtractor)):
        warn('scrapy.linkextractors.FilteringLinkExtractor is deprecated, '
             'please use scrapy.linkextractors.LinkExtractor instead',
             ScrapyDeprecationWarning, stacklevel=2)
    # object.__new__() raises TypeError when it receives extra arguments
    # from a class that overrides __new__, so only ``cls`` is forwarded.
    return super().__new__(cls)
def __init__(self, link_extractor, allow, deny, allow_domains, deny_domains,
restrict_xpaths, canonicalize, deny_extensions, restrict_css, restrict_text):
self.link_extractor = link_extractor
self.allow_res = [x if isinstance(x, _re_type) else re.compile(x)
return XPathExpr.from_xpath(xpath, textnode=True)
class GenericTranslator(TranslatorMixin, OriginalGenericTranslator):
    """Translator built from TranslatorMixin and OriginalGenericTranslator.

    ``css_to_xpath`` results are memoized.
    """

    # NOTE: the cache key includes ``self``, so cached entries keep the
    # translator instance alive for as long as they stay in the cache.
    @lru_cache(maxsize=256)
    def css_to_xpath(self, css, prefix='descendant-or-self::'):
        """Translate *css* to XPath using the inherited implementation.

        Up to 256 distinct ``(self, css, prefix)`` results are cached.
        """
        xpath = super().css_to_xpath(css, prefix)
        return xpath
class HTMLTranslator(TranslatorMixin, OriginalHTMLTranslator):
    """Translator built from TranslatorMixin and OriginalHTMLTranslator.

    ``css_to_xpath`` results are memoized.
    """

    # NOTE: the cache key includes ``self``, so cached entries keep the
    # translator instance alive for as long as they stay in the cache.
    @lru_cache(maxsize=256)
    def css_to_xpath(self, css, prefix='descendant-or-self::'):
        """Translate *css* to XPath using the inherited implementation.

        Up to 256 distinct ``(self, css, prefix)`` results are cached.
        """
        xpath = super().css_to_xpath(css, prefix)
        return xpath
# Module-level HTMLTranslator instance; css2xpath() delegates to it.
_translator = HTMLTranslator()
def css2xpath(query):
    """Return translated XPath version of a given CSS query.

    The translation is delegated to the module-level ``_translator``.
    """
    xpath = _translator.css_to_xpath(query)
    return xpath
def from_response(cls, response, formname=None, formid=None, formnumber=0, formdata=None,
                  clickdata=None, dont_click=False, formxpath=None, formcss=None, **kwargs):
    """Build an instance of ``cls`` from a form found in *response*.

    * ``encoding`` defaults to ``response.encoding`` unless given in
      *kwargs*.
    * When *formcss* is given it is translated to XPath and replaces
      *formxpath*.
    * The form is located with ``_get_form``; its inputs are merged with
      *formdata* by ``_get_inputs``.
    * ``url`` and ``method`` may be overridden through *kwargs*; otherwise
      they come from the form.  A non-None method is upper-cased and falls
      back to ``'GET'`` when it is not in ``cls.valid_form_methods``.

    Remaining *kwargs* are forwarded to the ``cls`` constructor.
    """
    kwargs.setdefault('encoding', response.encoding)

    if formcss is not None:
        # Imported only when a CSS selector actually needs translating.
        from parsel.csstranslator import HTMLTranslator
        formxpath = HTMLTranslator().css_to_xpath(formcss)

    form = _get_form(response, formname, formid, formnumber, formxpath)
    formdata = _get_inputs(form, formdata, dont_click, clickdata, response)

    url_override = kwargs.pop('url', None)
    url = _get_form_url(form, url_override)

    method = kwargs.pop('method', form.method)
    if method is not None:
        normalized = method.upper()
        method = normalized if normalized in cls.valid_form_methods else 'GET'

    return cls(url=url, method=method, formdata=formdata, **kwargs)
def css_to_xpath(self, css, prefix='descendant-or-self::'):
    """Translate *css* to XPath via the implementation after HTMLTranslator in the MRO."""
    parent = super(HTMLTranslator, self)
    return parent.css_to_xpath(css, prefix)