How to use the parsel.csstranslator.HTMLTranslator class in parsel

To help you get started, we’ve selected a few parsel examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github scrapy / scrapy / scrapy / linkextractors / __init__.py View on Github external
'odp',

    # other
    'css', 'pdf', 'exe', 'bin', 'rss', 'dmg', 'iso', 'apk'
]


# Concrete type of a compiled regular expression, used for isinstance checks.
_re_type = type(re.compile("", 0))


def _matches(url, regexs):
    """Return True if any compiled pattern in *regexs* finds a match in *url*.

    An empty *regexs* iterable yields False.
    """
    return any(r.search(url) for r in regexs)


def _is_valid_url(url):
    """Return True if the text before the first '://' in *url* is one of
    'http', 'https', 'file' or 'ftp'.
    """
    return url.split('://', 1)[0] in {'http', 'https', 'file', 'ftp'}


class FilteringLinkExtractor(object):

    _csstranslator = HTMLTranslator()

    def __new__(cls, *args, **kwargs):
        """Create the instance, warning when the deprecated class is used.

        A ``ScrapyDeprecationWarning`` is issued when *cls* is not a subclass
        of ``LxmlLinkExtractor``. The constructor arguments are accepted so
        the signature stays compatible with ``__init__``, but they are not
        forwarded to the parent ``__new__``.
        """
        # Imported inside the method rather than at module level.
        # NOTE(review): presumably to avoid a circular import - confirm.
        from scrapy.linkextractors.lxmlhtml import LxmlLinkExtractor
        if (issubclass(cls, FilteringLinkExtractor) and
                not issubclass(cls, LxmlLinkExtractor)):
            warn('scrapy.linkextractors.FilteringLinkExtractor is deprecated, '
                 'please use scrapy.linkextractors.LinkExtractor instead',
                 ScrapyDeprecationWarning, stacklevel=2)
        # object.__new__() raises TypeError when it is given extra arguments
        # by a class that overrides __new__, so only the class is passed on.
        # __init__ still receives the original arguments from type.__call__.
        return super().__new__(cls)

    def __init__(self, link_extractor, allow, deny, allow_domains, deny_domains,
                 restrict_xpaths, canonicalize, deny_extensions, restrict_css, restrict_text):

        self.link_extractor = link_extractor

        self.allow_res = [x if isinstance(x, _re_type) else re.compile(x)
github scrapy / parsel / parsel / csstranslator.py View on Github external
return XPathExpr.from_xpath(xpath, textnode=True)


class GenericTranslator(TranslatorMixin, OriginalGenericTranslator):
    """Translator built from TranslatorMixin and OriginalGenericTranslator
    whose ``css_to_xpath`` results are memoized.
    """

    @lru_cache(maxsize=256)
    def css_to_xpath(self, css, prefix='descendant-or-self::'):
        """Translate *css* to XPath via the parent implementation.

        Results are kept in an LRU cache of up to 256 entries; the cache key
        includes the instance, *css* and *prefix*.
        """
        parent = super(GenericTranslator, self)
        return parent.css_to_xpath(css, prefix)


class HTMLTranslator(TranslatorMixin, OriginalHTMLTranslator):
    """Translator built from TranslatorMixin and OriginalHTMLTranslator
    whose ``css_to_xpath`` results are memoized.
    """

    @lru_cache(maxsize=256)
    def css_to_xpath(self, css, prefix='descendant-or-self::'):
        """Translate *css* to XPath via the parent implementation.

        Results are kept in an LRU cache of up to 256 entries; the cache key
        includes the instance, *css* and *prefix*.
        """
        parent = super(HTMLTranslator, self)
        return parent.css_to_xpath(css, prefix)


# Module-level translator instance shared by css2xpath().
_translator = HTMLTranslator()


def css2xpath(query):
    """Return translated XPath version of a given CSS query.

    The translation is done by the module-level ``_translator`` instance.
    """
    xpath = _translator.css_to_xpath(query)
    return xpath
github scrapy / scrapy / scrapy / http / request / form.py View on Github external
def from_response(cls, response, formname=None, formid=None, formnumber=0, formdata=None,
                      clickdata=None, dont_click=False, formxpath=None, formcss=None, **kwargs):
        """Build an instance of *cls* from a form located in *response*.

        The ``encoding`` keyword defaults to ``response.encoding``. When
        *formcss* is given it is translated to XPath and replaces *formxpath*.
        Locating the form, collecting its inputs and resolving the URL are
        delegated to ``_get_form``, ``_get_inputs`` and ``_get_form_url``.

        The HTTP method comes from the ``method`` keyword, falling back to
        the form's own ``method`` attribute. It is upper-cased, and replaced
        by ``'GET'`` when it is not listed in ``cls.valid_form_methods``.
        Remaining keyword arguments are passed through to ``cls``.
        """
        kwargs.setdefault('encoding', response.encoding)

        if formcss is not None:
            from parsel.csstranslator import HTMLTranslator
            translator = HTMLTranslator()
            formxpath = translator.css_to_xpath(formcss)

        form_el = _get_form(response, formname, formid, formnumber, formxpath)
        formdata = _get_inputs(form_el, formdata, dont_click, clickdata, response)
        explicit_url = kwargs.pop('url', None)
        target_url = _get_form_url(form_el, explicit_url)

        http_method = kwargs.pop('method', form_el.method)
        if http_method is not None:
            upper_method = http_method.upper()
            # Fall back to GET for anything outside the accepted set.
            if upper_method in cls.valid_form_methods:
                http_method = upper_method
            else:
                http_method = 'GET'

        return cls(url=target_url, method=http_method, formdata=formdata, **kwargs)
github scrapy / parsel / parsel / csstranslator.py View on Github external
def css_to_xpath(self, css, prefix='descendant-or-self::'):
        """Translate *css* to XPath by delegating to the parent implementation."""
        parent = super(HTMLTranslator, self)
        return parent.css_to_xpath(css, prefix)