How to use parsel - 10 common examples

To help you get started, we’ve selected a few parsel examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github scrapy / parsel / tests / test_selector.py View on Github external
def test_smart_strings(self):
        """Lxml smart strings return values"""

        # Selector subclass with the private ``_lxml_smart_strings`` flag
        # switched on (presumably this opts the subclass in to lxml "smart
        # string" results -- confirm against the Selector implementation).
        class SmartStringsSelector(Selector):
            _lxml_smart_strings = True

        # HTML fixture: two <div> blocks, each holding a <ul>; the first list
        # has two <li> items and the second has three.
        body = u"""
                    <div class="one">
                      <ul>
                        <li>one</li><li>two</li>
                      </ul>
                    </div>
                    <div class="two">
                      <ul>
                        <li>four</li><li>five</li><li>six</li>
                      </ul>
                    </div>
                  """

        # .getparent() is available for text nodes and attributes
        # NOTE(review): this excerpt stops here; the assertions that exercise
        # ``SmartStringsSelector`` and ``body`` are not shown.
github scrapy / parsel / tests / test_xpathfuncs.py View on Github external
def test_has_class_error_no_args(self):
        """Evaluating ``has-class()`` without arguments raises ``ValueError``.

        The error message must match
        ``'has-class must have at least 1 argument'``.
        """
        body = u"""
        <p class="foo">First</p>
        """
        sel = Selector(text=body)
        # ``assertRaisesRegexp`` is a deprecated alias that was removed from
        # unittest in Python 3.12. Prefer ``assertRaisesRegex`` and only fall
        # back to the old spelling where the new one is missing (Python 2.7).
        assert_raises_regex = getattr(self, 'assertRaisesRegex', None)
        if assert_raises_regex is None:
            assert_raises_regex = self.assertRaisesRegexp
        assert_raises_regex(
            ValueError, 'has-class must have at least 1 argument',
            sel.xpath, 'has-class()')
github GovReady / govready-q / testmocking / web.py View on Github external
def _use_page(self, response):
        """Store ``response`` and a parsel selector built from its text.

        Sets ``self.response`` to the given object and ``self.selector`` to
        ``parsel.Selector(text=response.text)``.
        """
        self.response = response
        page_text = response.text
        self.selector = parsel.Selector(text=page_text)
github GovReady / govready-q / testmocking / data_management.py View on Github external
def login(username, password, domain):
    """Fetch ``domain`` with a fresh session and return a selector over the page.

    The ``requests.Session`` is used as a context manager so that it is closed
    (and its pooled connections released) before the function returns.

    NOTE(review): ``username`` and ``password`` are accepted but never used
    in this function -- no credentials are submitted. Confirm with the callers
    whether the actual sign-in step is expected to happen here.

    :param username: currently unused.
    :param password: currently unused.
    :param domain: URL passed to ``session.get``.
    :return: ``parsel.Selector`` built from the response text.
    """
    with requests.Session() as session:
        response = session.get(domain)
        # Build the selector while the session is still open; the body has
        # already been downloaded because the request is not streamed.
        return parsel.Selector(text=response.text)
github scrapy / parsel / tests / test_selector_lxmldocument.py View on Github external
def test_caching(self):
        """LxmlDocument is reused for one response but not for its copy."""
        original = HtmlResponse('http://www.example.com', body='')
        duplicate = original.copy()

        first_doc = LxmlDocument(original)
        second_doc = LxmlDocument(original)
        copy_doc = LxmlDocument(duplicate)

        # building twice from the same response must yield the same object
        assert first_doc is second_doc
        # the copied response must get a document of its own
        assert first_doc is not copy_doc
github scrapy / parsel / tests / test_selector_lxmldocument.py View on Github external
def test_null_char(self):
        """Building a document from a body containing a null character works.

        The test passes as long as ``LxmlDocument`` does not raise (the
        original concern being a ``TypeError``).
        """
        url = 'http://example.com/catalog/product/blabla-123'
        headers = {'Content-Type': 'text/plain; charset=utf-8'}
        body = 'test problematic \x00 body'
        response = TextResponse(url, headers=headers, body=body)
        LxmlDocument(response)
github scrapy / parsel / tests / test_selector_lxmldocument.py View on Github external
def test_caching(self):
        """LxmlDocument is reused for one response but not for its copy."""
        original = HtmlResponse('http://www.example.com', body='')
        duplicate = original.copy()

        first_doc = LxmlDocument(original)
        second_doc = LxmlDocument(original)
        copy_doc = LxmlDocument(duplicate)

        # building twice from the same response must yield the same object
        assert first_doc is second_doc
        # the copied response must get a document of its own
        assert first_doc is not copy_doc
github scrapy / parsel / tests / test_selector_lxmldocument.py View on Github external
def test_caching(self):
        """LxmlDocument is reused for one response but not for its copy."""
        original = HtmlResponse('http://www.example.com', body='')
        duplicate = original.copy()

        first_doc = LxmlDocument(original)
        second_doc = LxmlDocument(original)
        copy_doc = LxmlDocument(duplicate)

        # building twice from the same response must yield the same object
        assert first_doc is second_doc
        # the copied response must get a document of its own
        assert first_doc is not copy_doc
github scrapy / parsel / tests / test_utils.py View on Github external
def test_extract_regex(regex, text, replace_entities, expected):
    """Check that ``extract_regex`` returns ``expected`` for the given inputs."""
    actual = extract_regex(regex, text, replace_entities)
    assert actual == expected
github scrapy / parsel / tests / test_selector.py View on Github external
def test_extending_selector(self):
        """Subclassed selectors return the subclass types from xpath() and css().

        A ``Selector`` subclass that sets ``selectorlist_cls`` must hand back
        that list class from queries, and the items in it must be instances of
        the subclass itself.
        """
        class MySelectorList(Selector.selectorlist_cls):
            pass

        class MySelector(Selector):
            selectorlist_cls = MySelectorList

        sel = MySelector(text=u'<div>foo</div>')
        # Same checks for both query flavours, XPath first and then CSS.
        for run_query, expression in ((sel.xpath, '//div'), (sel.css, 'div')):
            self.assertIsInstance(run_query(expression), MySelectorList)
            self.assertIsInstance(run_query(expression)[0], MySelector)