How to use selectolax - 10 common examples

To help you get started, we’ve selected a few selectolax examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github rushter / selectolax / tests / test_nodes.py View on Github external
def test_node_comparison():
    """Check that one element reached by three different routes compares equal."""
    markup = """
        <div>H3ll0</div><div id="tt"><p id="stext">Lorem ipsum dolor sit amet, ea quo modus meliore platonem.</p></div>
    """
    parser = HTMLParser(markup)
    elements = list(parser.root.traverse(include_text=False))
    # Three routes to compare: parent of the last traversed element, the
    # second-to-last traversed element, and a lookup by the ``#tt`` selector.
    via_parent = elements[-1].parent
    via_position = elements[-2]
    via_selector = parser.css_first('#tt')
    assert via_parent == via_position == via_selector
github rushter / selectolax / tests / test_nodes.py View on Github external
def test_attrs_sets_attribute():
    """Check that item assignment on ``attrs`` is visible through ``attributes``."""
    parser = HTMLParser('<div></div>')
    div = parser.css_first('div')
    div.attrs['id'] = 'new_id'
    expected = {'id': 'new_id'}
    assert div.attributes == expected
github rushter / selectolax / tests / test_parser.py View on Github external
def test_encoding():
    """Check ``input_encoding`` for str input, bytes input, and detected encodings."""
    # Single-quoted literals so the double-quoted HTML attributes inside the
    # markup do not terminate the Python string.
    markup = (
        '<div><p id="p1"></p><p id="p2"></p><p id="p3"><a>link</a></p>'
        '<p id="p4"></p><p id="p5">text</p><p id="p6"></p></div>'
    )

    # Text input.
    assert HTMLParser(markup).input_encoding == 'UTF-8'

    # The same markup supplied as bytes.
    assert HTMLParser(markup.encode('utf-8')).input_encoding == 'UTF-8'

    # Cyrillic text encoded as cp1251, with detection switched on.
    cyrillic = "<div>Привет мир!</div>".encode('cp1251')
    assert HTMLParser(cyrillic, detect_encoding=True).input_encoding == 'WINDOWS-1251'

    # Encoding declared through a meta tag.
    # NOTE(review): the listing this snippet came from showed an empty literal
    # here, which cannot carry a charset declaration; this minimal fixture is a
    # reconstruction -- confirm against the upstream selectolax test suite.
    html_utf = '<head><meta charset="WINDOWS-1251"></head>'.encode('utf-8')
    meta_parser = HTMLParser(html_utf, detect_encoding=True, use_meta_tags=True)
    assert meta_parser.input_encoding == 'WINDOWS-1251'
github rushter / selectolax / tests / test_parser.py View on Github external
def test_parser():
    """Check construction from an empty string and ``TypeError`` on integer arguments."""
    empty_doc = HTMLParser("")
    assert isinstance(empty_doc, HTMLParser)

    # Each callable passes an integer where the API is given strings elsewhere:
    # first as the document itself, then as the CSS selector.
    invalid_calls = (
        lambda: HTMLParser(123),
        lambda: HTMLParser("asd").css(123),
    )
    for invalid_call in invalid_calls:
        with pytest.raises(TypeError):
            invalid_call()
github rushter / selectolax / tests / test_nodes.py View on Github external
def test_text_node_returns_text():
    """Check that ``text(deep=False)`` on the div's child returns the div's text."""
    markup = '<div>foo bar</div>'
    parser = HTMLParser(markup)
    div = parser.css_first('div')
    first_child = div.child
    assert first_child.text(deep=False) == 'foo bar'
github rushter / selectolax / tests / test_nodes.py View on Github external
def test_replace_with_multiple_nodes():
    """Check that replacing a span holding several children leaves only the text."""
    # The span wraps an image, a space and a nested div; it is replaced as a
    # whole by the value of its ``alt`` attribute.
    # NOTE(review): the fixture was recovered from an HTML-escaped listing
    # (``&gt;`` decoded to ``>``, trailing markup residue dropped); the span and
    # outer div are left unclosed as found -- confirm against the upstream
    # selectolax test suite.
    html_parser = HTMLParser('<div>Get <span alt="Laptop"><img src="/jpg"> <div>/div></div>')
    span = html_parser.css_first('span')
    span.replace_with(span.attributes.get('alt', ''))
    assert html_parser.body.child.html == '<div>Get Laptop</div>'
github rushter / selectolax / tests / test_nodes.py View on Github external
def test_css_first_default():
    """Check that ``css_first`` returns ``default`` when the selector matches nothing."""
    # Single-quoted literal so the double-quoted class attributes inside the
    # markup do not terminate the Python string.
    html = '<span></span><div><p class="p3">text</p><p class="p3">sd</p></div><p></p>'
    # The markup only contains the class ``p3``, so ``.s3`` has no match.
    selector = ".s3"
    assert HTMLParser(html).css_first(selector, default='lorem ipsum') == 'lorem ipsum'
github rushter / selectolax / tests / test_nodes.py View on Github external
def test_replace_with():
    """Check that an ``img`` can be replaced in place by its ``alt`` text."""
    parser = HTMLParser('<div>Get <img alt="Laptop" src=""></div>')
    image = parser.css_first('img')
    # Fall back to an empty string when the image carries no ``alt`` attribute.
    alt_text = image.attributes.get('alt', '')
    image.replace_with(alt_text)
    assert parser.body.child.html == '<div>Get Laptop</div>'
github rushter / selectolax / tests / test_parser.py View on Github external
def test_nodes():
    """Check that ``root``/``body`` are ``Node`` objects and the serialized HTML stays close to the input."""
    html = (
        '<div><p id="p1"></p><p id="p2"></p><p id="p3"><a>link</a></p>'
        '<p id="p4"></p><p id="p5">text</p><p id="p6"></p></div>'
    )
    htmlp = HTMLParser(html)

    assert isinstance(htmlp.root, Node)
    assert isinstance(htmlp.body, Node)

    html_output = htmlp.html
    # The serialized document must be at least as long as the input and more
    # than 80% similar to it.
    assert len(html_output) >= len(html)
    assert SequenceMatcher(None, html, html_output).ratio() > 0.8
github rushter / selectolax / tests / test_parser.py View on Github external
def test_nodes():
    """Check that ``root``/``body`` are ``Node`` objects and the serialized HTML stays close to the input."""
    html = (
        '<div><p id="p1"></p><p id="p2"></p><p id="p3"><a>link</a></p>'
        '<p id="p4"></p><p id="p5">text</p><p id="p6"></p></div>'
    )
    htmlp = HTMLParser(html)

    assert isinstance(htmlp.root, Node)
    assert isinstance(htmlp.body, Node)

    html_output = htmlp.html
    # The serialized document must be at least as long as the input and more
    # than 80% similar to it.
    assert len(html_output) >= len(html)
    assert SequenceMatcher(None, html, html_output).ratio() > 0.8

selectolax

Fast HTML5 parser with CSS selectors.

MIT
Latest version published 10 days ago

Package Health Score

82 / 100
Full package analysis

Similar packages