How to use the ruia.RegexField class in ruia

To help you get started, we’ve selected a few ruia examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github howie6879 / ruia / tests / test_field.py View on Github external
def test_re_field_get_nothing_with_no_default():
    """A non-matching pattern with no default must raise NothingMatchedError.

    The expected exception is caught explicitly, and the ``else`` branch
    fails the test when ``extract`` returns normally, so a silent success
    can no longer slip through as a passing test.
    """
    field = RegexField(re_select="nothing to match.")
    try:
        field.extract(html=HTML)
    except NothingMatchedError:
        # Expected outcome: nothing matched and no default was configured.
        pass
    else:
        raise AssertionError(
            "RegexField.extract should raise NothingMatchedError when nothing "
            "matches and no default is given"
        )
github howie6879 / ruia / tests / test_field.py View on Github external
def test_re_field_with_named_groups():
    """Named groups in the pattern come back as a mapping keyed by group name."""
    # The pattern needs both named groups, ``href`` and ``text``; the
    # assertions below read exactly those two keys.
    field = RegexField(
        re_select='<h1><a href="(?P<href>.*?)">(?P<text>.*?)</a></h1>'
    )
    result = field.extract(html=HTML)
    assert result["href"] == "https://github.com"
    assert result["text"] == "Github"
github howie6879 / ruia / tests / test_field.py View on Github external
def test_re_field_in_dict_format_with_many():
    """With ``many=True`` and named groups, every match is keyed by group name."""
    # Both ``href`` and ``text`` must be complete named groups; the
    # assertions below index each match by those keys.
    field = RegexField(
        re_select='<a href="(?P<href>.*?)" class="test_link">(?P<text>.*?)</a>',
        many=True,
    )
    matches = field.extract(html=HTML)
    assert len(matches) == 5
    assert matches[0]["href"] == "https://github.com/howie6879/"
    assert matches[0]["text"] == "hello1 github."
    assert matches[4]["href"] == "https://github.com/howie6879/"
    assert matches[4]["text"] == "hello5 github."
github howie6879 / ruia / tests / test_field.py View on Github external
def test_re_field_with_default():
    """A pattern that matches nothing yields the configured default value."""
    fallback_field = RegexField(
        default="default value",
        re_select="nothing to match.",
    )
    assert fallback_field.extract(html=HTML) == "default value"
github howie6879 / ruia / tests / test_field.py View on Github external
def test_re_field_with_many():
    """With ``many=True`` and unnamed groups, each match unpacks to two values."""
    link_pattern = '<a href="(.*?)" class="test_link">(.*?)</a>'
    found = RegexField(re_select=link_pattern, many=True).extract(html=HTML)
    assert len(found) == 5

    # Check only the first and the last of the five matches.
    first_href, first_text = found[0]
    last_href, last_text = found[4]
    assert first_href == "https://github.com/howie6879/"
    assert first_text == "hello1 github."
    assert last_href == "https://github.com/howie6879/"
    assert last_text == "hello5 github."
github howie6879 / ruia / tests / for_doc / apis / test_field.py View on Github external
def test_regex_field():
    """Exercise RegexField with positional groups and with named groups."""
    # Unnamed groups: the result is indexed by position.
    title = ruia.RegexField(re_select='<div href="(.*?)" class="title">(.*?)</div>')
    assert title.extract(html=HTML)[0] == "/"
    assert title.extract(html=HTML)[1] == "Ruia Documentation"

    # Named groups with ``many=True``: a list with one dict per match.
    # NOTE(review): the second group's name is not asserted in this test;
    # ``text`` is assumed -- confirm against the upstream test file.
    tags = ruia.RegexField(
        re_select='<li href="(?P<href>.*?)" class="tag">(?P<text>.*?)</li>',
        many=True,
    )
    result = tags.extract(html=HTML)
    assert isinstance(result, list)
    assert len(result) == 3
    assert isinstance(result[0], dict)
    assert result[0]["href"] == "./easy.html"
github howie6879 / ruia / tests / test_field.py View on Github external
def test_re_field_with_html_element():
    """Named-group extraction also works when ``html`` is ``html_etree``."""
    # The pattern needs both named groups, ``href`` and ``text``; the
    # assertions below read exactly those two keys.
    field = RegexField(
        re_select='<h1><a href="(?P<href>.*?)">(?P<text>.*?)</a></h1>'
    )
    result = field.extract(html=html_etree)
    assert result["href"] == "https://github.com"
    assert result["text"] == "Github"
github howie6879 / ruia / tests / test_field.py View on Github external
def test_re_field_with_no_group():
    """A pattern without capture groups yields the whole matched text."""
    title_field = RegexField(re_select="<title>.*?</title>")
    whole_match = title_field.extract(html=HTML)
    assert whole_match == "<title>ruia</title>"
github howie6879 / ruia / tests / for_doc / apis / test_field.py View on Github external
def test_regex_field():
    """Exercise RegexField with positional groups and with named groups."""
    # Unnamed groups: the result is indexed by position.
    title = ruia.RegexField(re_select='<div href="(.*?)" class="title">(.*?)</div>')
    assert title.extract(html=HTML)[0] == "/"
    assert title.extract(html=HTML)[1] == "Ruia Documentation"

    # Named groups with ``many=True``: a list with one dict per match.
    # NOTE(review): the second group's name is not asserted in this test;
    # ``text`` is assumed -- confirm against the upstream test file.
    tags = ruia.RegexField(
        re_select='<li href="(?P<href>.*?)" class="tag">(?P<text>.*?)</li>',
        many=True,
    )
    result = tags.extract(html=HTML)
    assert isinstance(result, list)
    assert len(result) == 3
    assert isinstance(result[0], dict)
    assert result[0]["href"] == "./easy.html"
github howie6879 / ruia / tests / test_field.py View on Github external
def test_re_field_with_many_groups():
    """Two unnamed groups in a single match unpack positionally."""
    heading_field = RegexField(re_select='<h1><a href="(.*?)">(.*?)</a></h1>')
    extracted = heading_field.extract(html=HTML)
    link, label = extracted
    assert link == "https://github.com"
    assert label == "Github"