How to use the refextract.references.pdf.extract_texkeys_from_pdf function in refextract

To help you get started, we’ve selected a few refextract examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github inspirehep / refextract / tests / test_pdf.py View on Github external
u'LHC-HCG',
        u'Cowan:2010st',
        u'Verkerke:2003ir',
        u'Moneta:2010pm',
        u'Cranmer:2012sba',
        u'Aad:2014nim',
        u'Aad:2014rra',
        u'Chatrchyan:2012xi',
        u'Khachatryan:2015hwa',
        u'Khachatryan:2015iwa',
        u'Dauncey:2014xga',
        u'Z-Pole'
    ]
    assert one_col_keys == expected

    two_col_keys = extract_texkeys_from_pdf(pdf_files[1])
    expected = [
        u'Aad:2015owa',
        u'CMSCollaboration:2014df',
        u'Fukano:2015ud',
        u'Hisano:2015gna',
        u'Franzosi:2015ts',
        u'Cheung:2015vl',
        u'Dobrescu:2015va',
        u'AguilarSaavedra:2015tw',
        u'Alves:2015tf',
        u'Gao:2015ws',
        u'Thamm:2015wd',
        u'Brehmer:2015tq',
        u'Cao:2015we',
        u'Cacciapaglia:2015uf',
        u'Anonymous:2015ul',
github inspirehep / refextract / tests / test_pdf.py View on Github external
def test_extract_texkeys_from_pdf(pdf_files):
    one_col_keys = extract_texkeys_from_pdf(pdf_files[0])
    expected = [
        u'Englert:1964et',
        u'Higgs:1964ia',
        u'Higgs:1964pj',
        u'Guralnik:1964eu',
        u'Higgs:1966ev',
        u'Kibble:1967sv',
        u'HiggsObservationATLAS',
        u'HiggsObservationCMS',
        u'CMSLong2013',
        u'atlas_coupling_paper',
        u'atlas_spin_paper',
        u'CMS_combination',
        u'Khachatryan:2014kca',
        u'atlas_mass_paper',
        u'CMS_Hgg',
github inspirehep / refextract / tests / test_pdf.py View on Github external
def test_extract_texkeys_from_pdf_no_crash_on_other_exceptions(pdf_files):
    """Check that extraction from fixture PDF #5 yields an empty list.

    As the test name indicates, this fixture is expected to make the
    extractor hit an unexpected error internally; the call must still
    return normally with no texkeys rather than propagate an exception.
    """
    problematic_pdf = pdf_files[5]
    texkeys = extract_texkeys_from_pdf(problematic_pdf)
    assert texkeys == []
github inspirehep / refextract / tests / test_pdf.py View on Github external
def test_extract_texkeys_from_pdf_no_crash_on_pydpf2_error(pdf_files):
    """Check that extraction from fixture PDF #3 yields an empty list.

    Per the test name, this fixture is expected to trigger an error in
    the underlying PDF library; the extractor must return normally with
    no texkeys instead of raising.
    """
    unreadable_pdf = pdf_files[3]
    texkeys = extract_texkeys_from_pdf(unreadable_pdf)
    assert texkeys == []
github inspirehep / refextract / tests / test_pdf.py View on Github external
def test_extract_texkeys_from_pdf_no_crash_on_incomplete_dest_coordinates(
        pdf_files):
    """Check that extraction from fixture PDF #2 yields an empty list.

    Per the test name, this fixture is expected to contain named
    destinations with incomplete coordinates; the extractor must return
    normally with no texkeys instead of raising.
    """
    incomplete_dest_pdf = pdf_files[2]
    texkeys = extract_texkeys_from_pdf(incomplete_dest_pdf)
    assert texkeys == []
github inspirehep / refextract / refextract / references / api.py View on Github external
docbody = get_plaintext_document_body(path)
    reflines, dummy, dummy = extract_references_from_fulltext(docbody)
    if not reflines:
        docbody = get_plaintext_document_body(path, keep_layout=True)
        reflines, dummy, dummy = extract_references_from_fulltext(docbody)

    parsed_refs, stats = parse_references(
        reflines,
        recid=recid,
        reference_format=reference_format,
        linker_callback=linker_callback,
        override_kbs_files=override_kbs_files,
    )

    if magic.from_file(path, mime=True) == "application/pdf":
        texkeys = extract_texkeys_from_pdf(path)
        if len(texkeys) == len(parsed_refs):
            parsed_refs = [dict(ref, texkey=[key]) for ref, key in zip(parsed_refs, texkeys)]

    return parsed_refs