How to use the hepcrawl.spiders.elsevier_spider.ElsevierSpider class in hepcrawl

To help you get started, we’ve selected a few hepcrawl examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github inspirehep / hepcrawl / tests / unit / test_elsevier.py View on Github external
def parsed_node():
    """Test data that have different values than in the sample record."""
    # NOTE: this tries to make a GET request
    with requests_mock.Mocker() as mock:
        mock.head(
            'http://www.sciencedirect.com/science/article/pii/sample_consyn_record',
            headers={
                'Content-Type': 'text/html',
            }
        )
        spider = elsevier_spider.ElsevierSpider()
        body = """
        
            
                
                http://vtw.elsevier.com/data/voc/oa/OpenAccessStatus#Full
                
                2014-11-11T08:38:44Z
                
                SCOAP³ - Sponsoring Consortium for Open Access Publishing in Particle Physics
                http://vtw.elsevier.com/data/voc/oa/SponsorType#FundingBody
                
                http://creativecommons.org/licenses/by/3.0/
github inspirehep / hepcrawl / tests / unit / test_elsevier.py View on Github external
def sd_url():
    """Return what ``_get_sd_url`` produces for the sample Consyn record path."""
    sample_record_path = 'elsevier/sample_consyn_record.xml'
    return elsevier_spider.ElsevierSpider()._get_sd_url(sample_record_path)
github inspirehep / hepcrawl / tests / unit / test_elsevier.py View on Github external
def ref_simple_journal():
    """ Simple journal article, two authors et al., paginated by issue. With notes."""
    spider = elsevier_spider.ElsevierSpider()
    body = """
    
    
        [1]
        
            
            
                
                A.
                Päiviö
                
                
                L.J.
                Becker
github inspirehep / hepcrawl / tests / unit / test_elsevier.py View on Github external
def ref_edited_book():
    """Edited book. In this example the whole edited book is cited."""
    spider = elsevier_spider.ElsevierSpider()
    body = """
    
    
        [11]
        
            
            
                
                
                    S.
                    Letheridge
                
                
                    C.R.
                    Cannon
github inspirehep / hepcrawl / tests / unit / test_elsevier.py View on Github external
def authors():
    """Authors with different kinds of structures: affiliations,
    group affiliations, collaborations, two different author groups, and
    two alternative ways of writing affiliation info: ce:textfn or sa:affiliation."""
    spider = elsevier_spider.ElsevierSpider()
    body = """
    
    
        
          Physical
          Scientist
          
            a
          
        
        
          Philosophical
          Doctor
          
            b
github inspirehep / hepcrawl / tests / unit / test_elsevier.py View on Github external
def ref_translated_article():
    """ Non-English journal article, with an English sb:translated-title."""
    spider = elsevier_spider.ElsevierSpider()
    body = """
    
    
        [4]
        
            
            
                
                E.M.H.
                Assink
                
                
                N.
                Verloop
github inspirehep / hepcrawl / tests / unit / test_elsevier.py View on Github external
def sciencedirect_proof():
    """Run ``scrape_sciencedirect`` on a minimal page representing a proof.

    The article is not published yet and exists only online, so the
    response metadata marks ``volume`` as a missing key, starts with an
    empty ``info`` dict and carries the ``/head`` node of the same page.
    """
    crawler = elsevier_spider.ElsevierSpider()
    page = """
    
    
        
    
    """
    fake_response = fake_response_from_string(page)
    # Populate the metadata entries that this fixture hands to the scraper.
    meta = fake_response.meta
    meta["keys_missing"] = {"volume"}
    meta["info"] = {}
    meta["node"] = get_node(crawler, '/head', text=page)
    return crawler.scrape_sciencedirect(fake_response)
github inspirehep / hepcrawl / tests / unit / test_elsevier.py View on Github external
def cover_display_date_y_m():
    """Build a record from a document whose only date is a month and year.

    Asserts that ``parse_node`` yields a truthy item carrying a truthy
    record, then returns that record.
    """
    crawler = elsevier_spider.ElsevierSpider()
    document = """
    
        
            December 2014
        
    """
    doc_node = get_node(crawler, '/doc', text=document)
    fake_response = fake_response_from_string(document)
    item = crawler.parse_node(fake_response, doc_node)
    assert item

    record = item.record
    assert record
    return record
github inspirehep / hepcrawl / tests / unit / test_elsevier.py View on Github external
def conference():
    """Return the collections derived for a conference document.

    The sample also carries a simple-article element; the conference
    doctype is expected to take precedence over it.
    """
    crawler = elsevier_spider.ElsevierSpider()
    document = """
    
    
        xaxaxa
    
    
        International Conference on conferences
        CERN, Geneva
        
        20200315
        
    
    """
    doc_node = get_node(crawler, '/doc', text=document)
    # Detect the doctype first, then map it to its collections.
    return crawler.get_collections(crawler.get_doctype(doc_node))
github inspirehep / hepcrawl / tests / unit / test_elsevier.py View on Github external
def ref_book_proceedings_article():
    """Article in proceedings published as a book."""
    spider = elsevier_spider.ElsevierSpider()
    body = """
    
    
        [10]
        
            
            
                
                T.E.
                Chaddock
                
            
            
                Gastric emptying of a nutritionally balanced diet