How to use the parsel.Selector function in parsel

To help you get started, we’ve selected a few parsel examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github scrapy / parsel / tests / test_selector.py View on Github external
def test_smart_strings(self):
        """Verify that a Selector subclass can re-enable lxml 'smart strings'
        (string results that keep a ``.getparent()`` back-reference) via the
        ``_lxml_smart_strings`` class flag.

        NOTE(review): this snippet is truncated by the page extraction — the
        assertions that exercise ``.getparent()`` are not visible here.
        """

        # Opt back into lxml smart strings for this test only.
        class SmartStringsSelector(Selector):
            _lxml_smart_strings = True

        # Two sibling <div> blocks with <li> children give both text nodes
        # and attributes to query against.
        body = u"""
                    <div class="one">
                      <ul>
                        <li>one</li><li>two</li>
                      </ul>
                    </div>
                    <div class="two">
                      <ul>
                        <li>four</li><li>five</li><li>six</li>
                      </ul>
                    </div>
                  """

        # .getparent() is available for text nodes and attributes
github scrapy / parsel / tests / test_xpathfuncs.py View on Github external
def test_has_class_error_no_args(self):
        """Calling the has-class() XPath extension with no arguments must
        raise ValueError with a descriptive message."""
        body = u"""
        <p class="foo">First</p>
        """
        sel = Selector(text=body)
        # assertRaisesRegexp is a deprecated alias (removed in Python 3.12);
        # assertRaisesRegex has been the supported spelling since Python 3.2.
        self.assertRaisesRegex(
            ValueError, 'has-class must have at least 1 argument',
            sel.xpath, 'has-class()')
github GovReady / govready-q / testmocking / web.py View on Github external
def _use_page(self, response):
        """Cache *response* and expose a parsel Selector over its HTML body
        so later queries can scrape the current page."""
        self.response = response
        self.selector = parsel.Selector(text=response.text)
github GovReady / govready-q / testmocking / data_management.py View on Github external
def login(username, password, domain):
    """Open a session against *domain* and return a parsel Selector over the
    fetched page.

    NOTE(review): *username* and *password* are never used — no credentials
    are posted, so despite its name this performs a plain GET, not a login.
    Verify against callers whether a login POST step was lost.
    """
    session = requests.Session()
    response = session.get(domain)
    return parsel.Selector(text=response.text)
github kevinheavey / fifa18-even-more-player-data / crawler / create_constants.py View on Github external
def get_all_traits_and_specialities():
    """Scrape the sofifa top-players page for the trait and speciality
    filter dropdowns.

    Returns a dict with two keys: 'traits' (deduplicated, suffixed with
    '_trait') and 'specialities' (suffixed with '_speciality' so the
    strength speciality cannot collide with the strength attribute).
    """
    url = 'https://sofifa.com/players/top'
    page = requests.get(url).text
    dropdowns = parsel.Selector(page).xpath(
        './body/section[1]/section[1]/aside[1]/form[1]/div[last()]/div[position() >= last() - 2]/select'
    )
    option_text = 'option/text()'
    # The first two <select> elements hold traits; dedupe because the site
    # repeats some of them, and drop the 'trait.' placeholder entry.
    raw_traits = dropdowns[:2].xpath(option_text).extract()
    unique_traits = list(np.unique([t.strip() for t in raw_traits if t != 'trait.']))
    all_traits = [t + '_trait' for t in unique_traits]
    # The third <select> holds specialities.
    all_specialities = [
        s.strip() + '_speciality'
        for s in dropdowns[2].xpath(option_text).extract()
    ]
    return {'traits': all_traits, 'specialities': all_specialities}
github kevinheavey / fifa18-even-more-player-data / crawler / overview_data.py View on Github external
def parse_single_overview_page(html):
    """Parse one overview page of player rows.

    Builds a Selector over *html*, walks the table body rows, and returns a
    list with one dict per row (as produced by parse_single_row).
    """
    page = parsel.Selector(html)
    return [
        parse_single_row(row)
        for row in page.xpath('./body/table/tbody/tr')
    ]
github NervanaSystems / ngraph-neon / src / neon / frontend / graph.py View on Github external
# Get the op named "conv_filter"
            subgraph.select("#conv_filter")

            # Get the "bias" ops within Affine layers
            subgraph.select("Affine .bias")

            # Get all TensorValueOps
            subgraph.select("TensorValueOp")

            # Get all ops from timestep 3 in an RNN (ie with metadata "recurrent_step=3")
            subgraph.select("[recurrent_step=3]")
        """

        ops = list()
        # Run the CSS-style query against what is presumably an XML rendering
        # of the op graph (self._to_xml — TODO confirm), mapping each matched
        # element back to its op and skipping matches that do not resolve.
        for selected in parsel.Selector(self._to_xml()).css(css):
            op = self._selector_to_op(selected)
            if op is not None:
                ops.append(op)

        return ops
github pr0gramista / memes-api / parsers / mistrzowie.py View on Github external
def parse(html):
    """Parse a mistrzowie listing page into a Page of memes.

    Each "div.pic" element is parsed via catch_errors/parse_meme; failed
    parses (None) are dropped. The next-page URL is rebuilt from the last
    path segment of the pagination button's href.
    """
    document = Selector(text=html)
    parsed = (catch_errors(parse_meme, element) for element in document.css("div.pic"))
    memes = [meme for meme in parsed if meme is not None]

    title = document.css("title::text").get()
    next_href = document.css(".list_next_page_button::attr(href)").get()
    next_page_url = "/mistrzowie/page/" + get_last_part_url(next_href)
    return Page(title, memes, next_page_url)
github alphardex / looter / examples / tieba.py View on Github external
def crawl(url):
    """Yield one metadata dict per thread scraped from a Tieba listing page."""
    text = requests.get(url, headers=lt.DEFAULT_HEADERS).text
    # NOTE(review): replace('', '') is a no-op — the original arguments were
    # most likely lost in extraction (commonly the HTML comment markers that
    # hide the thread list); confirm against the upstream source and restore.
    text = text.replace('', '')
    tree = Selector(text=text)
    items = tree.css('ul#thread_list li.j_thread_list')
    for item in items:
        data = {}
        data['title'] = item.css('a.j_th_tit::text').extract_first()
        # extract_first() returns None when a node is missing; the chained
        # .strip()/int() calls below would then raise — presumably the page
        # structure guarantees these fields. Verify before hardening.
        data['abstract'] = item.css('.threadlist_abs::text').extract_first().strip()
        # `domain` is a module-level name defined elsewhere in this file.
        data['url'] = f"{domain}{item.css('a.j_th_tit::attr(href)').extract_first()}"
        data['author'] = item.css('a.frs-author-name::text').extract_first()
        data['reply'] = int(item.css('span.threadlist_rep_num::text').extract_first())
        data['date'] = item.css('.threadlist_reply_date::text').extract_first().strip()
        yield data
github kevinheavey / fifa18-even-more-player-data / crawler / html_download.py View on Github external
def _get_relevant_league_overview_html(league_overview_html):
    """Extract only the players table from a league overview page.

    Returns the serialized HTML of the first matching <table>, or None when
    the page has no such table (extract_first's default).
    """
    page = parsel.Selector(text=league_overview_html)
    table = page.xpath('/html/body/section/section/article/table')
    return table.extract_first()