How to use the spatula.CSV class in spatula

To help you get started, we’ve selected a few spatula examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

GitHub: openstates / openstates / openstates / mn / people.py (View on GitHub)
import collections
import logging
import lxml.html
import re

from pupa.scrape import Person, Scraper
from spatula import Page, CSV, Spatula
from openstates.utils import validate_phone_number, validate_email_address

# Maps party abbreviations to the corresponding full party names.
PARTIES = {"DFL": "Democratic-Farmer-Labor", "R": "Republican"}


class SenList(CSV):
    url = "http://www.senate.mn/members/member_list_ascii.php?ls="
    _html_url = "http://www.senate.mn/members/index.php"

    def __init__(self, scraper, url=None, *, obj=None, **kwargs):
        """Initialise the CSV page, then gather supplementary member data.

        All arguments are forwarded unchanged to the parent initialiser.
        Afterwards ``_scrape_extra_info`` is invoked, which issues an
        additional request for ``_html_url`` through ``self.scraper``, so
        constructing this object performs network I/O.
        """
        super().__init__(scraper, url=url, obj=obj, **kwargs)
        # Must run after the parent initialiser: the helper reads
        # ``self.scraper`` (presumably set by the base class -- confirm).
        self._scrape_extra_info()

    def _scrape_extra_info(self):
        self.extra_info = collections.defaultdict(dict)

        resp = self.scraper.get(self._html_url)
        doc = lxml.html.fromstring(resp.text)
        doc.make_links_absolute(self._html_url)
        xpath = '//div[@id="alphabetically"]' '//div[@class="media my-3"]'
        for div in doc.xpath(xpath):
            main_link, email_link = filter(

spatula

A modern Python library for writing maintainable web scrapers.

MIT
Latest version published 2 years ago

Package Health Score

48 / 100
Full package analysis