How to use the scrapelib.urlopen function in scrapelib

To help you get started, we’ve selected a few scrapelib examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github openstates / legacy-openstates.org / tweets / latest_tweet.py View on Github external
def main():
    """Replace the cached openstates tweets with the newest tweet from Twitter."""
    connection = pymongo.Connection(settings.MONGO_HOST, settings.MONGO_PORT)
    collection = connection['openstates_web']['tweets']
    raw = urlopen('http://api.twitter.com/1/statuses/user_timeline.json?screen_name=openstates&count=1&trim_user=1')
    latest = json.loads(raw)
    # Drop-then-insert: the collection only ever holds the most recent tweet.
    collection.drop()
    collection.insert(latest, safe=True)
github openstates / billy / billy / fulltext / __init__.py View on Github external
def s3_get(id):
    """Return the raw bytes of document *id*, using S3 as a cache.

    Looks for the object at ``documents/<first-two-chars-lowered>/<id>`` in
    ``s3bucket``.  On a cache miss, the source URL is looked up in
    ``db.tracked_versions``; the document is downloaded, pushed to S3
    (publicly readable, preserving its content type), and its bytes returned.
    Returns ``None`` when *id* is not a tracked version.
    """
    k = boto.s3.key.Key(s3bucket)
    k.key = 'documents/{0}/{1}'.format(id[0:2].lower(), id)

    # try and get the object, if it doesn't exist- pull it down
    try:
        return k.get_contents_as_string()
    except Exception:
        # Was a bare ``except:``, which also trapped SystemExit and
        # KeyboardInterrupt.  ``Exception`` keeps the original "any S3
        # failure means cache miss" behavior without swallowing
        # interpreter-exit signals.
        doc = db.tracked_versions.find_one(id)
        if not doc:
            return None
        # Escape spaces only — presumably the one character these stored
        # URLs need encoded; TODO(review) confirm against the source data.
        data = scrapelib.urlopen(doc['url'].replace(' ', '%20'))
        content_type = data.response.headers['content-type']
        headers = {'x-amz-acl': 'public-read', 'Content-Type': content_type}
        k.set_contents_from_string(data.bytes, headers=headers)
        log.debug('pushed %s to s3 as %s', doc['url'], id)
        return data.bytes
github openstates / billy / billy / commands / validate_api.py View on Github external
# NOTE(review): truncated excerpt — the enclosing ``def`` (and the bindings of
# ``url``, ``abbr``, ``schema_dir``, ``metadata_schema``, ``xml_schema``) are
# outside this view, so the first line sits at column 0 while the rest keeps
# the original function-body indentation.  Code left byte-identical.
json_response = scrapelib.urlopen(url)
    validictory.validate(json.loads(json_response), metadata_schema,
                         validator_cls=APIValidator)

    bill_schema = get_json_schema("bill", schema_dir)

    spec = {settings.LEVEL_FIELD: abbr}
    total_bills = db.bills.find(spec).count()

    # Samples 100 random bills with replacement (random.randint may repeat).
    # ``xrange`` indicates this is Python 2 code.
    for i in xrange(0, 100):
        bill = db.bills.find(spec)[random.randint(0, total_bills - 1)]
        path = "bills/%s/%s/%s/%s" % (abbr, bill['session'],
                                      bill['chamber'], bill['bill_id'])
        url = api_url(path)

        json_response = scrapelib.urlopen(url)
        validictory.validate(json.loads(json_response), bill_schema,
                                 validator_cls=APIValidator)

    # Validates every legislator document for this jurisdiction.
    legislator_schema = get_json_schema("legislator", schema_dir)
    for legislator in db.legislators.find(spec):
        path = 'legislators/%s' % legislator['_id']
        url = api_url(path)

        json_response = scrapelib.urlopen(url)
        validictory.validate(json.loads(json_response), legislator_schema,
                             validator_cls=APIValidator)

    # Excerpt is cut off here: committees are fetched but the validate call
    # for them is not visible in this view.
    committee_schema = get_json_schema("committee", schema_dir)
    for committee in db.committees.find(spec):
        path = "committees/%s" % committee['_id']
        url = api_url(path)
github openstates / billy / billy / commands / validate_api.py View on Github external
# NOTE(review): truncated excerpt — the enclosing ``def`` and the bindings of
# ``spec``, ``abbr``, ``schema_dir``, ``bill_schema``, ``total_bills`` are
# outside this view.  Code left byte-identical.
for i in xrange(0, 100):
        # Random sampling with replacement over the jurisdiction's bills.
        bill = db.bills.find(spec)[random.randint(0, total_bills - 1)]
        path = "bills/%s/%s/%s/%s" % (abbr, bill['session'],
                                      bill['chamber'], bill['bill_id'])
        url = api_url(path)

        json_response = scrapelib.urlopen(url)
        validictory.validate(json.loads(json_response), bill_schema,
                                 validator_cls=APIValidator)

    legislator_schema = get_json_schema("legislator", schema_dir)
    for legislator in db.legislators.find(spec):
        path = 'legislators/%s' % legislator['_id']
        url = api_url(path)

        json_response = scrapelib.urlopen(url)
        validictory.validate(json.loads(json_response), legislator_schema,
                             validator_cls=APIValidator)

    committee_schema = get_json_schema("committee", schema_dir)
    for committee in db.committees.find(spec):
        path = "committees/%s" % committee['_id']
        url = api_url(path)

        json_response = scrapelib.urlopen(url)
        validictory.validate(json.loads(json_response), committee_schema,
                             validator_cls=APIValidator)

    event_schema = get_json_schema("event", schema_dir)
    total_events = db.events.find(spec).count()

    # Excerpt is cut off inside this conditional; the event-sampling loop it
    # guards is not visible here.
    if total_events:
github openstates / openstates / billy / bin / validate_api.py View on Github external
# NOTE(review): truncated excerpt — the enclosing ``def`` and the bindings of
# ``url``, ``spec``, ``schema_dir``, ``committee_schema``, ``xml_schema`` are
# outside this view.  Code left byte-identical.
json_response = scrapelib.urlopen(url)
        validictory.validate(json.loads(json_response), committee_schema,
                             validator_cls=APIValidator)

        # This variant also validates the XML representation of each endpoint.
        validate_xml(url, xml_schema)

    event_schema = get_json_schema("event", schema_dir)
    total_events = db.events.find(spec).count()

    # Only sample events when at least one exists (randint would fail on an
    # empty collection); 10 random picks, with replacement.
    if total_events:
        for i in xrange(0, 10):
            event = db.events.find(spec)[random.randint(0, total_events - 1)]
            path = "events/%s" % event['_id']
            url = api_url(path)

            json_response = scrapelib.urlopen(url)
            validictory.validate(json.loads(json_response), event_schema,
                                 validator_cls=APIValidator)

            validate_xml(url, xml_schema)
github openstates / openstates / billy / bin / validate_api.py View on Github external
# NOTE(review): truncated excerpt — the enclosing ``def`` and the bindings of
# ``spec``, ``schema_dir``, ``legislator_schema``, ``xml_schema`` are outside
# this view, and the excerpt ends mid-statement.  Code left byte-identical.
for legislator in db.legislators.find(spec):
        path = 'legislators/%s' % legislator['_id']
        url = api_url(path)

        json_response = scrapelib.urlopen(url)
        validictory.validate(json.loads(json_response), legislator_schema,
                             validator_cls=APIValidator)

        validate_xml(url, xml_schema)

    committee_schema = get_json_schema("committee", schema_dir)
    for committee in db.committees.find(spec):
        path = "committees/%s" % committee['_id']
        url = api_url(path)

        json_response = scrapelib.urlopen(url)
        validictory.validate(json.loads(json_response), committee_schema,
                             validator_cls=APIValidator)

        validate_xml(url, xml_schema)

    event_schema = get_json_schema("event", schema_dir)
    total_events = db.events.find(spec).count()

    if total_events:
        # 10 random events, sampled with replacement.
        for i in xrange(0, 10):
            event = db.events.find(spec)[random.randint(0, total_events - 1)]
            path = "events/%s" % event['_id']
            url = api_url(path)

            json_response = scrapelib.urlopen(url)
            # Excerpt is cut off mid-call here.
            validictory.validate(json.loads(json_response), event_schema,
github openstates / openstates / billy / bin / validate_api.py View on Github external
def validate_xml(url, schema):
    """Fetch the XML form of an API *url* and validate each result element.

    Appends ``&format=xml`` to *url* (so *url* must already carry a query
    string), parses the response, and asserts every child of ``/results``
    against *schema*.
    """
    xml_body = scrapelib.urlopen(url + "&format=xml")
    document = lxml.etree.fromstring(xml_body)
    for element in document.xpath("/results/*"):
        schema.assertValid(element)