# Shared imports for the snippets below; `settings`, `db`, `s3bucket`, and
# `log` are module-level globals defined elsewhere in the project.
import json
import random
from urllib2 import urlopen

import boto.s3.key
import lxml.etree
import pymongo
import scrapelib
import validictory
from boto.exception import S3ResponseError


def main():
    # Cache the latest @openstates tweet in MongoDB.
    conn = pymongo.Connection(settings.MONGO_HOST, settings.MONGO_PORT)
    tweets = conn['openstates_web']['tweets']
    data = urlopen('http://api.twitter.com/1/statuses/user_timeline.json?screen_name=openstates&count=1&trim_user=1')
    # urlopen() returns a file-like object; read it before decoding
    data = json.loads(data.read())
    tweets.drop()
    tweets.insert(data, safe=True)
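
# A minimal sketch of reading the cached tweet back out of MongoDB. It reuses
# the collection layout from main() above; the 'text' field is an assumption
# about the Twitter API payload, and latest_tweet() is not part of the
# original code.
def latest_tweet():
    conn = pymongo.Connection(settings.MONGO_HOST, settings.MONGO_PORT)
    tweet = conn['openstates_web']['tweets'].find_one()
    return tweet['text'] if tweet else None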

def s3_get(id):
    k = boto.s3.key.Key(s3bucket)
    k.key = 'documents/{0}/{1}'.format(id[0:2].lower(), id)
    # Try to fetch the object from S3; if it isn't there yet, download the
    # source document, push it to S3 for next time, and return the bytes.
    try:
        return k.get_contents_as_string()
    except S3ResponseError:
        doc = db.tracked_versions.find_one(id)
        if not doc:
            return None
        data = scrapelib.urlopen(doc['url'].replace(' ', '%20'))
        content_type = data.response.headers['content-type']
        headers = {'x-amz-acl': 'public-read', 'Content-Type': content_type}
        k.set_contents_from_string(data.bytes, headers=headers)
        log.debug('pushed %s to s3 as %s', doc['url'], id)
        return data.bytes
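
# Usage sketch for s3_get(): the document id is hypothetical, and the result
# is either the cached or freshly fetched bytes, or None for an unknown id.
def example_s3_get():
    body = s3_get('EXD00001234')  # hypothetical id from tracked_versions
    if body is None:
        log.debug('document not tracked')
    return body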

# The block below is a fragment of an API smoke test; the function name and
# signature are assumed here, and get_json_schema(), api_url(), APIValidator,
# and the lxml xml_schema are defined elsewhere in the project.
def validate_api(abbr, schema_dir, xml_schema):
    # Validate the metadata endpoint against its JSON schema (the two setup
    # lines are reconstructed from the parallel blocks below).
    metadata_schema = get_json_schema("metadata", schema_dir)
    url = api_url("metadata/%s" % abbr)
    json_response = scrapelib.urlopen(url)
    validictory.validate(json.loads(json_response), metadata_schema,
                         validator_cls=APIValidator)
    # Spot-check 100 randomly chosen bills against the bill schema.
    bill_schema = get_json_schema("bill", schema_dir)
    spec = {settings.LEVEL_FIELD: abbr}
    total_bills = db.bills.find(spec).count()
    for i in xrange(0, 100):
        bill = db.bills.find(spec)[random.randint(0, total_bills - 1)]
        path = "bills/%s/%s/%s/%s" % (abbr, bill['session'],
                                      bill['chamber'], bill['bill_id'])
        url = api_url(path)
        json_response = scrapelib.urlopen(url)
        validictory.validate(json.loads(json_response), bill_schema,
                             validator_cls=APIValidator)
    # Validate every legislator as JSON and as XML.
    legislator_schema = get_json_schema("legislator", schema_dir)
    for legislator in db.legislators.find(spec):
        path = 'legislators/%s' % legislator['_id']
        url = api_url(path)
        json_response = scrapelib.urlopen(url)
        validictory.validate(json.loads(json_response), legislator_schema,
                             validator_cls=APIValidator)
        validate_xml(url, xml_schema)
    # Validate every committee as JSON and as XML.
    committee_schema = get_json_schema("committee", schema_dir)
    for committee in db.committees.find(spec):
        path = "committees/%s" % committee['_id']
        url = api_url(path)
        json_response = scrapelib.urlopen(url)
        validictory.validate(json.loads(json_response), committee_schema,
                             validator_cls=APIValidator)
        validate_xml(url, xml_schema)
    # Spot-check 10 random events, if the collection has any.
    event_schema = get_json_schema("event", schema_dir)
    total_events = db.events.find(spec).count()
    if total_events:
        for i in xrange(0, 10):
            event = db.events.find(spec)[random.randint(0, total_events - 1)]
            path = "events/%s" % event['_id']
            url = api_url(path)
            json_response = scrapelib.urlopen(url)
            validictory.validate(json.loads(json_response), event_schema,
                                 validator_cls=APIValidator)
            validate_xml(url, xml_schema)
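
# validate_api() above threads an `xml_schema` through to validate_xml();
# one plausible way to build such a schema object with lxml, assuming the
# project ships an XSD file (the 'api.xsd' path here is hypothetical):
def load_xml_schema(path='api.xsd'):
    return lxml.etree.XMLSchema(lxml.etree.parse(path))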

def validate_xml(url, schema):
    # Re-request the same endpoint as XML and validate each result element
    # against the compiled lxml schema.
    response = scrapelib.urlopen(url + "&format=xml")
    xml = lxml.etree.fromstring(response)
    for child in xml.xpath("/results/*"):
        schema.assertValid(child)
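
# A hedged end-to-end sketch: run the validator for one jurisdiction. The
# 'ex' abbreviation and schema_dir value are placeholders, and validate_api's
# signature is the one assumed above, not confirmed by the original source.
if __name__ == '__main__':
    validate_api('ex', schema_dir=None, xml_schema=load_xml_schema())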