# method on ReadabilityTool (shown here outside its class)
def demo(self):
    self = ReadabilityTool()
    text = """
    It is for us the living, rather,
    to be dedicated here to the unfinished
    work which they who fought here have
    thus far so nobly advanced. It is
    rather for us to be here dedicated
    to the great task remaining before us,
    that from these honored dead we take
    increased devotion to that cause for which they
    gave the last full measure of devotion, that we
    here highly resolve that these dead shall not have
    died in vain, that this nation, under God, shall have a
    new birth of freedom, and that government of the people, by
    the people, for the people, shall not perish from this earth.
    """


def demo():
    ReadabilityTool().demo()
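# A self-contained sketch of the kind of score a readability tool computes over
# text like the passage above. This is an illustration only, not the
# ReadabilityTool API: the syllable counter is a naive vowel-group heuristic
# assumed here for the example.
import re

def flesch_reading_ease(text):
    sentences = [s for s in re.split(r'[.!?]+', text) if s.strip()]
    words = re.findall(r"[A-Za-z']+", text)
    syllables = sum(max(1, len(re.findall(r'[aeiouy]+', w.lower()))) for w in words)
    # Flesch Reading Ease = 206.835 - 1.015*(words/sentence) - 84.6*(syllables/word)
    return 206.835 - 1.015 * (len(words) / len(sentences)) - 84.6 * (syllables / len(words))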
def explorer_stories_csv():
    logger.info(flask_login.current_user.name)
    filename = 'all-story-urls'
    data = request.form
    if 'searchId' in data:
        solr_q, solr_fq = parse_as_sample(data['searchId'], data['uid'])
        filename = filename  # don't have this info + current_query['q']
        # for demo users we only download 100 random stories (ie. not all matching stories)
        return _stream_story_list_csv(filename, solr_q, solr_fq, 100, MediaCloud.SORT_RANDOM, 1)
    else:
        q = json.loads(data['q'])
        filename = file_name_for_download(q['label'], filename)
        # now compute total attention for all results
        if (len(q['collections']) == 0) and only_queries_reddit(q['sources']):
            start_date, end_date = parse_query_dates(q)
            stories = pushshift.reddit_top_submissions(query=q['q'], limit=2000,
                                                       start_date=start_date, end_date=end_date,
                                                       subreddits=pushshift.NEWS_SUBREDDITS)
            props = ['stories_id', 'subreddit', 'publish_date', 'score', 'last_updated', 'title', 'url',
                     'full_link', 'author']
            return csv.stream_response(stories, props, filename)
        else:
            solr_q, solr_fq = parse_query_with_keywords(q)
            # now page through all the stories and download them
            return _stream_story_list_csv(filename, solr_q, solr_fq)
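# The view above is shown without its route registration. Below is a minimal,
# runnable sketch of the same pattern -- returning a CSV attachment from a
# Flask view. The URL, field names, and rows are assumptions for illustration,
# not the project's actual endpoint or its csv helper module.
import csv as csv_lib
import io
from flask import Flask, Response

demo_app = Flask(__name__)

@demo_app.route('/api/demo/stories.csv', methods=['GET'])
def demo_stories_csv():
    rows = [{'stories_id': 1, 'title': 'example', 'url': 'http://example.com'}]
    props = ['stories_id', 'title', 'url']
    buf = io.StringIO()
    writer = csv_lib.DictWriter(buf, fieldnames=props, extrasaction='ignore')
    writer.writeheader()
    writer.writerows(rows)
    return Response(buf.getvalue(), mimetype='text/csv',
                    headers={'Content-Disposition': 'attachment; filename=stories.csv'})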
logging_config['handlers']['file']['filename'] = os.path.join(base_dir, logging_config['handlers']['file']['filename'])
logging.config.dictConfig(logging_config)
logger = logging.getLogger(__name__)
logger.info("---------------------------------------------------------------------------")
flask_login_logger = logging.getLogger('flask_login')
flask_login_logger.setLevel(logging.DEBUG)
server_mode = settings.get('server', 'mode').lower()
if server_mode not in [SERVER_MODE_DEV, SERVER_MODE_PROD]:
    logger.error("Unknown server mode '%s', set a mode in the `config/server.config` file", server_mode)
    sys.exit(1)
else:
    logger.info("Started server in %s mode", server_mode)
# Connect to MediaCloud
mc = mediacloud.api.AdminMediaCloud(settings.get('mediacloud', 'api_key'))
logger.info("Connected to mediacloud")
# Connect to CLIFF
cliff = Cliff(settings.get('cliff', 'host'), settings.get('cliff', 'port'))
# Connect to the app's mongo DB
db_host = settings.get('database', 'host')
db_name = settings.get('database', 'name')
db = AppDatabase(db_host, db_name)
try:
    db.check_connection()
except Exception as err:
    print("DB error: {0}".format(err))
    print("Make sure Mongo is running")
    sys.exit()
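# The `settings` object used above is read from `config/server.config`. The
# project's loader isn't shown in this snippet, so the ConfigParser-based
# version below is an assumption about how those sections might be read.
import configparser
import os

def load_settings(base_dir):
    settings = configparser.ConfigParser()
    settings.read(os.path.join(base_dir, 'config', 'server.config'))
    return settings

# Sections this code expects to find:
#   [server]     mode
#   [mediacloud] api_key
#   [cliff]      host, port
#   [database]   host, name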
if len(request.form['sources[]']) > 0:
    source_ids = [int(sid) for sid in request.form['sources[]'].split(',')]
# first update the collection
updated_collection = user_mc.updateTag(collection_id, formatted_name, label, description,
                                       is_static=(static == 'true'),
                                       show_on_stories=(show_on_stories == 'true'),
                                       show_on_media=(show_on_media == 'true'))
# get the sources in the collection first, then remove and add as needed
existing_source_ids = [int(m['media_id']) for m in media_with_tag(user_mediacloud_key(), collection_id)]
source_ids_to_remove = list(set(existing_source_ids) - set(source_ids))
source_ids_to_add = [sid for sid in source_ids if sid not in existing_source_ids]
# logger.debug(existing_source_ids)
# logger.debug(source_ids_to_add)
# logger.debug(source_ids_to_remove)
# then go through and tag all the sources specified with the new collection id
tags_to_add = [MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_ADD) for sid in source_ids_to_add]
tags_to_remove = [MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_REMOVE) for sid in source_ids_to_remove]
tags = tags_to_add + tags_to_remove
if len(tags) > 0:
    user_mc.tagMedia(tags)
    apicache.invalidate_collection_source_representation_cache(user_mediacloud_key(), collection_id)
return jsonify(updated_collection['tag'])
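# Worked example of the add/remove diff computed above: sources already in the
# collection but missing from the submitted list get a REMOVE tag, and newly
# submitted sources get an ADD tag (the ids here are illustrative).
existing = [101, 102, 103]
submitted = [102, 103, 104]
to_remove = list(set(existing) - set(submitted))            # [101]
to_add = [sid for sid in submitted if sid not in existing]  # [104]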
def remove_sources_from_collection(collection_id):
    source_ids_to_remove = request.form['sources[]'].split(',')
    source_ids_to_remove = [int(s) for s in source_ids_to_remove]
    user_mc = user_admin_mediacloud_client()
    # get the sources in the collection first, then remove and add as needed
    existing_source_ids = [int(m['media_id']) for m in media_with_tag(user_mediacloud_key(), collection_id)]
    source_ids_to_remain = list(set(existing_source_ids) - set(source_ids_to_remove))
    media_to_remove = [MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_REMOVE) for sid in source_ids_to_remove]
    media_to_remain = [MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_ADD)
                       for sid in source_ids_to_remain]  # do I need to run similar or TAG_ACTION_REMOVE?
    current_media = media_to_remove + media_to_remain
    results = {}  # avoid a NameError below if there is nothing to tag
    if len(current_media) > 0:
        results = user_mc.tagMedia(current_media)
    apicache.invalidate_collection_source_representation_cache(user_mediacloud_key(), collection_id)
    return jsonify(results)
description = request.form['description']
static = request.form['static'] if 'static' in request.form else None
show_on_stories = request.form['showOnStories'] if 'showOnStories' in request.form else None
show_on_media = request.form['showOnMedia'] if 'showOnMedia' in request.form else None
source_ids = []
if len(request.form['sources[]']) > 0:
    source_ids = request.form['sources[]'].split(',')
formatted_name = format_name_from_label(label)
# first create the collection
new_collection = user_mc.createTag(TAG_SETS_ID_COLLECTIONS, formatted_name, label, description,
                                   is_static=(static == 'true'),
                                   show_on_stories=(show_on_stories == 'true'),
                                   show_on_media=(show_on_media == 'true'))
# then go through and tag all the sources specified with the new collection id
tags = [MediaTag(sid, tags_id=new_collection['tag']['tags_id'], action=TAG_ACTION_ADD) for sid in source_ids]
if len(tags) > 0:
    user_mc.tagMedia(tags)
return jsonify(new_collection['tag'])
def _cached_sentence_docs(api_key, keywords, media, start, end, count=10, sort=mcapi.MediaCloud.SORT_RANDOM):
    query = app.core.util.solr_query(keywords, media, start, end)
    app.core.logger.debug("query: _sentence_docs %s" % query)
    start_index = 0
    if sort == mcapi.MediaCloud.SORT_RANDOM:
        # to sort randomly, we need to offset into the results and set sort to random,
        # so first we need to know how many sentences there are
        sentence_counts = json.loads(_sentence_numfound(api_key, keywords, media, start, end))
        sentence_total = sum([day['numFound'] for day in sentence_counts])
        # don't offset too far into the results, otherwise the query takes a LONG time to return
        sentence_total = min(sentence_total, 5000)
        try:
            start_index = randint(0, sentence_total - count)
        except Exception:
            start_index = 0
    res = cached_admin_sentence_list(api_key, query, '', start_index, count, sort=sort)
    story_count = cached_story_count(api_key, query)
    results = {
def _sentence_numfound(api_key, keywords, media, start, end):
    user_mc = mcapi.MediaCloud(api_key)
    query = "%s AND (%s)" % (keywords, app.util.media_to_solr(media))
    start = datetime.datetime.strptime(start, '%Y-%m-%d').strftime('%Y-%m-%d')
    end = datetime.datetime.strptime(end, '%Y-%m-%d').strftime('%Y-%m-%d')
    response = user_mc.sentenceCount(query, solr_filter='', split=True, split_daily=True,
                                     split_start_date=start, split_end_date=end)
    del response['split']['gap']
    del response['split']['start']
    del response['split']['end']
    date_counts = []
    for date, num_found in response['split'].items():
        date_counts.append({
            "date": date[:10],
            "numFound": num_found,
        })
    date_counts = sorted(date_counts, key=lambda d: datetime.datetime.strptime(d["date"], "%Y-%m-%d"))
    return json.dumps(date_counts, separators=(',', ':'))
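# The SORT_RANDOM branch in _cached_sentence_docs picks a random offset into the
# result set and then requests a fixed-size page from there. A standalone sketch
# of that sampling idea (illustrative only, not the MediaCloud client API):
from random import randint

def random_page_start(total_results, page_size, cap=5000):
    # cap the offset so the backend doesn't have to page deep into the results
    total = min(total_results, cap)
    if total <= page_size:
        return 0
    return randint(0, total - page_size)

# e.g. random_page_start(12000, 10) returns some offset in [0, 4990]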