How to use mediacloud - 10 common examples

To help you get started, we’ve selected a few mediacloud examples, based on popular ways it is used in public projects.
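All of the examples on this page start from a Media Cloud API client built from an API key. A minimal sketch of that setup, using only calls that appear in the snippets below (the client constructor and sentenceCount); the query string and dates here are placeholders:

import mediacloud.api

# build a client from your Media Cloud API key (placeholder value below)
MC_API_KEY = "YOUR_API_KEY_HERE"
mc = mediacloud.api.MediaCloud(MC_API_KEY)

# count matching sentences per day, mirroring the sentenceCount call in the
# MediaCloud-Dashboard example at the bottom of this page
response = mc.sentenceCount("climate change",          # placeholder Solr query
                            solr_filter="",
                            split=True,
                            split_daily=True,
                            split_start_date="2020-01-01",
                            split_end_date="2020-02-01")
# per-day counts live under the 'split' key, as the last example shows
print(response['split'])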


github mitmedialab / MediaCloud-API-Client / mediacloud / readability / readabilitytests.py
def demo(self):
        self = ReadabilityTool()
        text = """
                It is for us the living, rather,
                to be dedicated here to the unfinished
                work which they who fought here have
                thus far so nobly advanced. It is
                rather for us to be here dedicated
                to the great task remaining before us,
                that from these honored dead we take 
                increased devotion to that cause for which they
                gave the last full measure of devotion, that we
                here highly resolve that these dead shall not have
                died in vain, that this nation, under God, shall have a
                new birth of freedom, and that government of the people, by
                the people, for the people, shall not perish from this earth.  
               """
github mitmedialab / MediaCloud-Web-Tools / server / views / explorer / story_samples.py
def explorer_stories_csv():
    logger.info(flask_login.current_user.name)
    filename = 'all-story-urls'
    data = request.form
    if 'searchId' in data:
        solr_q, solr_fq = parse_as_sample(data['searchId'], data['uid'])
        filename = filename  # don't have this info + current_query['q']
        # for demo users we only download 100 random stories (i.e., not all matching stories)
        return _stream_story_list_csv(filename, solr_q, solr_fq, 100, MediaCloud.SORT_RANDOM, 1)
    else:
        q = json.loads(data['q'])
        filename = file_name_for_download(q['label'], filename)
        # now compute total attention for all results
        if (len(q['collections']) == 0) and only_queries_reddit(q['sources']):
            start_date, end_date = parse_query_dates(q)
            stories = pushshift.reddit_top_submissions(query=q['q'], limit=2000,
                                                       start_date=start_date, end_date=end_date,
                                                       subreddits=pushshift.NEWS_SUBREDDITS)
            props = ['stories_id', 'subreddit', 'publish_date', 'score', 'last_updated', 'title', 'url', 'full_link',
                     'author']
            return csv.stream_response(stories, props, filename)
        else:
            solr_q, solr_fq = parse_query_with_keywords(q)
            # now page through all the stories and download them
            return _stream_story_list_csv(filename, solr_q, solr_fq)
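_stream_story_list_csv and csv.stream_response above are helpers that live inside MediaCloud-Web-Tools itself, not in the mediacloud package. A minimal sketch of the same streaming pattern with plain Flask and the standard-library csv module (the function name here is illustrative, not the app's real helper):

import csv
import io

from flask import Response

def stream_rows_as_csv(rows, props, filename):
    # rows: iterable of dicts; props: the column names to keep, in order
    def generate():
        buffer = io.StringIO()
        writer = csv.DictWriter(buffer, fieldnames=props, extrasaction="ignore")
        writer.writeheader()
        yield buffer.getvalue()
        for row in rows:
            buffer.seek(0)
            buffer.truncate(0)
            writer.writerow(row)
            yield buffer.getvalue()
    headers = {"Content-Disposition": "attachment; filename={}.csv".format(filename)}
    return Response(generate(), mimetype="text/csv", headers=headers)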
github mitmedialab / MediaCloud-Web-Tools / server / __init__.py
logging_config['handlers']['file']['filename'] = os.path.join(base_dir, logging_config['handlers']['file']['filename'])
logging.config.dictConfig(logging_config)
logger = logging.getLogger(__name__)
logger.info("---------------------------------------------------------------------------")
flask_login_logger = logging.getLogger('flask_login')
flask_login_logger.setLevel(logging.DEBUG)

server_mode = settings.get('server', 'mode').lower()
if server_mode not in [SERVER_MODE_DEV, SERVER_MODE_PROD]:
    logger.error("Unknown server mode '%s', set a mode in the `config/server.config` file", server_mode)
    sys.exit(1)
else:
    logger.info("Started server in %s mode", server_mode)

# Connect to MediaCloud
mc = mediacloud.api.AdminMediaCloud(settings.get('mediacloud', 'api_key'))
logger.info("Connected to mediacloud")

# Connect to CLIFF
cliff = Cliff(settings.get('cliff', 'host'), settings.get('cliff', 'port'))

# Connect to the app's mongo DB
db_host = settings.get('database', 'host')
db_name = settings.get('database', 'name')
db = AppDatabase(db_host, db_name)

try:
    db.check_connection()
except Exception as err:
    print("DB error: {0}".format(err))
    print("Make sure Mongo is running")
    sys.exit()
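The settings object above is the app's own config wrapper; the snippets use it like a configparser lookup (settings.get('server', 'mode')). A minimal sketch of the same fail-fast startup check with the standard library, assuming an INI-style config/server.config file and assuming 'dev' / 'prod' as the mode values (the real constant values aren't shown in the snippet):

import configparser
import logging
import sys

SERVER_MODE_DEV = "dev"    # assumed values; the snippet only shows the constant names
SERVER_MODE_PROD = "prod"

logger = logging.getLogger(__name__)

settings = configparser.ConfigParser()
settings.read("config/server.config")

server_mode = settings.get("server", "mode").lower()
if server_mode not in [SERVER_MODE_DEV, SERVER_MODE_PROD]:
    logger.error("Unknown server mode '%s', set a mode in the `config/server.config` file", server_mode)
    sys.exit(1)
logger.info("Started server in %s mode", server_mode)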
github mitmedialab / MediaCloud-Web-Tools / server / views / sources / collectionedit.py
    if len(request.form['sources[]']) > 0:
        source_ids = [int(sid) for sid in request.form['sources[]'].split(',')]
    # first update the collection
    updated_collection = user_mc.updateTag(collection_id, formatted_name, label, description,
                                           is_static=(static == 'true'),
                                           show_on_stories=(show_on_stories == 'true'),
                                           show_on_media=(show_on_media == 'true'))
    # get the sources in the collection first, then remove and add as needed
    existing_source_ids = [int(m['media_id']) for m in media_with_tag(user_mediacloud_key(), collection_id)]
    source_ids_to_remove = list(set(existing_source_ids) - set(source_ids))
    source_ids_to_add = [sid for sid in source_ids if sid not in existing_source_ids]
    # logger.debug(existing_source_ids)
    # logger.debug(source_ids_to_add)
    # logger.debug(source_ids_to_remove)
    # then go through and tag all the sources specified with the new collection id
    tags_to_add = [MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_ADD) for sid in source_ids_to_add]
    tags_to_remove = [MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_REMOVE) for sid in source_ids_to_remove]
    tags = tags_to_add + tags_to_remove
    if len(tags) > 0:
        user_mc.tagMedia(tags)
        apicache.invalidate_collection_source_representation_cache(user_mediacloud_key(), collection_id)
    return jsonify(updated_collection['tag'])
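The heart of this collection-update view is the set arithmetic that works out which sources need the collection tag added and which need it removed; the rest is Flask plumbing around the updateTag and tagMedia calls. A standalone sketch of just that diff step (the helper name is ours; the resulting id lists feed the MediaTag / tagMedia calls exactly as above):

def diff_collection_sources(existing_source_ids, desired_source_ids):
    # return (ids_to_add, ids_to_remove) for updating collection membership
    existing = set(existing_source_ids)
    desired = set(desired_source_ids)
    ids_to_add = sorted(desired - existing)
    ids_to_remove = sorted(existing - desired)
    return ids_to_add, ids_to_remove

# example: sources 1 and 2 are already tagged, the user asked for 2 and 3
to_add, to_remove = diff_collection_sources([1, 2], [2, 3])
# to_add == [3], to_remove == [1]; wrap each id in a MediaTag with
# TAG_ACTION_ADD / TAG_ACTION_REMOVE and send them in one tagMedia() call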
github mitmedialab / MediaCloud-Web-Tools / server / views / sources / collectionedit.py
def remove_sources_from_collection(collection_id):
    source_ids_to_remove = request.form['sources[]'].split(',')
    source_ids_to_remove = [int(s) for s in source_ids_to_remove]
    user_mc = user_admin_mediacloud_client()
    # get the sources in the collection first, then remove and add as needed
    existing_source_ids = [int(m['media_id']) for m in media_with_tag(user_mediacloud_key(), collection_id)]
    source_ids_to_remain = list(set(existing_source_ids) - set(source_ids_to_remove))

    media_to_remove = [MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_REMOVE) for sid in source_ids_to_remove]
    # re-add the collection tag for the sources that remain
    # (TODO: confirm whether this re-add is actually required)
    media_to_remain = [MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_ADD)
                       for sid in source_ids_to_remain]
    current_media = media_to_remove + media_to_remain

    results = {}  # default so the return below can't hit an unbound name
    if len(current_media) > 0:
        results = user_mc.tagMedia(current_media)

    apicache.invalidate_collection_source_representation_cache(user_mediacloud_key(), collection_id)
    return jsonify(results)
github mitmedialab / MediaCloud-Web-Tools / server / views / sources / collection.py
    description = request.form['description']
    static = request.form['static'] if 'static' in request.form else None
    show_on_stories = request.form['showOnStories'] if 'showOnStories' in request.form else None
    show_on_media = request.form['showOnMedia'] if 'showOnMedia' in request.form else None
    source_ids = []
    if len(request.form['sources[]']) > 0:
        source_ids = request.form['sources[]'].split(',')

    formatted_name = format_name_from_label(label)
    # first create the collection
    new_collection = user_mc.createTag(TAG_SETS_ID_COLLECTIONS, formatted_name, label, description,
                                       is_static=(static == 'true'),
                                       show_on_stories=(show_on_stories == 'true'),
                                       show_on_media=(show_on_media == 'true'))
    # then go through and tag all the sources specified with the new collection id
    tags = [MediaTag(sid, tags_id=new_collection['tag']['tags_id'], action=TAG_ACTION_ADD) for sid in source_ids]
    if len(tags) > 0:
        user_mc.tagMedia(tags)
    return jsonify(new_collection['tag'])
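Stripped of the Flask request handling, creating a collection comes down to two client calls: createTag to make the collection inside the collections tag set, then one batched tagMedia call to attach the sources. A condensed sketch, assuming the legacy client's mediacloud.tags helpers (the import path and the tag-set id below are assumptions, not shown in the snippet):

import mediacloud.api
from mediacloud.tags import MediaTag, TAG_ACTION_ADD  # import path assumed from the legacy client

TAG_SETS_ID_COLLECTIONS = 5  # placeholder: use the collections tag-set id for your instance

mc = mediacloud.api.AdminMediaCloud("YOUR_API_KEY_HERE")

# 1. create the collection (a tag inside the collections tag set)
new_collection = mc.createTag(TAG_SETS_ID_COLLECTIONS, "my_collection", "My Collection",
                              "sources I care about", is_static=False,
                              show_on_stories=False, show_on_media=False)
collection_tags_id = new_collection['tag']['tags_id']

# 2. tag every source with the new collection in one batched call
source_ids = [1, 2, 3]  # placeholder media ids
tags = [MediaTag(mid, tags_id=collection_tags_id, action=TAG_ACTION_ADD) for mid in source_ids]
mc.tagMedia(tags)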
github mitmedialab / MediaCloud-Web-Tools / server / views / sources / collectionedit.py
    source_ids = [int(sid) for sid in request.form['sources[]'].split(',')]
    # first update the collection
    updated_collection = user_mc.updateTag(collection_id, formatted_name, label, description,
                                           is_static=(static == 'true'),
                                           show_on_stories=(show_on_stories == 'true'),
                                           show_on_media=(show_on_media == 'true'))
    # get the sources in the collection first, then remove and add as needed
    existing_source_ids = [int(m['media_id']) for m in media_with_tag(user_mediacloud_key(), collection_id)]
    source_ids_to_remove = list(set(existing_source_ids) - set(source_ids))
    source_ids_to_add = [sid for sid in source_ids if sid not in existing_source_ids]
    # logger.debug(existing_source_ids)
    # logger.debug(source_ids_to_add)
    # logger.debug(source_ids_to_remove)
    # then go through and tag all the sources specified with the new collection id
    tags_to_add = [MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_ADD) for sid in source_ids_to_add]
    tags_to_remove = [MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_REMOVE) for sid in source_ids_to_remove]
    tags = tags_to_add + tags_to_remove
    if len(tags) > 0:
        user_mc.tagMedia(tags)
        apicache.invalidate_collection_source_representation_cache(user_mediacloud_key(), collection_id)
    return jsonify(updated_collection['tag'])
github mitmedialab / MediaCloud-Dashboard / app / core / views.py
def _cached_sentence_docs(api_key, keywords, media, start, end, count=10, sort=mcapi.MediaCloud.SORT_RANDOM):
    query = app.core.util.solr_query(keywords, media, start, end)
    app.core.logger.debug("query: _sentence_docs %s" % query)
    start_index = 0
    if sort == mcapi.MediaCloud.SORT_RANDOM:
        # to sort randomly, we need to offset into the results and set sort to random,
        # so first we need to know how many sentences there are
        sentence_counts = json.loads(_sentence_numfound(api_key, keywords, media, start, end))
        sentence_total = sum([day['numFound'] for day in sentence_counts])
        sentence_total = min(sentence_total, 5000)   # don't offset too far into the results, otherwise the query takes a LONG time to return
        try:
            start_index = randint(0,sentence_total-count)
        except Exception as exception:
            start_index = 0
    res = cached_admin_sentence_list(api_key, query, '', start_index, count, sort=sort)
    story_count = cached_story_count(api_key, query)
    results = {
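The random-sampling trick in this snippet is: count the matching sentences, pick a random offset into them, then request count results with the sort set to random. The try/except above guards against randint being handed an empty range when there are fewer matches than requested. A small sketch of the same offset calculation with an explicit guard instead (the helper name is ours):

import random

def random_start_index(total_matches, count, max_offset=5000):
    # cap how deep we page in, and never give randint an empty range
    total = min(total_matches, max_offset)
    if total <= count:
        return 0
    return random.randint(0, total - count)

# e.g. 12 matching sentences with 10 requested -> an offset somewhere in [0, 2]
print(random_start_index(12, 10))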
github mitmedialab / MediaCloud-Dashboard / app / views.py
def _sentence_numfound(api_key, keywords, media, start, end):
    user_mc = mcapi.MediaCloud(api_key)
    query = "%s AND (%s)" % (keywords, app.util.media_to_solr(media))
    start = datetime.datetime.strptime(start, '%Y-%m-%d').strftime('%Y-%m-%d')
    end = datetime.datetime.strptime(end, '%Y-%m-%d').strftime('%Y-%m-%d')
    response = user_mc.sentenceCount(query, solr_filter='', split=True, split_daily=True, split_start_date=start, split_end_date=end)
    del response['split']['gap']
    del response['split']['start']
    del response['split']['end']
    date_counts = []
    for date, num_found in response['split'].items():
        date_counts.append({
            "date": date[:10]
            , "numFound": num_found
        })
    date_counts = sorted(date_counts, key=lambda d: datetime.datetime.strptime(d["date"], "%Y-%m-%d"))
    return json.dumps(date_counts, separators=(',',':'))
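As the three del statements above imply, the split dict that sentenceCount returns carries 'gap', 'start' and 'end' bookkeeping keys alongside one entry per day; the rest of the function just reshapes those per-day entries into a sorted list. A tiny standalone version of that reshaping step (the sample input shape below is inferred from the code above, not from API documentation):

import json

def split_to_date_counts(split):
    # drop the bookkeeping keys and keep one {"date", "numFound"} dict per day
    date_counts = [
        {"date": date[:10], "numFound": num_found}
        for date, num_found in split.items()
        if date not in ("gap", "start", "end")
    ]
    # ISO dates sort correctly as plain strings
    return sorted(date_counts, key=lambda d: d["date"])

sample_split = {"gap": "...", "start": "...", "end": "...",
                "2020-01-01T00:00:00Z": 12, "2020-01-02T00:00:00Z": 7}
print(json.dumps(split_to_date_counts(sample_split), separators=(',', ':')))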