How to use the feedparser.FeedParserDict class in feedparser

To help you get started, we’ve selected a few FeedParserDict examples based on popular ways feedparser is used in public projects.
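FeedParserDict is the dictionary subclass feedparser uses for everything it returns: the overall parse result, the feed metadata, and each entry. Its distinguishing feature is that keys are also reachable as attributes, which is why the examples below freely mix d['key'] and d.key. A minimal sketch (the feed URL is a placeholder):

import feedparser

# Keys are readable both as dict items and as attributes.
d = feedparser.FeedParserDict({'title': 'Example Feed'})
assert d['title'] == d.title == 'Example Feed'

# feedparser.parse() returns nested FeedParserDicts: .feed holds
# channel-level data, .entries the list of items.
result = feedparser.parse('http://example.com/feed.xml')
print(result.feed.get('title', 'untitled'))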


github jmoiron / speedparser / tests / speedparsertests.py
import json
import os
import time

import feedparser

def load_cache(path):
    """Load a cached feedparser result."""
    jsonpath = path.replace('dat', 'json')
    if not os.path.exists(jsonpath):
        return None
    with open(jsonpath) as f:
        data = json.loads(f.read())
    ret = feedparser.FeedParserDict()
    ret.update(data)
    if 'updated_parsed' in data['feed'] and data['feed']['updated_parsed']:
        try:
            data['feed']['updated_parsed'] = time.gmtime(data['feed']['updated_parsed'])
        except Exception:
            pass

    ret.feed = feedparser.FeedParserDict(data.get('feed', {}))
    entries = []
    for e in data.get('entries', []):
        if 'updated_parsed' in e and e['updated_parsed']:
            try:
                e['updated_parsed'] = time.gmtime(e['updated_parsed'])
            except Exception:
                pass
        entries.append(feedparser.FeedParserDict(e))
    ret.entries = entries
    return ret
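The interesting move here is the time handling: JSON cannot store a time.struct_time, so the cache keeps updated_parsed as an epoch number and load_cache rebuilds the struct with time.gmtime. A hypothetical write-side counterpart (save_cache does not appear in the source) could invert that with calendar.timegm, the UTC inverse of time.gmtime:

import calendar
import json
import time

def save_cache(path, parsed):
    """Hypothetical inverse of load_cache: flatten struct_time to epoch ints."""
    data = {'feed': dict(parsed.feed),
            'entries': [dict(e) for e in parsed.entries]}
    for d in [data['feed']] + data['entries']:
        if isinstance(d.get('updated_parsed'), time.struct_time):
            d['updated_parsed'] = calendar.timegm(d['updated_parsed'])
    # Note: this only handles updated_parsed, mirroring load_cache above;
    # other *_parsed fields would need the same treatment before dumping.
    with open(path.replace('dat', 'json'), 'w') as f:
        json.dump(data, f)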
github pculture / vidscraper / vidscraper / bulk_import / vimeo.py
import feedparser
import oauth2

# Excerpt: USERNAME_RE, _cached_video_count, video_count and the vimeo
# module are defined elsewhere in vidscraper.
def bulk_import(parsed_feed):
    match = USERNAME_RE.search(parsed_feed.feed.link)
    username = match.group('name')
    if parsed_feed.feed.link in _cached_video_count:
        count = _cached_video_count[parsed_feed.feed.link]
    else:
        count = video_count(parsed_feed)
    parsed_feed = feedparser.FeedParserDict(parsed_feed.copy())
    parsed_feed.entries = []

    consumer = oauth2.Consumer(vimeo.VIMEO_API_KEY, vimeo.VIMEO_API_SECRET)
    client = oauth2.Client(consumer)
    data = {
        'format': 'json',
        'method': 'vimeo.videos.getUploaded',
        'per_page': 50,
        'sort': 'newest',
        'full_response': 'yes',
        'user_id': username
        }
    if username.startswith('channels'):
        del data['user_id']
        data['method'] = 'vimeo.channels.getVideos'
        data['channel_id'] = username.split('/', 1)[1]
    # ... (remainder of function truncated in this excerpt)
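The FeedParserDict(parsed_feed.copy()) line is worth noting: dict.copy() returns a plain, shallow dict, so wrapping it restores attribute access while leaving the caller's object untouched. Entries are then cleared and refilled from the Vimeo API. A stripped-down sketch of that copy-and-reset pattern (fresh_copy is a name invented here):

import feedparser

def fresh_copy(parsed_feed, new_entries):
    # Shallow copy: .feed is still shared with the original object.
    copied = feedparser.FeedParserDict(parsed_feed.copy())
    copied.entries = [feedparser.FeedParserDict(e) for e in new_entries]
    return copied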
github ghoseb / planet.clojure / planet / spider.py
# Python 2 excerpt: config, StringIO and feedparser are imported elsewhere
# in planet/spider.py; uri/feed_info pairs arrive on input_queue.
def httpThread(thread_index, input_queue, output_queue, log):
    import httplib2
    from httplib import BadStatusLine

    h = httplib2.Http(config.http_cache_directory())
    uri, feed_info = input_queue.get(block=True)
    while uri:
        log.info("Fetching %s via %d", uri, thread_index)
        feed = StringIO('')
        setattr(feed, 'url', uri)
        setattr(feed, 'headers', 
            feedparser.FeedParserDict({'status':'500'}))
        try:
            # map IRI => URI
            try:
                if isinstance(uri,unicode):
                    idna = uri.encode('idna')
                else:
                    idna = uri.decode('utf-8').encode('idna')
                if idna != uri: log.info("IRI %s mapped to %s", uri, idna)
            except Exception:
                log.info("unable to map %s to a URI", uri)
                idna = uri

            # cache control headers
            headers = {}
            if feed_info.feed.has_key('planet_http_etag'):
                headers['If-None-Match'] = feed_info.feed['planet_http_etag']
            # ... (remainder of function truncated in this excerpt)
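Before any network I/O happens, the thread builds a stand-in response: a file-like object carrying a .url and a .headers FeedParserDict whose status defaults to '500'. If the fetch fails, downstream code still sees a well-formed response and treats it as a server error. A Python 3 sketch of the same idea (the URL is a placeholder):

import io

import feedparser

feed = io.BytesIO(b'')
feed.url = 'http://example.com/feed.xml'
feed.headers = feedparser.FeedParserDict({'status': '500'})

# Downstream code can check the status without caring whether the
# fetch ever happened.
if feed.headers.get('status') == '500':
    print('treat as server error')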
github ghoseb / planet.clojure / planet / spider.py
    # Excerpt: starts mid-function; data is a feedparser result, and
    # feed_uri, config, scrub and idindex come from the surrounding module.
    if data.headers.has_key('-content-hash'):
        data.feed['planet_content_hash'] = data.headers['-content-hash']

    # capture feed and data from the planet configuration file
    if data.get('version'):
        if not data.feed.has_key('links'): data.feed['links'] = list()
        feedtype = 'application/atom+xml'
        if data.version.startswith('rss'): feedtype = 'application/rss+xml'
        if data.version in ['rss090','rss10']: feedtype = 'application/rdf+xml'
        for link in data.feed.links:
            if link.rel == 'self':
                link['type'] = feedtype
                break
        else:
            data.feed.links.append(feedparser.FeedParserDict(
                {'rel':'self', 'type':feedtype, 'href':feed_uri}))
    for name, value in config.feed_options(feed_uri).items():
        data.feed['planet_'+name] = value

    # perform user configured scrub operations on the data
    scrub.scrub(feed_uri, data)

    from planet import idindex
    global index
    if index != None: index = idindex.open()
 
    # select latest entry for each unique id
    ids = {}
    for entry in data.entries:
        # generate an id, if none is present
        if not entry.has_key('id') or not entry.id:
            # ... (remainder of function truncated in this excerpt)
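The excerpt's middle section normalizes the feed's self link: it derives the right MIME type from the detected feed version, patches an existing rel="self" link, or appends a new FeedParserDict link via Python's for/else. Restated as a standalone Python 3 helper (ensure_self_link is a name invented here):

import feedparser

def ensure_self_link(data, feed_uri):
    feedtype = 'application/atom+xml'
    if data.version.startswith('rss'):
        feedtype = 'application/rss+xml'
    if data.version in ('rss090', 'rss10'):
        feedtype = 'application/rdf+xml'
    links = data.feed.setdefault('links', [])
    for link in links:
        if link.get('rel') == 'self':
            link['type'] = feedtype
            return
    # No self link found: append one.
    links.append(feedparser.FeedParserDict(
        {'rel': 'self', 'type': feedtype, 'href': feed_uri}))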
github jmoiron / speedparser / speedparser / speedparser.py
    # Method excerpt: self.tag_map and self.nslookup are built during parser
    # setup, and etree is lxml.etree.
    def parse_entry(self, entry):
        """An attempt to parse pieces of an entry out w/o xpath, by looping
        over the entry root's children and slotting them into the right places.
        This is going to be way messier than SpeedParserEntries, and maybe
        less cleanly usable, but it should be faster."""

        e = feedparser.FeedParserDict()
        tag_map = self.tag_map
        nslookup = self.nslookup

        for child in entry.getchildren():
            if isinstance(child, etree._Comment):
                continue
            ns, tag = clean_ns(child.tag)
            mapping = tag_map.get(tag, None)
            if mapping:
                getattr(self, 'parse_%s' % mapping)(child, e, nslookup.get(ns, ns))
            if not ns:
                continue
            fulltag = '%s:%s' % (nslookup.get(ns, ''), tag)
            mapping = tag_map.get(fulltag, None)
            if mapping:
                getattr(self, 'parse_%s' % mapping)(child, e, nslookup[ns])
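The dispatch trick is the core of this parser: a tag_map turns element names (optionally namespace-qualified) into method-name suffixes, and getattr routes each child to the right parse_* handler, so supporting a new element means adding one method and one map entry. A toy version of the pattern (class and tags invented here):

class TinyEntryParser(object):
    tag_map = {'title': 'title', 'link': 'link'}

    def parse_title(self, child, entry, ns):
        entry['title'] = child.text

    def parse_link(self, child, entry, ns):
        entry['link'] = child.get('href') or child.text

    def dispatch(self, child, entry, ns, tag):
        mapping = self.tag_map.get(tag)
        if mapping:
            # Route to parse_title / parse_link by name.
            getattr(self, 'parse_%s' % mapping)(child, entry, ns)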
github evilhero / mylar / mylar / rsscheck.py
# Handler in feedparser's _start_<namespace>_<element> naming convention;
# the explicit self suggests it is grafted onto feedparser's parser class.
def _start_newznab_attr(self, attrsD):
    context = self._getContext()

    context.setdefault('newznab', feedparser.FeedParserDict())
    context['newznab'].setdefault('tags', feedparser.FeedParserDict())

    name = attrsD.get('name')
    value = attrsD.get('value')

    if name == 'category':
        context['newznab'].setdefault('categories', []).append(value)
    else:
        context['newznab'][name] = value
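The nested setdefault calls are the point here: sub-dicts for the newznab namespace are created lazily, so the handler works no matter how many newznab:attr elements a single item carries, and repeated category attributes accumulate into a list. In isolation (the category values are placeholders):

import feedparser

context = feedparser.FeedParserDict()
context.setdefault('newznab', feedparser.FeedParserDict())
context['newznab'].setdefault('categories', []).append('7030')
context['newznab'].setdefault('categories', []).append('7040')
print(context.newznab.categories)   # ['7030', '7040']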
github socialplanning / opencore / opencore / utility / feedbacker.py
        # Method excerpt: h is an httplib2.Http instance and mship_tool a
        # membership tool, both set up earlier in the surrounding method.
        headers = {}
        if send_auth and not mship_tool.isAnonymousUser():
            member = mship_tool.getAuthenticatedMember()
            login = member.id
            auth_helper = self._get_auth_helper(context)
            cookie = auth_helper.generateCookie(login)
            headers = dict(Cookie=cookie)

        try:
            resp, content = h.request(req_url,
                                      method="GET",
                                      headers=headers,
                                      )
        except socket.error:
            logger.error('feedbacker server down? %s' % req_url)
            return feedparser.FeedParserDict(entries=[])

        if resp.get('status') != '200':
            logger.error('feedbacker error %s from %s' % (resp.get('status'),
                                                          req_url))
            return feedparser.FeedParserDict(entries=[])

        return feedparser.parse(content)
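Note the failure contract: on a socket error or a non-200 status the method returns feedparser.FeedParserDict(entries=[]) rather than None, so callers can iterate result.entries unconditionally. The same idea as a tiny helper (safe_parse is a name invented here):

import feedparser

def safe_parse(content):
    if content is None:
        # Same shape as a real parse result, just with no entries.
        return feedparser.FeedParserDict(entries=[])
    return feedparser.parse(content)

for entry in safe_parse(None).entries:
    print(entry.title)   # loop body simply never runs on failure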
github Keep-Current / web-miner / webminer / use_cases / request_arxiv / arxiv_repo.py
    def encode_feedparser_dict(self, fp_dict):
        """
        Recursive helper to convert the internal feedparser object to a simple dict.
        """
        if isinstance(fp_dict, feedparser.FeedParserDict) or isinstance(fp_dict, dict):
            ret_dict = {}
            for key in fp_dict.keys():
                ret_dict[key] = self.encode_feedparser_dict(fp_dict[key])
            return ret_dict
        elif isinstance(fp_dict, list):
            dict_list = []
            for key in fp_dict:
                dict_list.append(self.encode_feedparser_dict(key))
            return dict_list
        else:
            return fp_dict
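One detail worth knowing: FeedParserDict subclasses dict, so the isinstance(fp_dict, dict) half of the check already covers it; the explicit pair mostly documents intent.

import feedparser

# FeedParserDict is a dict subclass, so a plain dict check matches it too.
assert isinstance(feedparser.FeedParserDict(), dict)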
github 8planes / mirosubs / libs / vidscraper / bulk_import / vimeo.py
def feedparser_dict(obj):
    if isinstance(obj, dict):
        return feedparser.FeedParserDict(dict(
                [(key, feedparser_dict(value))
                 for (key, value) in obj.items()]))
    if isinstance(obj, (list, tuple)):
        return [feedparser_dict(member) for member in obj]
    return obj
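This converter runs in the opposite direction from the previous one: it wraps plain JSON data (here, Vimeo API responses) in FeedParserDict so downstream code can use attribute access as if it came from feedparser.parse. For example (payload invented here):

video = feedparser_dict({'title': 'clip',
                         'owner': {'display_name': 'someone'}})
print(video.title, video.owner.display_name)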
github karpathy / arxiv-sanity-preserver / fetch_papers.py
def encode_feedparser_dict(d):
  """ 
  helper function to get rid of feedparser bs with a deep copy. 
  I hate when libs wrap simple things in their own classes.
  """
  if isinstance(d, feedparser.FeedParserDict) or isinstance(d, dict):
    j = {}
    for k in d.keys():
      j[k] = encode_feedparser_dict(d[k])
    return j
  elif isinstance(d, list):
    l = []
    for k in d:
      l.append(encode_feedparser_dict(k))
    return l
  else:
    return d
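A typical call site, based on how fetch_papers.py uses it: parse an arXiv API response, then flatten it so it can be serialized. Any time.struct_time values pass through the converter untouched (they are neither dicts nor lists), so json.dumps still needs a fallback for them; the query URL is illustrative.

import json

import feedparser

response = feedparser.parse(
    'http://export.arxiv.org/api/query?search_query=all:electron')
plain = encode_feedparser_dict(response)
# default=str stringifies any leftover struct_time values.
print(json.dumps(plain, default=str)[:200])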