How to use the feedparser.PREFERRED_XML_PARSERS attribute in feedparser

To help you get started, we’ve selected a few feedparser examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github taozhijiang / readmeinfo / utils.py View on Github external
def fixed_feedparser_parse(uri):
    """Parse ``uri`` with feedparser, retrying once without ``drv_libxml2``.

    If the first ``feedparser.parse`` call raises ``TypeError`` and
    ``'drv_libxml2'`` is listed in ``feedparser.PREFERRED_XML_PARSERS``,
    that entry is removed and the parse is attempted a second time.
    When the entry is not listed, the ``TypeError`` is re-raised unchanged.
    """
    try:
        return feedparser.parse(uri)
    except TypeError:
        preferred = feedparser.PREFERRED_XML_PARSERS
        if 'drv_libxml2' not in preferred:
            # Driver is not listed: propagate the original error.
            raise
        # Removal mutates the object held by feedparser itself, so it
        # also applies to every later parse call.
        preferred.remove('drv_libxml2')
        return feedparser.parse(uri)
github freiheit / discord_feedbot / show_sample_entry.py View on Github external
#!/usr/bin/env python3
# Copyright (c) 2016-2017 Eric Eisenhart
# This software is released under an MIT-style license.
# See LICENSE.md for full details.

import pprint
import sys
import feedparser

# Drop the libxml2 SAX driver from feedparser's preferred parsers.  The
# membership test keeps this from raising ValueError when the driver is
# not listed.
if "drv_libxml2" in feedparser.PREFERRED_XML_PARSERS:
    feedparser.PREFERRED_XML_PARSERS.remove("drv_libxml2")


# 0 is command itself:
if len(sys.argv) == 2:
    feed_url = sys.argv[1]
    feed_data = feedparser.parse(feed_url)
    pp = pprint.PrettyPrinter(indent=4, depth=1)
    print("# We currently restrict this output to depth=1,")
    print("# because that's all the bot can currently handle.")
    print(
        "# So, ignore those `[...]` and `{...}` structures and only look at 'strings'."
    )
    pp.pprint(feed_data.entries[0])
else:
    print(
        "Give me 1 feed URL on the command-line, and I'll give the first entry from it."
github taozhijiang / readmeinfo / Feedfetch.py View on Github external
def fixed_feedparser_parse(self, uri):
        """Parse ``uri`` with feedparser, retrying once without ``drv_libxml2``.

        If the first ``feedparser.parse`` call raises ``TypeError`` and
        ``'drv_libxml2'`` is listed in ``feedparser.PREFERRED_XML_PARSERS``,
        that entry is removed and the parse is attempted a second time.
        When the entry is not listed, the ``TypeError`` is re-raised
        unchanged.
        """
        try:
            return feedparser.parse(uri)
        except TypeError:
            preferred = feedparser.PREFERRED_XML_PARSERS
            if 'drv_libxml2' not in preferred:
                # Driver is not listed: propagate the original error.
                raise
            # Removal mutates the object held by feedparser itself, so it
            # also applies to every later parse call.
            preferred.remove('drv_libxml2')
            return feedparser.parse(uri)
github freiheit / discord_feedbot / feed2discord.py View on Github external
# re-evaluating them.
    conn.execute(SQL_CLEAN_OLD_ITEMS)

    conn.close()


# get_config() supplies both the configuration object and the logger.
config, logger = get_config()

# Make main config area global, since used everywhere/anywhere
MAIN = config["MAIN"]
TIMEZONE = get_timezone(config)


# Crazy workaround for a bug with parsing that doesn't apply on all
# pythons.  Only remove the driver when it is actually listed, so a
# missing entry does not abort start-up with ValueError.
if "drv_libxml2" in feedparser.PREFERRED_XML_PARSERS:
    feedparser.PREFERRED_XML_PARSERS.remove("drv_libxml2")


# global discord client object
# No offline_members as we don't use them
client = discord.Client(fetch_offline_members=False)


def extract_best_item_date(item, tzinfo):
    # This function loops through all the common date fields for an item in
    # a feed, and extracts the "best" one.  Falls back to "now" if nothing
    # is found.
    fields = ("published", "pubDate", "date", "created", "updated")
    for date_field in fields:
        if date_field in item and len(item[date_field]) > 0:
            try:
                date_obj = parse_datetime(item[date_field])
github freiheit / discord_feedbot / show_all_entries.py View on Github external
#!/usr/bin/env python3
# Copyright (c) 2016-2017 Eric Eisenhart
# This software is released under an MIT-style license.
# See LICENSE.md for full details.

import pprint
import sys
import feedparser

# Drop the libxml2 SAX driver from feedparser's preferred parsers.  The
# membership test keeps this from raising ValueError when the driver is
# not listed.
if "drv_libxml2" in feedparser.PREFERRED_XML_PARSERS:
    feedparser.PREFERRED_XML_PARSERS.remove("drv_libxml2")


# 0 is command itself:
if len(sys.argv) == 2:
    feed_url = sys.argv[1]
    feed_data = feedparser.parse(feed_url)
    pp = pprint.PrettyPrinter(indent=4, depth=2)
    print("# We currently restrict this output to depth=1,")
    print("# because that's all the bot can currently handle.")
    print(
        "# So, ignore those `[...]` and `{...}` structures and only look at 'strings'."
    )
    pp.pprint(feed_data.entries)
else:
    print(
        "Give me 1 feed URL on the command-line, and I'll give the first entry from it."
github firstlookmedia / autocanary / autocanary / headlines.py View on Github external
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""

import feedparser
# Avoid feedparser python3 bug, see https://github.com/manolomartinez/greg/issues/47
# Guarded so that a missing entry does not raise ValueError at import time.
if 'drv_libxml2' in feedparser.PREFERRED_XML_PARSERS:
    feedparser.PREFERRED_XML_PARSERS.remove('drv_libxml2')

from . import common

# Module-level settings for the headline fetcher.
config = {
    # Wikinews Special:NewsFeed RSS query: include category "Published" and
    # exclude the listed categories through the `notcategories` parameter.
    'feed_url': 'https://en.wikinews.org/w/index.php?title=Special:NewsFeed&feed=rss&categories=Published&notcategories=No%20publish|Archived|AutoArchived|disputed&namespace=0&count=5&ordermethod=categoryadd&stablepages=only',
    # U+2022 BULLET, stored under the 'headline_bullet' key.
    'headline_bullet': u"\u2022"
}

class Headlines(object):
    def __init__(self):
        self.enabled = False
        self.have_headlines = False
        self.headlines_str = None

    def fetch_headlines(self):
        # --- feed.entries is empty list on fail.
github lemon24 / reader / reader / _feedparser_parse_data.py View on Github external
contentloc = result['headers'].get('content-location', '')
    href = result.get('href', '')
    baseuri = _makeSafeAbsoluteURI(href, contentloc) or _makeSafeAbsoluteURI(contentloc) or href

    baselang = result['headers'].get('content-language', None)
    if isinstance(baselang, bytes_) and baselang is not None:
        baselang = baselang.decode('utf-8', 'ignore')

    if not _XML_AVAILABLE:
        use_strict_parser = 0
    if use_strict_parser:
        # initialize the SAX parser
        feedparser = StrictFeedParser(baseuri, baselang, 'utf-8')
        feedparser.resolve_relative_uris = resolve_relative_uris
        feedparser.sanitize_html = sanitize_html
        saxparser = xml.sax.make_parser(PREFERRED_XML_PARSERS)
        saxparser.setFeature(xml.sax.handler.feature_namespaces, 1)
        try:
            # disable downloading external doctype references, if possible
            saxparser.setFeature(xml.sax.handler.feature_external_ges, 0)
        except xml.sax.SAXNotSupportedException:
            pass
        saxparser.setContentHandler(feedparser)
        saxparser.setErrorHandler(feedparser)
        source = xml.sax.xmlreader.InputSource()
        source.setByteStream(_StringIO(data))
        try:
            saxparser.parse(source)
        except xml.sax.SAXException as e:
            result['bozo'] = 1
            result['bozo_exception'] = feedparser.exc or e
            use_strict_parser = 0
github taozhijiang / readmeinfo / Feedfetch.py View on Github external
def fixed_feedparser_parse(self, uri):
        """Parse ``uri`` with feedparser, retrying once without ``drv_libxml2``.

        If the first ``feedparser.parse`` call raises ``TypeError`` and
        ``'drv_libxml2'`` is listed in ``feedparser.PREFERRED_XML_PARSERS``,
        that entry is removed and the parse is attempted a second time.
        When the entry is not listed, the ``TypeError`` is re-raised
        unchanged.
        """
        try:
            return feedparser.parse(uri)
        except TypeError:
            preferred = feedparser.PREFERRED_XML_PARSERS
            if 'drv_libxml2' not in preferred:
                # Driver is not listed: propagate the original error.
                raise
            # Removal mutates the object held by feedparser itself, so it
            # also applies to every later parse call.
            preferred.remove('drv_libxml2')
            return feedparser.parse(uri)
github taozhijiang / readmeinfo / utils.py View on Github external
def fixed_feedparser_parse(uri):
    """Parse ``uri`` with feedparser, retrying once without ``drv_libxml2``.

    If the first ``feedparser.parse`` call raises ``TypeError`` and
    ``'drv_libxml2'`` is listed in ``feedparser.PREFERRED_XML_PARSERS``,
    that entry is removed and the parse is attempted a second time.
    When the entry is not listed, the ``TypeError`` is re-raised unchanged.
    """
    try:
        return feedparser.parse(uri)
    except TypeError:
        preferred = feedparser.PREFERRED_XML_PARSERS
        if 'drv_libxml2' not in preferred:
            # Driver is not listed: propagate the original error.
            raise
        # Removal mutates the object held by feedparser itself, so it
        # also applies to every later parse call.
        preferred.remove('drv_libxml2')
        return feedparser.parse(uri)
github rss2email / rss2email / rss2email / feed.py View on Github external
from . import email as _email
from . import error as _error
from . import util as _util


# Build an opener and register it through install_opener().
_urllib_request.install_opener(_urllib_request.build_opener())

# Tuple of the socket exception classes that this socket module actually
# defines; names it lacks are skipped.
_SOCKET_ERRORS = tuple(
    getattr(_socket, _name)
    for _name in ('error', 'herror', 'gaierror')
    if hasattr(_socket, _name))

# drv_libxml2 raises:
#   TypeError: 'str' does not support the buffer interface
# so feedparser is left with an empty preferred-parser list.
_feedparser.PREFERRED_XML_PARSERS = []


class Feed (object):
    """Utility class for feed manipulation and storage.

    >>> import pickle
    >>> import sys
    >>> from .config import CONFIG

    >>> feed = Feed(
    ...    name='test-feed', url='http://example.com/feed.atom', to='a@b.com')
    >>> print(feed)
    test-feed (http://example.com/feed.atom -> a@b.com)
    >>> feed.section
    'feed.test-feed'
    >>> feed.from_email