How to use the feedparser.urls.make_safe_absolute_uri function in feedparser

To help you get started, we’ve selected a few feedparser examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github kurtmckee / feedparser / tests / runtests.py View on Github external
def test_catch_ValueError(self):
        """Check that a malformed URI is handled without raising ValueError.

        The URI ``'http://bad]test/'`` is passed to
        ``make_safe_absolute_uri`` three ways: on its own, joined against
        ``self.base``, and joined against ``self.base`` while
        ``ACCEPTABLE_URI_SCHEMES`` is temporarily emptied.  Every call must
        return either the URI unchanged or an empty string.
        """
        uri = 'http://bad]test/'
        value1 = feedparser.urls.make_safe_absolute_uri(uri)
        value2 = feedparser.urls.make_safe_absolute_uri(self.base, uri)
        swap = feedparser.urls.ACCEPTABLE_URI_SCHEMES
        feedparser.urls.ACCEPTABLE_URI_SCHEMES = ()
        try:
            value3 = feedparser.urls.make_safe_absolute_uri(self.base, uri)
        finally:
            # Restore the module-level setting even if the call above raises,
            # so a failure here cannot leak an empty scheme list into the
            # tests that run afterwards.
            feedparser.urls.ACCEPTABLE_URI_SCHEMES = swap
        # Only Python 2.7 and up throw a ValueError, otherwise uri is returned
        self.assertIn(value1, (uri, ''))
        self.assertIn(value2, (uri, ''))
        self.assertIn(value3, (uri, ''))
github kurtmckee / feedparser / tests / runtests.py View on Github external
def test_catch_ValueError(self):
        """Check that a malformed URI is handled without raising ValueError.

        The URI ``'http://bad]test/'`` is passed to
        ``make_safe_absolute_uri`` three ways: on its own, joined against
        ``self.base``, and joined against ``self.base`` while
        ``ACCEPTABLE_URI_SCHEMES`` is temporarily emptied.  Every call must
        return either the URI unchanged or an empty string.
        """
        uri = 'http://bad]test/'
        value1 = feedparser.urls.make_safe_absolute_uri(uri)
        value2 = feedparser.urls.make_safe_absolute_uri(self.base, uri)
        swap = feedparser.urls.ACCEPTABLE_URI_SCHEMES
        feedparser.urls.ACCEPTABLE_URI_SCHEMES = ()
        try:
            value3 = feedparser.urls.make_safe_absolute_uri(self.base, uri)
        finally:
            # Restore the module-level setting even if the call above raises,
            # so a failure here cannot leak an empty scheme list into the
            # tests that run afterwards.
            feedparser.urls.ACCEPTABLE_URI_SCHEMES = swap
        # Only Python 2.7 and up throw a ValueError, otherwise uri is returned
        self.assertIn(value1, (uri, ''))
        self.assertIn(value2, (uri, ''))
        self.assertIn(value3, (uri, ''))
github kurtmckee / feedparser / tests / runtests.py View on Github external
def test_catch_ValueError(self):
        """Check that a malformed URI is handled without raising ValueError.

        The URI ``'http://bad]test/'`` is passed to
        ``make_safe_absolute_uri`` three ways: on its own, joined against
        ``self.base``, and joined against ``self.base`` while
        ``ACCEPTABLE_URI_SCHEMES`` is temporarily emptied.  Every call must
        return either the URI unchanged or an empty string.
        """
        uri = 'http://bad]test/'
        value1 = feedparser.urls.make_safe_absolute_uri(uri)
        value2 = feedparser.urls.make_safe_absolute_uri(self.base, uri)
        swap = feedparser.urls.ACCEPTABLE_URI_SCHEMES
        feedparser.urls.ACCEPTABLE_URI_SCHEMES = ()
        try:
            value3 = feedparser.urls.make_safe_absolute_uri(self.base, uri)
        finally:
            # Restore the module-level setting even if the call above raises,
            # so a failure here cannot leak an empty scheme list into the
            # tests that run afterwards.
            feedparser.urls.ACCEPTABLE_URI_SCHEMES = swap
        # Only Python 2.7 and up throw a ValueError, otherwise uri is returned
        self.assertIn(value1, (uri, ''))
        self.assertIn(value2, (uri, ''))
        self.assertIn(value3, (uri, ''))
github kurtmckee / feedparser / tests / runtests.py View on Github external
def fn(self):
        """Assert that ``rel`` resolved against ``self.base`` equals ``expect``.

        NOTE(review): ``rel`` and ``expect`` are not defined in this excerpt;
        presumably they are captured from an enclosing scope -- confirm.
        """
        resolved = feedparser.urls.make_safe_absolute_uri(self.base, rel)
        self.assertEqual(resolved, expect)
github kurtmckee / feedparser / feedparser / mixin.py View on Github external
def unknown_starttag(self, tag, attrs):
        """Update depth, base URI and language state for an opening tag.

        Increments ``self.depth``, normalizes ``attrs``, resolves the
        element's base URI and language (falling back to the values already
        held on ``self``), records the language in ``self.feeddata`` for
        ``feed``/``rss``/``rdf:RDF`` elements, and pushes the resulting base
        URI and language onto ``self.basestack`` and ``self.langstack``.
        """
        # one level deeper in the element tree
        self.depth += 1

        # normalize attrs, then index them by name for lookups
        attrs = [self._normalize_attributes(pair) for pair in attrs]
        attrs_d = dict(attrs)

        # track xml:base
        baseuri = attrs_d.get('xml:base', attrs_d.get('base')) or self.baseuri
        if isinstance(baseuri, bytes_):
            baseuri = baseuri.decode(self.encoding, 'ignore')
        # ensure that self.baseuri is always an absolute URI that
        # uses a whitelisted URI scheme (e.g. not `javascript:`)
        if not self.baseuri:
            self.baseuri = _urljoin(self.baseuri, baseuri)
        else:
            self.baseuri = make_safe_absolute_uri(self.baseuri, baseuri) or self.baseuri

        # track xml:lang
        lang = attrs_d.get('xml:lang', attrs_d.get('lang'))
        if lang is None:
            # if no xml:lang is specified, use parent lang
            lang = self.lang
        elif lang == '':
            # xml:lang could be explicitly set to '', we need to capture that
            lang = None
        if lang and tag in ('feed', 'rss', 'rdf:RDF'):
            self.feeddata['language'] = lang.replace('_', '-')
        self.lang = lang
        self.basestack.append(self.baseuri)
        self.langstack.append(lang)
github kurtmckee / feedparser / feedparser / api.py View on Github external
if not data:
        return result

    # overwrite existing headers using response_headers
    result['headers'].update(response_headers or {})

    data = convert_to_utf8(result['headers'], data, result)
    use_strict_parser = result['encoding'] and True or False

    result['version'], data, entities = replace_doctype(data)

    # Ensure that baseuri is an absolute URI using an acceptable URI scheme.
    contentloc = result['headers'].get('content-location', '')
    href = result.get('href', '')
    baseuri = make_safe_absolute_uri(href, contentloc) or make_safe_absolute_uri(contentloc) or href

    baselang = result['headers'].get('content-language', None)
    if isinstance(baselang, bytes_) and baselang is not None:
        baselang = baselang.decode('utf-8', 'ignore')

    if not _XML_AVAILABLE:
        use_strict_parser = 0
    if use_strict_parser:
        # initialize the SAX parser
        feedparser = StrictFeedParser(baseuri, baselang, 'utf-8')
        feedparser.resolve_relative_uris = resolve_relative_uris
        feedparser.sanitize_html = sanitize_html
        saxparser = xml.sax.make_parser(PREFERRED_XML_PARSERS)
        saxparser.setFeature(xml.sax.handler.feature_namespaces, 1)
        try:
            # disable downloading external doctype references, if possible
github kurtmckee / feedparser / feedparser / namespaces / _base.py View on Github external
def _end_newlocation(self):
        """Store the popped ``newlocation`` value on the feed-level context.

        The value is popped unconditionally.  It is only recorded, after
        stripping whitespace and resolving it against ``self.baseuri`` with
        ``make_safe_absolute_uri``, when the current context is
        ``self.feeddata``; for any other context nothing is stored.
        """
        location = self.pop('newlocation')
        target = self._get_context()
        # only set newlocation when the context is the feed itself
        if target is self.feeddata:
            target['newlocation'] = make_safe_absolute_uri(self.baseuri, location.strip())
github kurtmckee / feedparser / feedparser / sanitizer.py View on Github external
elif tag not in self.acceptable_elements:
                return

        # declare xlink namespace, if needed
        if self.mathmlOK or self.svgOK:
            if any((a for a in attrs if a[0].startswith('xlink:'))):
                if not ('xmlns:xlink', 'http://www.w3.org/1999/xlink') in attrs:
                    attrs.append(('xmlns:xlink', 'http://www.w3.org/1999/xlink'))

        clean_attrs = []
        for key, value in self.normalize_attrs(attrs):
            if key in acceptable_attributes:
                key = keymap.get(key, key)
                # make sure the uri uses an acceptable uri scheme
                if key == 'href':
                    value = make_safe_absolute_uri(value)
                clean_attrs.append((key, value))
            elif key == 'style':
                clean_value = self.sanitize_style(value)
                if clean_value:
                    clean_attrs.append((key, clean_value))
        super(_HTMLSanitizer, self).unknown_starttag(tag, clean_attrs)