Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
self.overwrite = overwrite
def start(self, handler, attrs):
    """Default ``start`` hook: accepts a handler and attrs and does nothing.

    NOTE(review): presumably meant to be overridden by subclasses and
    invoked when an element opens -- confirm against the caller.
    """
    return None
def end(self, handler, text):
    """Default ``end`` hook: accepts a handler and text and does nothing.

    NOTE(review): presumably meant to be overridden by subclasses and
    invoked when an element closes -- confirm against the caller.
    """
    return None
class RSS(Target):
    """Target that forwards an ``xml:base`` attribute to the handler."""

    def start(self, handler, attrs):
        """Call ``handler.set_base`` when ``attrs`` contains ``xml:base``."""
        if 'xml:base' not in attrs.keys():
            # Nothing to forward; the handler's base is left untouched.
            return
        handler.set_base(attrs.get('xml:base'))
class PodcastItem(Target):
    """Target that orders and optionally truncates the collected episodes."""

    def end(self, handler, text):
        """Sort ``handler.data['episodes']`` and cap the list length.

        Episodes are sorted in place by their ``'published'`` value in
        descending order.  When ``handler.max_episodes`` is truthy, only
        that many leading episodes are kept.
        """
        def published_of(episode):
            return episode.get('published')

        handler.data['episodes'].sort(key=published_of, reverse=True)
        limit = handler.max_episodes
        if limit:
            handler.data['episodes'] = handler.data['episodes'][:limit]
class PodcastAttr(Target):
    """Target that stores filtered element text as a podcast attribute."""

    WANT_TEXT = True

    def end(self, handler, text):
        """Store ``self.filter_func(text)`` under ``self.key`` on the podcast."""
        filtered = self.filter_func(text)
        handler.set_podcast_attr(self.key, filtered)
class PodcastAttrRelativeLink(PodcastAttr):
    """Podcast attribute taken from an element's ``href`` attribute."""

    def start(self, handler, attrs):
        """Join ``href`` onto ``handler.base`` and store it under ``self.key``.

        Nothing is stored when ``href`` is missing or empty.
        """
        href = attrs.get('href')
        if not href:
            return
        absolute = urlparse.urljoin(handler.base, href)
        handler.set_podcast_attr(self.key, self.filter_func(absolute))
class EpisodeItem(Target):
    """Target that opens an episode on start and validates it on end."""

    def start(self, handler, attrs):
        """Ask the handler to begin a new episode via ``add_episode``."""
        handler.add_episode()

    def end(self, handler, text):
        """Ask the handler to check the episode via ``validate_episode``."""
        handler.validate_episode()
class EpisodeAttr(Target):
    """Target that stores filtered element text as an episode attribute."""

    WANT_TEXT = True

    def end(self, handler, text):
        """Store ``self.filter_func(text)`` under ``self.key`` on the episode.

        When ``self.overwrite`` is falsy and the episode already has a
        truthy value for ``self.key``, the existing value is kept.
        """
        keep_existing = (not self.overwrite
                         and handler.get_episode_attr(self.key))
        if not keep_existing:
            handler.set_episode_attr(self.key, self.filter_func(text))
class EpisodeAttrRelativeLink(EpisodeAttr):
    """Episode attribute whose text is joined onto ``handler.base`` first."""

    def end(self, handler, text):
        """Join ``text`` onto ``handler.base`` and delegate to the parent ``end``."""
        absolute = urlparse.urljoin(handler.base, text)
        super(EpisodeAttrRelativeLink, self).end(handler, absolute)
class EpisodeGuid(EpisodeAttr):
def start(self, handler, attrs):
class PodcastAttrRelativeLink(PodcastAttr):
    """Podcast attribute whose text is joined onto ``handler.base`` first."""

    def end(self, handler, text):
        """Join ``text`` onto ``handler.base`` and delegate to the parent ``end``."""
        absolute = urlparse.urljoin(handler.base, text)
        super(PodcastAttrRelativeLink, self).end(handler, absolute)
class PodcastAttrFromHref(Target):
    """Target that stores an element's ``href`` as a podcast attribute."""

    def start(self, handler, attrs):
        """Join ``href`` onto ``handler.base`` and store it under ``self.key``.

        Nothing is stored when ``href`` is missing or empty.
        """
        href = attrs.get('href')
        if not href:
            return
        absolute = urlparse.urljoin(handler.base, href)
        handler.set_podcast_attr(self.key, self.filter_func(absolute))
class EpisodeItem(Target):
    """Target that opens an episode on start and validates it on end."""

    def start(self, handler, attrs):
        """Ask the handler to begin a new episode via ``add_episode``."""
        handler.add_episode()

    def end(self, handler, text):
        """Ask the handler to check the episode via ``validate_episode``."""
        handler.validate_episode()
class EpisodeAttr(Target):
    """Target that stores filtered element text as an episode attribute."""

    WANT_TEXT = True

    def end(self, handler, text):
        """Store ``self.filter_func(text)`` under ``self.key`` on the episode.

        When ``self.overwrite`` is falsy and the episode already has a
        truthy value for ``self.key``, the existing value is kept.
        """
        keep_existing = (not self.overwrite
                         and handler.get_episode_attr(self.key))
        if not keep_existing:
            handler.set_episode_attr(self.key, self.filter_func(text))
handler.set_episode_attr('_guid_is_permalink', True)
else:
handler.set_episode_attr('_guid_is_permalink', False)
def end(self, handler, text):
    """Install a guid-specific filter, then store the text via ``EpisodeAttr.end``.

    The filter strips the guid and, when the episode's
    ``'_guid_is_permalink'`` attribute is truthy, joins it onto
    ``handler.base``; otherwise the stripped guid is used unchanged.
    """
    def normalize_guid(raw_guid):
        stripped = raw_guid.strip()
        if not handler.get_episode_attr('_guid_is_permalink'):
            return stripped
        return urlparse.urljoin(handler.base, stripped)

    # The filter closes over this call's handler, so it is rebound each time.
    self.filter_func = normalize_guid
    EpisodeAttr.end(self, handler, text)
class EpisodeAttrFromHref(Target):
    """Target that stores an element's ``href`` as an episode attribute."""

    def start(self, handler, attrs):
        """Join ``href`` onto ``handler.base`` and store it under ``self.key``.

        Nothing is stored when ``href`` is missing or empty.
        """
        href = attrs.get('href')
        if not href:
            return
        absolute = urlparse.urljoin(handler.base, href)
        handler.set_episode_attr(self.key, self.filter_func(absolute))
class Enclosure(Target):
def __init__(self, file_size_attribute):
Target.__init__(self)
self.file_size_attribute = file_size_attribute
def start(self, handler, attrs):
url = attrs.get('url')
if url is None:
return
return urlparse.urljoin(handler.base, guid)
return guid
self.filter_func = filter_func
EpisodeAttr.end(self, handler, text)
class EpisodeAttrFromHref(Target):
    """Target that stores an element's ``href`` as an episode attribute."""

    def start(self, handler, attrs):
        """Join ``href`` onto ``handler.base`` and store it under ``self.key``.

        Nothing is stored when ``href`` is missing or empty.
        """
        href = attrs.get('href')
        if not href:
            return
        absolute = urlparse.urljoin(handler.base, href)
        handler.set_episode_attr(self.key, self.filter_func(absolute))
class Enclosure(Target):
    """Target that registers an enclosure (url, size, type) with the handler."""

    def __init__(self, file_size_attribute):
        """Remember which attribute name carries the enclosure's file size."""
        Target.__init__(self)
        self.file_size_attribute = file_size_attribute

    def start(self, handler, attrs):
        """Parse the enclosure attributes and pass them to ``add_enclosure``.

        Elements without a ``url`` attribute are ignored.  The url has
        leading whitespace removed and is joined onto ``handler.base``
        before being handed to ``parse_url``.
        """
        raw_url = attrs.get('url')
        if raw_url is None:
            return

        joined = urlparse.urljoin(handler.base, raw_url.lstrip())
        enclosure_url = parse_url(joined)
        size = parse_length(attrs.get(self.file_size_attribute))
        content_type = parse_type(attrs.get('type'))
        handler.add_enclosure(enclosure_url, size, content_type)
class RSS(Target):
    """Target that forwards an ``xml:base`` attribute to the handler."""

    def start(self, handler, attrs):
        """Call ``handler.set_base`` when ``attrs`` contains ``xml:base``."""
        if 'xml:base' not in attrs.keys():
            # Nothing to forward; the handler's base is left untouched.
            return
        handler.set_base(attrs.get('xml:base'))
class PodcastItem(Target):
    """Target that orders and optionally truncates the collected episodes."""

    def end(self, handler, text):
        """Sort ``handler.data['episodes']`` and cap the list length.

        Episodes are sorted in place by their ``'published'`` value in
        descending order.  When ``handler.max_episodes`` is truthy, only
        that many leading episodes are kept.
        """
        def published_of(episode):
            return episode.get('published')

        handler.data['episodes'].sort(key=published_of, reverse=True)
        limit = handler.max_episodes
        if limit:
            handler.data['episodes'] = handler.data['episodes'][:limit]
class PodcastAttr(Target):
    """Target that stores filtered element text as a podcast attribute."""

    WANT_TEXT = True

    def end(self, handler, text):
        """Store ``self.filter_func(text)`` under ``self.key`` on the podcast."""
        filtered = self.filter_func(text)
        handler.set_podcast_attr(self.key, filtered)
class PodcastAttrRelativeLink(PodcastAttr):
    """Podcast attribute whose text is joined onto ``handler.base`` first."""

    def end(self, handler, text):
        """Join ``text`` onto ``handler.base`` and delegate to the parent ``end``."""
        absolute = urlparse.urljoin(handler.base, text)
        super(PodcastAttrRelativeLink, self).end(handler, absolute)
class PodcastAttrFromHref(Target):
def start(self, handler, attrs):
value = attrs.get('href')
if value:
WANT_TEXT = True
def __init__(self):
    """Create the target with its ``_want_content`` flag switched off."""
    # NOTE(review): the parent initialiser is not called here -- confirm
    # that this is intentional.
    self._want_content = False
def start(self, handler, attrs):
    """Remember the element's ``type`` attribute, defaulting to ``'text'``."""
    declared_type = attrs.get('type', 'text')
    self._mime_type = declared_type
def end(self, handler, text):
    """Store ``text`` as the episode description according to the saved type.

    ``'html'`` content goes to ``'description_html'`` unchanged; ``'text'``
    content goes to ``'description'`` after ``squash_whitespace``.  Any
    other type is ignored.
    """
    mime_type = self._mime_type
    if mime_type == 'html':
        handler.set_episode_attr('description_html', text)
        return
    if mime_type == 'text':
        handler.set_episode_attr('description', squash_whitespace(text))
class RSSItemDescription(Target):
"""
RSS 2.0 almost encourages to put html content in item/description
but content:encoded is the better source of html content and itunes:summary
is known to contain the short textual description of the item.
So use a heuristic to attribute text to either description or description_html,
without overriding existing values.
"""
WANT_TEXT = True
def __init__(self):
self._want_content = False
def end(self, handler, text):
if is_html(text):
if not handler.get_episode_attr('description_html'):
handler.set_episode_attr('description_html', text.strip())
"""
WANT_TEXT = True
def __init__(self):
    """Create the target with its ``_want_content`` flag switched off."""
    # NOTE(review): the parent initialiser is not called here -- confirm
    # that this is intentional.
    self._want_content = False
def end(self, handler, text):
    """File ``text`` under the html or plain description without overwriting.

    Text that ``is_html`` accepts is stripped and stored as
    ``'description_html'``; anything else is passed through
    ``squash_whitespace`` and stored as ``'description'``.  In both cases
    an existing truthy value is left in place.
    """
    if is_html(text):
        if handler.get_episode_attr('description_html'):
            return
        handler.set_episode_attr('description_html', text.strip())
        return

    if handler.get_episode_attr('description'):
        # don't overwrite itunes:summary?
        return
    handler.set_episode_attr('description', squash_whitespace(text))
class PodloveChapters(Target):
    """Target that checks the declared version of a chapters element."""

    SUPPORTED_VERSIONS = ('1.1', '1.2')

    def start(self, handler, attrs):
        """Log a warning when the ``version`` attribute is not a supported one.

        A missing ``version`` attribute is treated as ``'1.1'``.
        """
        declared = attrs.get('version', '1.1')
        if declared in PodloveChapters.SUPPORTED_VERSIONS:
            return
        logger.warning('Possible incompatible chapters version: %s', declared)
class PodloveChapter(Target):
def start(self, handler, attrs):
# Both the start and title attributes are mandatory
if attrs.get('start') is None or attrs.get('title') is None:
logger.warning('Invalid chapter (missing start and/or and title)')
return
chapter = {
url = parse_url(urlparse.urljoin(handler.base, attrs.get('href')))
mime_type = parse_type(attrs.get('type'))
# RFC 5005 (http://podlove.org/paged-feeds/)
if rel == 'first':
handler.set_podcast_attr('paged_feed_first', url)
elif rel == 'next':
handler.set_podcast_attr('paged_feed_next', url)
elif rel == 'payment':
handler.set_podcast_attr('payment_url', url)
elif mime_type == 'text/html':
if rel in ('self', 'alternate'):
handler.set_podcast_attr('link', url)
class AtomContent(Target):
    """Target that stores element text as an episode description.

    The element's ``type`` attribute, recorded in ``start``, decides
    whether the text is stored as ``'description_html'`` or as
    ``'description'``.
    """

    WANT_TEXT = True

    def __init__(self):
        """Create the target with no content wanted and a ``'text'`` type."""
        self._want_content = False
        # Same fallback that start() uses, so end() never reads an unset
        # attribute even if start() was not called first.
        self._mime_type = 'text'

    def start(self, handler, attrs):
        """Remember the element's ``type`` attribute, defaulting to ``'text'``."""
        self._mime_type = attrs.get('type', 'text')

    def end(self, handler, text):
        """Store ``text`` on the episode according to the remembered type.

        ``'html'`` content is stored unchanged as ``'description_html'``;
        ``'text'`` content is stored as ``'description'`` after
        ``squash_whitespace``.  Any other type is ignored.
        """
        if self._mime_type == 'html':
            handler.set_episode_attr('description_html', text)
        elif self._mime_type == 'text':
            handler.set_episode_attr('description', squash_whitespace(text))
class RSSItemDescription(Target):