How to use the html2text.unescape function in html2text

To help you get started, we’ve selected a few html2text examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Dieterbe / rss2email / rss2email.py View on Github external
title = getContent(entry)[:70]

					title = title.replace("\n", " ").strip()

					datetime = time.gmtime()

					if DATE_HEADER:
						for datetype in DATE_HEADER_ORDER:
							kind = datetype+"_parsed"
							if kind in entry and entry[kind]: datetime = entry[kind]

					link = entry.get('link', "")

					from_addr = getEmail(r, entry)

					name = h2t.unescape(getName(r, entry))
					fromhdr = formataddr((name, from_addr,))
					tohdr = (f.to or default_to)
					subjecthdr = title
					datehdr = time.strftime("%a, %d %b %Y %H:%M:%S -0000", datetime)
					useragenthdr = "rss2email"

					# Add post tags, if available
					tagline = ""
					if 'tags' in entry:
						tags = entry.get('tags')
						taglist = []
						if tags:
							for tag in tags:
								taglist.append(tag['term'])
						if taglist:
							tagline = ",".join(taglist)
github rss2email / rss2email / rss2email.py View on Github external
parts = ['']
        feed = parsed.feed
        parts.append(feed.get('title', ''))
        for x in [entry, feed]:
            if 'name' in x.get('author_detail', []):
                if x.author_detail.name:
                    if ''.join(parts):
                        parts.append(': ')
                    parts.append(x.author_detail.name)
                    break
        if not ''.join(parts) and self.use_publisher_email:
            if 'name' in feed.get('publisher_detail', []):
                if ''.join(parts):
                    parts.append(': ')
                parts.append(feed.publisher_detail.name)
        return _html2text.unescape(''.join(parts))
github timbertson / edit-server / edit_server / filters.py View on Github external
def decode(self, content):
		"""Return *content* with its HTML entities decoded.

		The text is first passed through ``self._replace`` together with
		``self.replace_html``, then unescaped with ``html2text.unescape``, and
		finally the placeholder marker left in the text is swapped for a
		plain space.
		"""
		replaced = self._replace(content, self.replace_html)
		# Tag every literal < and > with a "_!!" prefix so they stay
		# distinguishable from entities that decode to < and > below.
		marked = re.sub('(<|>)', r'_!!\1', replaced)
		unescaped = html2text.unescape(marked)
		return unescaped.replace(' _place_holder;', ' ')
github chris-martin / grouch / grouch / scraper.py View on Github external
def text(node):
            """Return the unescaped ``node.text``, or ``None`` when *node* is ``None``."""
            if node is None:
                return None
            return unescape(node.text)
github laurentb / weboob / modules / cragr / web / browser.py View on Github external
self.BASEURL = '%s://%s' % (parsed.scheme, parsed.netloc)
            self.location(url, data=data)

        assert self.login_page.is_here()

        # Then, post the password.
        self.page.login(self.username, self.password)

        if self.new_login:
            url = self.page.get_accounts_url()
        else:
            # The result of POST is the destination URL.
            url = self.page.get_result_url()

        if not url.startswith('http'):
            raise BrowserIncorrectPassword(unescape(url, unicode_snob=True))

        self.location(url.replace('Synthese', 'Synthcomptes'))

        if self.login_error.is_here():
            raise BrowserIncorrectPassword()

        if self.page is None:
            raise WebsiteNotSupported()

        if not self.accounts.is_here():
            # Sometimes the home page is Releves.
            new_url  = re.sub('act=([^&=]+)', 'act=Synthcomptes', self.page.url, 1)
            self.location(new_url)

        if not self.accounts.is_here():
            raise BrowserIncorrectPassword()