How to use html2text - 10 common examples

To help you get started, we’ve selected a few html2text examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github bukun / TorCMS / torcms / core / tool / run_whoosh.py View on Github external
def do_for_app(rand=True, kind='', doc_type=None):
    '''
    Generate Whoosh index entries for posts of one kind.

    When ``rand`` is true, 10 posts are requested from
    ``MPost.query_random``; otherwise 2 posts are requested from
    ``MPost.query_recent``. Each post is written to ``TOR_IDX`` with its
    own writer, which is committed before the next post is handled.

    ``doc_type`` is indexed by each post's ``kind`` to obtain the value
    stored in the ``type`` field; it defaults to an empty mapping.
    '''
    doc_type = {} if doc_type is None else doc_type

    # Pick the query function and batch size together.
    fetch, amount = (MPost.query_random, 10) if rand else (MPost.query_recent, 2)

    for post in fetch(num=amount, kind=kind):
        # Indexed content is "<title>,<post body converted to text>".
        body = post.title + ',' + html2text.html2text(
            tornado.escape.xhtml_unescape(post.cnt_html)
        )
        idx_writer = TOR_IDX.writer()
        fields = {
            'catid': 'sid' + kind,
            'title': post.title,
            'type': doc_type[post.kind],
            'link': '/{0}/{1}'.format(router_post[post.kind], post.uid),
            'content': body,
        }
        idx_writer.update_document(**fields)
        idx_writer.commit()
github realpython / reader / reader / feed.py View on Github external
articles = _feed(url).entries
    try:
        article = articles[int(article_id)]
    except (IndexError, ValueError):
        max_id = len(articles) - 1
        msg = "Unknown article ID, use ID from 0 to {}".format(max_id)
        raise SystemExit("Error: {}".format(msg))

    # Get article as HTML
    try:
        html = article.content[0].value
    except AttributeError:
        html = article.summary

    # Convert HTML to plain text
    to_text = html2text.HTML2Text()
    to_text.ignore_links = not links
    text = to_text.handle(html)

    return u"# {}\n\n{}".format(article.title, text)
github Dieterbe / rss2email / rss2email.py View on Github external
import socket

# Collect the socket exception classes that this interpreter actually exposes.
socket_errors = []
for e in ['error', 'gaierror']:
	if not hasattr(socket, e):
		continue
	socket_errors.append(getattr(socket, e))

#DEPRECATED import mimify
#DEPRECATED from StringIO import StringIO as SIO
#DEPRECATED mimify.CHARSET = 'utf-8'

import feedparser
feedparser.USER_AGENT = "rss2email/"+__version__+ " +http://www.allthingsrss.com/rss2email/"

import html2text as h2t

# Copy this script's settings onto the html2text module attributes of the
# same names.
h2t.UNICODE_SNOB = UNICODE_SNOB
h2t.LINKS_EACH_PARAGRAPH = LINKS_EACH_PARAGRAPH
h2t.BODY_WIDTH = BODY_WIDTH
# Module-level alias so the converter can be called directly as html2text(...).
html2text = h2t.html2text

from types import *

### Utility Functions ###

import threading
class TimeoutError(Exception):
	"""Timeout exception defined by this module; derives directly from Exception."""

class InputError(Exception):
	"""Input-related exception defined by this module; derives directly from Exception."""

def timelimit(timeout, function):
#    def internal(function):
        def internal2(*args, **kw):
            """
            from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/473878
github apache / allura / ForgeWiki / forgewiki / scripts / wiki_from_trac / extractors.py View on Github external
def _convert_content_html2text(self, content):
        """Convert wiki HTML ``content`` to Markdown and rewrite internal links.

        The content is first passed through
        ``self._convert_wiki_toc_to_markdown`` and then through
        ``html2text.html2text``. Markdown links whose target starts with
        ``<path of self.base_url>wiki/`` are rewritten: the remainder of the
        target is mapped through ``self.convert_title``, and the link
        collapses to ``[page]`` when its caption equals the converted title.

        Returns the converted text.

        NOTE: this sets ``html2text.BODY_WIDTH`` at module level, so the
        no-wrap setting persists after this call.
        """
        html2text.BODY_WIDTH = 0  # Don't wrap lines
        content = self._convert_wiki_toc_to_markdown(content)
        content = html2text.html2text(unicode(content))
        # Convert internal links
        internal_url = urlsplit(self.base_url).path + 'wiki/'
        # The URL path is literal text, not a pattern: escape it so characters
        # such as '.', '+' or '(' in the path cannot alter or break the regex.
        internal_link_re = (
            r'\[([^]]+)\]\(%s([^)]*)\)' % re.escape(internal_url)
        )
        internal_link = re.compile(internal_link_re, re.UNICODE)

        def sub(match):
            # group(1) is the link caption, group(2) the target after the
            # internal wiki prefix.
            caption = match.group(1)
            page = self.convert_title(match.group(2))
            if caption == page:
                link = '[%s]' % unquote(page)
            else:
                link = '[%s](%s)' % (caption, page)
            return link
        return internal_link.sub(sub, content)
github Dieterbe / rss2email / rss2email.py View on Github external
smtpserver = send(fromhdr, tohdr, subjecthdr, content, contenttype, extraheaders, smtpserver)

					f.seen[frameid] = id

				f.etag, f.modified = r.get('etag', None), r.get('modified', None)
			except (KeyboardInterrupt, SystemExit):
				raise
			except:
				logging.warning ("=== rss2email encountered a problem with this feed ===")
				logging.warning ("=== See the rss2email FAQ at http://www.allthingsrss.com/rss2email/ for assistance ===")
				logging.warning ("=== If this occurs repeatedly, send this to lindsey@allthingsrss.com ===")
				logging.warning ("Could not parse %s", f.url)
				logging.warning (traceback.extract_stack())
				logging.warning ("rss2email %s", __version__)
				logging.warning ("feedparser %s", feedparser.__version__)
				logging.warning ("html2text %s", h2t.__version__)
				logging.warning ("Python %s", sys.version)
				logging.warning ("=== END HERE ===")
				continue

	finally:
		save(feeds)
		if smtpserver:
			smtpserver.quit()
github gislite / maplet / script_gen_whoosh_database.py View on Github external
def do_for_post(writer, rand=True):
    """
    Add up to 50 posts to a Whoosh index through ``writer``.

    Posts come from ``MPost().query_random(50)`` when ``rand`` is true and
    from ``MPost().query_recent(50)`` otherwise. The record count is printed
    before indexing. This function only calls ``writer.update_document``;
    it never commits the writer.
    """
    model = MPost()
    posts = model.query_random(50) if rand else model.query_recent(50)

    # Every document in this batch carries the same HTML label as its type.
    label = '<span class="glyphicon glyphicon-list-alt" style="color:green;">[{0}]</span>'.format('文档')
    print(posts.count())

    for post in posts:
        # Indexed content is "<title>,<post body converted to text>".
        body = post.title + ',' + html2text.html2text(
            tornado.escape.xhtml_unescape(post.cnt_html)
        )
        fields = {
            'title': post.title,
            'type': label,
            'link': '/post/{0}.html'.format(post.uid),
            'content': body,
        }
        writer.update_document(**fields)
github liberapay / liberapay.com / liberapay / models / participant.py View on Github external
def render(t, context):
                """Render the body for content type ``t`` and place it in the base template.

                The rendered body is also stored in ``bodies[t]``. For
                'text/plain' with no matching entry in ``spt``, the body is
                derived from ``bodies['text/html']`` via ``html2text``.
                """
                # Without a base template the wrapper is just the placeholder.
                if base_spt:
                    wrapper = base_spt[t].render(context).strip()
                else:
                    wrapper = '$body'
                use_html_fallback = t == 'text/plain' and t not in spt
                if use_html_fallback:
                    rendered = html2text(bodies['text/html']).strip()
                else:
                    rendered = spt[t].render(context).strip()
                bodies[t] = rendered
                return wrapper.replace('$body', rendered)
        message = {}
github appressoas / django_cradmin / django_cradmin / apps / cradmin_email / emailutils.py View on Github external
def convert_html_to_plaintext(html):
    """
    Return the plain-text rendering of ``html``.

    Thin wrapper that delegates to ``html2text.html2text``.
    """
    plaintext = html2text.html2text(html)
    return plaintext
github nvbn / everpad / everpad / specific / unity / lens.py View on Github external
notebooks = [self.notebook_filter_id]
        else:
            notebooks = dbus.Array([], signature='i')
        if self.place_filter_id:
            place = self.place_filter_id
        else:
            place = 0
        tags = dbus.Array(self.tag_filter_ids, signature='i')
        for note_struct in provider.find_notes(
            search, notebooks, tags, place,
            1000, Note.ORDER_TITLE, -1,
        ):
            note = Note.from_tuple(note_struct)
            results.append(json.dumps({'id': note.id, 'search': search}),
                'everpad-note', self.pin_notes if note.pinnded else self.all_notes,
                "text/html", note.title, html2text(note.content),
            '')