How to use the pylatexenc.latex2text.LatexNodes2Text class in pylatexenc

To help you get started, we’ve selected a few pylatexenc examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github phfaist / pylatexenc / test / test_latex2text.py View on Github external
def do_test(tex, uni, math_mode=None):
            # Convert `tex` with strict LaTeX spacing enabled and require the
            # result to equal `uni`.  `math_mode` is forwarded to
            # LatexNodes2Text only when the caller actually supplied one.
            extra_opts = {} if math_mode is None else {'math_mode': math_mode}
            converter = LatexNodes2Text(strict_latex_spaces=True, **extra_opts)
            self.assertEqual(
                converter.latex_to_text(tex),
                uni,
                msg="For TeX=r'{}'".format(tex),
            )
github phfaist / pylatexenc / test / test_latex2text.py View on Github external
def do_test(tex, uni):
            # Run `tex` through a default-configured LatexNodes2Text and
            # require the output to equal `uni`; the failure message echoes
            # the offending TeX input.
            converted = LatexNodes2Text().latex_to_text(tex)
            failure_note = "For TeX=r'{}'".format(tex)
            self.assertEqual(converted, uni, msg=failure_note)
github phfaist / pylatexenc / test / test_latex2text.py View on Github external
def test_mathmodes_02(self):
        # With math_mode='verbatim' every math construct below (inline $...$
        # and \(...\), display \[...\] and $$...$$) is expected to come back
        # unchanged, so the input doubles as the expected output.
        source = r"""
If $\alpha=1$ and \(\beta=2\), then
\[
  \beta=2\alpha\ ,
\]
or, equivalently,
$$ \alpha = \frac1{\beta}\ .$$
"""
        converter = LatexNodes2Text(math_mode='verbatim')
        converted = converter.latex_to_text(source)
        # math stays verbatim
        self.assertEqualUpToWhitespace(converted, source)
github phfaist / pylatexenc / test / test_latex2text.py View on Github external
def test_accents(self):
        # Cedilla macro followed by a bare letter argument.
        cedilla_l2t = LatexNodes2Text()
        cedilla_nodes = LatexWalker(r"Fran\c cais").get_latex_nodes()[0]
        self.assertEqual(
            cedilla_l2t.nodelist_to_text(cedilla_nodes),
            '''Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais'''
        )

        # Acute accent given once with a bare argument and once wrapped in
        # nested braces.
        acute_l2t = LatexNodes2Text()
        acute_nodes = LatexWalker(r"Fr\'en{\'{e}}tique").get_latex_nodes()[0]
        self.assertEqual(
            acute_l2t.nodelist_to_text(acute_nodes),
            '''Fr\N{LATIN SMALL LETTER E WITH ACUTE}n\N{LATIN SMALL LETTER E WITH ACUTE}tique'''
        )

        # Negated '=' inside inline math with the $ delimiters kept; the
        # expected symbol is '=' plus a combining long solidus overlay,
        # NFC-normalized.
        negation_l2t = LatexNodes2Text(math_mode='with-delimiters')
        negation_nodes = LatexWalker(r"$1 \not= 2$").get_latex_nodes()[0]
        negated_equals = unicodedata.normalize('NFC', "=\N{COMBINING LONG SOLIDUS OVERLAY}")
        self.assertEqual(
            negation_l2t.nodelist_to_text(negation_nodes),
            '''$1 {} 2$'''.format(negated_equals)
        )
github phfaist / pylatexenc / test / test_latex2text.py View on Github external
\beta=2\alpha\ ,
\]
or, equivalently,
$$ \alpha = \frac1{\beta}\ .$$
"""
        # Expected conversion for math_mode='with-delimiters': the math
        # delimiters ($...$, \(...\), \[...\], $$...$$) are still present while
        # the macros inside them appear as Unicode text (\alpha -> α,
        # \frac1{\beta} -> 1/β).
        correct_text = r"""
If $α=1$ and \(β=2\), then
\[
    β=2α ,
\]
or, equivalently,
$$
    α = 1/β .
$$
"""
        l2t = LatexNodes2Text(math_mode='with-delimiters')
        # NOTE(review): judging by the helper's name, whitespace differences
        # are ignored in this comparison; its definition is outside this view.
        self.assertEqualUpToWhitespace(
            l2t.latex_to_text(latex),
            correct_text
        )
github phfaist / pylatexenc / test / test_latex2text.py View on Github external
def test_keep_braced_groups(self):
        # No explicit minimum length: the multi-character group "{en ...}"
        # keeps its braces in the expected output, while the macro arguments
        # and the inner one-letter group around the cedilla do not.
        default_l2t = LatexNodes2Text(keep_braced_groups=True)
        french_nodes = LatexWalker(
            r"\textit{Voil\`a du texte}. Il est \'{e}crit {en fran{\c{c}}ais}"
        ).get_latex_nodes()[0]
        self.assertEqual(
            default_l2t.nodelist_to_text(french_nodes),
            '''Voil\N{LATIN SMALL LETTER A WITH GRAVE} du texte. Il est \N{LATIN SMALL LETTER E WITH ACUTE}crit {en fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais}'''
        )

        # keep_braced_groups_minlen=4: the three-character group loses its
        # braces, the four-character group keeps them.
        minlen4_l2t = LatexNodes2Text(keep_braced_groups=True, keep_braced_groups_minlen=4)
        minlen4_nodes = LatexWalker(r"A{XYZ}{ABCD}").get_latex_nodes()[0]
        self.assertEqual(
            minlen4_l2t.nodelist_to_text(minlen4_nodes),
            '''AXYZ{ABCD}'''
        )

        # keep_braced_groups_minlen=0: every group keeps its braces.
        minlen0_l2t = LatexNodes2Text(keep_braced_groups=True, keep_braced_groups_minlen=0)
        minlen0_nodes = LatexWalker(r"{A}{XYZ}{ABCD}").get_latex_nodes()[0]
        self.assertEqual(
            minlen0_l2t.nodelist_to_text(minlen0_nodes),
            '''{A}{XYZ}{ABCD}'''
        )
github phfaist / bibolamazi / bibolamazi / filters / fixes.py View on Github external
# Rebuild the expression chunk by chunk: each chunk's nodes are serialized
# back to LaTeX and, when protection is required, wrapped in double braces.
# The chunk's separator `sep` is appended in every case.
for nl, sep, need_protection_hint in iterate_over_words_in_nodelist(nodelist):
            #logger.longdebug("chunk: nl=%r, sep=%r, need_protection_hint=%r, text-version=%r",
            #                 nl, sep, need_protection_hint, l2t.nodelist_to_text(nl))
            # Original LaTeX source of this chunk, node by node.
            nl_to_latex = "".join(nnn.latex_verbatim() for nnn in nl)
            if need_protection_hint:
                # The iterator itself flagged this chunk; no text inspection needed.
                #logger.longdebug("protecting chunk due to hint flag")
                new_expression += '{{' + nl_to_latex + '}}' + sep
            elif needs_protection(l2t.nodelist_to_text(nl)):
                # Decide from the plain-text rendering of the chunk.
                #logger.longdebug("protecting chunk by inspection of text representation")
                new_expression += '{{' + nl_to_latex + '}}' + sep
            else:
                # Unprotected chunk: copied through unchanged.
                new_expression += nl_to_latex + sep
        return new_expression

    # Walk through `title` looking for '{{' openers and parse the LaTeX
    # expression that starts at each one.
    lw = latexwalker.LatexWalker(title)
    # Text converter that keeps math delimiters and uses the project's own
    # latex context (butils.latex2text_latex_context).
    l2t = latex2text.LatexNodes2Text(math_mode='with-delimiters',
                                     latex_context=butils.latex2text_latex_context)
    newtitle = ''
    oldi = 0  # position in `title` from which the next search starts
    while True:
        i = title.find('{{', oldi)
        if i == -1: # not found
            break
        # Parse the expression starting at the opener; the result is unpacked
        # as (node, start position, length).
        (n, pos, len_) = lw.get_latex_expression(i, strict_braces=False)
        assert pos == i
        newi = i + len_  # index just past the parsed expression
        # The last two characters of the parsed expression must be '}}'.
        if title[newi-2:newi] != '}}':
            # expression must be closed by '}}', i.e. we used
            # get_latex_expression to get the inner {...} braced group,
            # but the outer group must be closed immediately after the
            # expression we read. Otherwise it's not Zotero-protected
            # and we skip this group.
github albertlauncher / python / tex_to_unicode.py View on Github external
# Normalize the query text, try to convert it to Unicode with pylatexenc and
# fill `item` with either the conversion result or a usage hint.
stripped = query.string.strip()

    success = False
    if stripped:
        # Treat the input as a macro: prepend a backslash when none was typed.
        if not stripped.startswith('\\'):
            stripped = '\\' + stripped

        # Remove double backslashes (newlines)
        stripped = stripped.replace('\\\\', ' ')

        # pylatexenc doesn't support \not
        # The macro is swapped for a placeholder that is turned into a
        # combining overlay after conversion (see below).
        # NOTE(review): this is a plain substring replacement, so it also
        # rewrites the prefix of longer macro names starting with \not
        # (e.g. \notin) -- confirm that this is acceptable.
        stripped = stripped.replace('\\not', '@NOT@')

        # pylatexenc doesn't like backslashes at end of string
        if not stripped.endswith('\\'):
            n = LatexNodes2Text()
            result = n.latex_to_text(stripped)
            if result:
                result = unicodedata.normalize('NFC', result)
                # Put the overlay right after the first non-space character
                # that follows each placeholder, dropping the placeholder and
                # any whitespace in between.
                result = re.sub(r'@NOT@\s*(\S)', '\\1' + COMBINING_LONG_SOLIDUS_OVERLAY, result)
                # Discard placeholders that had no character to attach to.
                result = result.replace('@NOT@', '')
                # Normalize again now that combining overlays were inserted.
                result = unicodedata.normalize('NFC', result)
                item.text = result
                item.subtext = 'Result'
                success = True

    if not success:
        # Fall back to showing the (normalized) input together with a hint.
        item.text = stripped
        item.subtext = 'Type some TeX math'
        # No-op: success is already False in this branch.
        success = False

    if success:
github phfaist / bibolamazi / bibolamazi / filters / nameinitials.py View on Github external
#
                # # de-latex the person first
                # pstr = str(p)
                # # BUG: FIXME: remove space after any macros
                # # replace "blah\macro blah" by "blah\macro{}blah"
                # pstr = re.sub(r'(\\[a-zA-Z]+)\s+', r'\1{}', pstr)
                #if (self._names_to_utf8):
                #    pstr = latex2text.latex2text(pstr)
                #
                #p = Person(pstr)

                if self._names_to_utf8:
                    # delatex everything to UTF-8, but honor names protected by
                    # braces and keep those
                    # Matches an alphabetic macro name (backslash + letters)
                    # followed by at least one whitespace character.
                    rxmacrospace = re.compile(r'(\\[a-zA-Z]+)\s+')
                    l2t = LatexNodes2Text(keep_braced_groups=True, strict_latex_spaces=True)
                    # Replace the whitespace after each such macro by an empty
                    # group ("\macro blah" -> "\macro{}blah"), convert to text
                    # and strip surrounding whitespace.
                    protected_detex_fn = \
                        lambda x: l2t.latex_to_text(rxmacrospace.sub(r'\1{}', x)).strip()

                    # join name again to correctly treat accents like
                    # "Fran\c cois" or "\AA berg"
                    # The whole name string is converted at once and then
                    # parsed into a fresh Person.
                    p = Person(protected_detex_fn(str(p)))

                    # do_detex = lambda lst: [ protected_detex(x) for x in lst ]
                    # p.first_names = do_detex(p.first_names)
                    # p.middle_names = do_detex(p.middle_names)
                    # p.prelast_names = do_detex(p.prelast_names)
                    # p.last_names = do_detex(p.last_names)
                    # p.lineage = do_detex(p.lineage_names)


                if self._only_single_letter_firsts:
github phfaist / pylatexenc / pylatexenc / latex2text / __main__.py View on Github external
# Translate args.fill_text into the value handed to LatexNodes2Text:
#   -1                 -> None
#   non-empty value    -> int(args.fill_text)
#   None or empty      -> True
# NOTE(review): -1 is presumably the "option not given" sentinel set by the
# argument parser -- confirm against the parser definition.
if args.fill_text != -1:
        if args.fill_text is not None and len(args.fill_text):
            fill_text = int(args.fill_text)
        else:
            fill_text = True
    else:
        fill_text = None

    # Parse the LaTeX input with the parsing options taken from the command line.
    lw = latexwalker.LatexWalker(latex,
                                 tolerant_parsing=args.tolerant_parsing,
                                 strict_braces=args.strict_braces)

    # Only the node list is used below; pos and len_ are not referenced again
    # in the visible code.
    (nodelist, pos, len_) = lw.get_latex_nodes()

    # Configure the text converter from the remaining command-line options.
    ln2t = LatexNodes2Text(math_mode=args.math_mode,
                           keep_comments=args.keep_comments,
                           strict_latex_spaces=args.strict_latex_spaces,
                           keep_braced_groups=args.keep_braced_groups,
                           keep_braced_groups_minlen=args.keep_braced_groups_minlen,
                           fill_text=fill_text)

    # Emit the converted text followed by an extra newline (in addition to the
    # one print() adds).
    print(ln2t.nodelist_to_text(nodelist) + "\n")