Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def do_test(tex, uni, math_mode=None):
    """Check that ``tex`` converts to ``uni`` with strict LaTeX spacing.

    ``math_mode`` is forwarded to ``LatexNodes2Text`` only when it is not
    ``None``; otherwise the converter is built without that argument.
    ``self`` is taken from the enclosing scope.
    """
    extra = {} if math_mode is None else {'math_mode': math_mode}
    converter = LatexNodes2Text(strict_latex_spaces=True, **extra)
    self.assertEqual(
        converter.latex_to_text(tex),
        uni,
        msg="For TeX=r'{}'".format(tex),
    )
def do_test(tex, uni):
    """Assert that a default ``LatexNodes2Text`` renders ``tex`` as ``uni``.

    ``self`` is taken from the enclosing scope.
    """
    converted = LatexNodes2Text().latex_to_text(tex)
    failure_note = "For TeX=r'{}'".format(tex)
    self.assertEqual(converted, uni, msg=failure_note)
def test_mathmodes_02(self):
    """With ``math_mode='verbatim'`` the converted text must equal the input
    (up to whitespace) for inline and display math alike."""
    latex = r"""
If $\alpha=1$ and \(\beta=2\), then
\[
\beta=2\alpha\ ,
\]
or, equivalently,
$$ \alpha = \frac1{\beta}\ .$$
"""
    converted = LatexNodes2Text(math_mode='verbatim').latex_to_text(latex)
    # math stays verbatim, so the input doubles as the expected output
    self.assertEqualUpToWhitespace(converted, latex)
def test_accents(self):
    """Check node-list conversion of accent macros and of a negated
    relation inside inline math."""

    def to_text(tex, **options):
        # Parse ``tex`` and convert the resulting node list with the
        # requested converter options.
        converter = LatexNodes2Text(**options)
        nodes = LatexWalker(tex).get_latex_nodes()[0]
        return converter.nodelist_to_text(nodes)

    # cedilla given as a macro followed by a space-separated argument
    self.assertEqual(
        to_text(r"Fran\c cais"),
        '''Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais''',
    )

    # acute accent with a bare argument and with a braced argument
    self.assertEqual(
        to_text(r"Fr\'en{\'{e}}tique"),
        '''Fr\N{LATIN SMALL LETTER E WITH ACUTE}n\N{LATIN SMALL LETTER E WITH ACUTE}tique''',
    )

    # expected value: "=" plus combining overlay, NFC-normalized
    self.assertEqual(
        to_text(r"$1 \not= 2$", math_mode='with-delimiters'),
        '''$1 {} 2$'''.format(
            unicodedata.normalize('NFC', "=\N{COMBINING LONG SOLIDUS OVERLAY}")
        ),
    )
\beta=2\alpha\ ,
\]
or, equivalently,
$$ \alpha = \frac1{\beta}\ .$$
"""
correct_text = r"""
If $α=1$ and \(β=2\), then
\[
β=2α ,
\]
or, equivalently,
$$
α = 1/β .
$$
"""
l2t = LatexNodes2Text(math_mode='with-delimiters')
self.assertEqualUpToWhitespace(
l2t.latex_to_text(latex),
correct_text
)
def test_keep_braced_groups(self):
    """Check which braced groups survive conversion when
    ``keep_braced_groups=True``, with and without an explicit
    ``keep_braced_groups_minlen``."""

    def convert(tex, **options):
        # Every case enables ``keep_braced_groups``; ``options`` carries the
        # per-case extras.
        converter = LatexNodes2Text(keep_braced_groups=True, **options)
        nodes = LatexWalker(tex).get_latex_nodes()[0]
        return converter.nodelist_to_text(nodes)

    # default minimum length: the macro argument and the accent argument
    # lose their braces, the explicit group keeps them
    self.assertEqual(
        convert(r"\textit{Voil\`a du texte}. Il est \'{e}crit {en fran{\c{c}}ais}"),
        '''Voil\N{LATIN SMALL LETTER A WITH GRAVE} du texte. Il est \N{LATIN SMALL LETTER E WITH ACUTE}crit {en fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais}''',
    )

    # minlen=4: the three-character group is unwrapped, the four-character
    # group is kept
    self.assertEqual(
        convert(r"A{XYZ}{ABCD}", keep_braced_groups_minlen=4),
        '''AXYZ{ABCD}''',
    )

    # minlen=0: every group is kept
    self.assertEqual(
        convert(r"{A}{XYZ}{ABCD}", keep_braced_groups_minlen=0),
        '''{A}{XYZ}{ABCD}''',
    )
# Fragment of a larger routine (its header is not visible here): appends to
# `new_expression` chunk by chunk, wrapping a chunk in double braces when it
# needs protection. Each iteration yields a node list `nl`, the separator
# `sep` appended after it, and the flag `need_protection_hint`.
for nl, sep, need_protection_hint in iterate_over_words_in_nodelist(nodelist):
#logger.longdebug("chunk: nl=%r, sep=%r, need_protection_hint=%r, text-version=%r",
# nl, sep, need_protection_hint, l2t.nodelist_to_text(nl))
# Verbatim LaTeX source of the chunk, concatenated node by node.
nl_to_latex = "".join(nnn.latex_verbatim() for nnn in nl)
if need_protection_hint:
# The iterator already flagged this chunk: protect it unconditionally.
#logger.longdebug("protecting chunk due to hint flag")
new_expression += '{{' + nl_to_latex + '}}' + sep
elif needs_protection(l2t.nodelist_to_text(nl)):
# No hint, but the plain-text rendering of the chunk asks for protection.
#logger.longdebug("protecting chunk by inspection of text representation")
new_expression += '{{' + nl_to_latex + '}}' + sep
else:
# Nothing to protect: emit the chunk's LaTeX unchanged.
new_expression += nl_to_latex + sep
return new_expression
# Fragment (enclosing function and the rest of the loop body are not visible
# here): scans `title` for groups opened by '{{' and parses each one with the
# LaTeX walker.
lw = latexwalker.LatexWalker(title)
l2t = latex2text.LatexNodes2Text(math_mode='with-delimiters',
latex_context=butils.latex2text_latex_context)
newtitle = ''
# Search position for the next '{{'.
# NOTE(review): `oldi` is not advanced within the visible lines; presumably
# that happens further down in the loop body -- confirm.
oldi = 0
while True:
i = title.find('{{', oldi)
if i == -1: # not found
break
# Parse the expression starting at the first of the two opening braces.
(n, pos, len_) = lw.get_latex_expression(i, strict_braces=False)
assert pos == i
# Index just past the parsed expression.
newi = i + len_
# Check the two characters that end the parsed expression.
if title[newi-2:newi] != '}}':
# expression must be closed by '}}', i.e. we used
# get_latex_expression to get the inner {...} braced group,
# but the outer group must be closed immediately after the
# expression we read. Otherwise it's not Zotero-protected
# and we skip this group.
# Fragment (enclosing handler not visible here): converts the stripped query
# text with LatexNodes2Text and fills in `item.text` / `item.subtext` with
# either the converted result or a prompt.
stripped = query.string.strip()
success = False
if stripped:
# Make sure the input starts with a backslash (presumably so that a bare
# word is read as a macro name -- confirm against the caller).
if not stripped.startswith('\\'):
stripped = '\\' + stripped
# Remove double backslashes (newlines)
stripped = stripped.replace('\\\\', ' ')
# pylatexenc doesn't support \not
# NOTE(review): this is a plain substring replace, so any macro that merely
# starts with "\not" (e.g. "\notin") is rewritten as well -- confirm that
# this is intended.
stripped = stripped.replace('\\not', '@NOT@')
# pylatexenc doesn't like backslashes at end of string
if not stripped.endswith('\\'):
n = LatexNodes2Text()
result = n.latex_to_text(stripped)
if result:
result = unicodedata.normalize('NFC', result)
# Put the overlay right after the first non-space character that follows
# each placeholder (dropping the whitespace in between), i.e. negate it.
result = re.sub(r'@NOT@\s*(\S)', '\\1' + COMBINING_LONG_SOLIDUS_OVERLAY, result)
# Drop placeholders that had no following character to attach to.
result = result.replace('@NOT@', '')
# Normalize again so character + overlay pairs compose where a
# precomposed form exists.
result = unicodedata.normalize('NFC', result)
item.text = result
item.subtext = 'Result'
success = True
# Fallback: echo the (preprocessed) input and show a prompt instead.
if not success:
item.text = stripped
item.subtext = 'Type some TeX math'
success = False
if success:
# Fragment (enclosing method not visible here): optional conversion of a
# person's name from LaTeX to Unicode text. The commented-out lines below are
# an earlier variant kept by the original author.
#
# # de-latex the person first
# pstr = str(p)
# # BUG: FIXME: remove space after any macros
# # replace "blah\macro blah" by "blah\macro{}blah"
# pstr = re.sub(r'(\\[a-zA-Z]+)\s+', r'\1{}', pstr)
#if (self._names_to_utf8):
# pstr = latex2text.latex2text(pstr)
#
#p = Person(pstr)
if self._names_to_utf8:
# delatex everything to UTF-8, but honor names protected by
# braces and keep those
# Matches a macro name (backslash + letters) followed by whitespace.
rxmacrospace = re.compile(r'(\\[a-zA-Z]+)\s+')
l2t = LatexNodes2Text(keep_braced_groups=True, strict_latex_spaces=True)
# Replace "\macro<spaces>" by "\macro{}" before converting, then strip
# surrounding whitespace from the converted text.
protected_detex_fn = \
lambda x: l2t.latex_to_text(rxmacrospace.sub(r'\1{}', x)).strip()
# join name again to correctly treat accents like
# "Fran\c cois" or "\AA berg"
# The whole name is converted as one string and re-parsed into a Person,
# rather than converting each name part separately (see the disabled
# per-part variant below).
p = Person(protected_detex_fn(str(p)))
# do_detex = lambda lst: [ protected_detex(x) for x in lst ]
# p.first_names = do_detex(p.first_names)
# p.middle_names = do_detex(p.middle_names)
# p.prelast_names = do_detex(p.prelast_names)
# p.last_names = do_detex(p.last_names)
# p.lineage = do_detex(p.lineage_names)
if self._only_single_letter_firsts:
# Translate the command-line ``fill_text`` value into the converter argument:
# -1 -> None, a non-empty value -> its integer value, anything else -> True.
if args.fill_text == -1:
    fill_text = None
elif args.fill_text is not None and len(args.fill_text):
    fill_text = int(args.fill_text)
else:
    fill_text = True

# Parse the input into a node list using the parser flags from the command line.
lw = latexwalker.LatexWalker(
    latex,
    tolerant_parsing=args.tolerant_parsing,
    strict_braces=args.strict_braces,
)
(nodelist, pos, len_) = lw.get_latex_nodes()

# Build the text converter from the remaining command-line options.
converter_options = dict(
    math_mode=args.math_mode,
    keep_comments=args.keep_comments,
    strict_latex_spaces=args.strict_latex_spaces,
    keep_braced_groups=args.keep_braced_groups,
    keep_braced_groups_minlen=args.keep_braced_groups_minlen,
    fill_text=fill_text,
)
ln2t = LatexNodes2Text(**converter_options)

# Emit the converted text followed by an extra newline.
print(ln2t.nodelist_to_text(nodelist) + "\n")