Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def nodelist_to_text(self, nodelist):
"""
Extracts text from a node list. `nodelist` is a list of `latexwalker` nodes,
typically returned by
:py:meth:`pylatexenc.latexwalker.LatexWalker.get_latex_nodes()`.
This function basically applies `node_to_text()` to each node and
concatenates the results into one string. (But not quite actually,
since we take some care as to where we add whitespace according to the
class options.)
"""
# `s` accumulates the output text; `prev_node` is consulted to decide
# whether a macro's trailing space has to be re-inserted.
s = ''
prev_node = None
for node in nodelist:
if self._is_bare_macro_node(prev_node) and node.isNodeType(latexwalker.LatexCharsNode):
if not self.strict_latex_spaces['between-macro-and-chars']:
# after a macro with absolutely no arguments, include
# post_space in output by default if there are other chars
# that follow. This is for more breathing space (especially
# in equations(?)), and for compatibility with earlier
# versions of pylatexenc (<= 1.3). This is NOT LaTeX'
# default behavior (see issue #11), so we only do this when
# the 'between-macro-and-chars' entry of `strict_latex_spaces`
# is falsy, i.e. when strict spacing is NOT requested.
s += prev_node.macro_post_space
# Compute the current text column: the number of characters in `s` after
# its last newline, or the full length of `s` if it has no newline.
last_nl_pos = s.rfind('\n')
if last_nl_pos != -1:
textcol = len(s)-last_nl_pos-1
else:
textcol = len(s)
# NOTE(review): the code visible here ends after computing `textcol`; the
# per-node conversion, the update of `prev_node` and the final return are
# not shown in this excerpt -- confirm against the complete method.
def remove_full_braces(val):
    """
    Strip surrounding whitespace from `val` and, if the remaining text is
    entirely wrapped in a single ``{...}`` group, drop that outer pair of
    braces.

    The braces are only removed when the group that opens at position 0
    extends to the very end of the string.  A value such as
    ``{Maxwell}'s demon versus {Szilard}`` starts with ``{`` and ends with
    ``}`` but is not one single group; naively chopping the first and last
    characters would leave the invalid LaTeX ``Maxwell}'s demon versus
    {Szilard``, so such values are returned with their braces intact.

    Returns the stripped value, with or without its outer braces.  If the
    braced group cannot be parsed (`latexwalker.LatexWalkerError`), the
    failure is logged and the stripped value is returned unchanged.
    """
    stripped = val.strip()

    # Nothing to do unless the text both starts with '{' and ends with '}'.
    if not stripped or stripped[0] != '{' or stripped[-1] != '}':
        return stripped

    try:
        walker = latexwalker.LatexWalker(stripped, tolerant_parsing=True)
        (_group_nodes, group_pos, group_len) = walker.get_latex_braced_group(0)
    except latexwalker.LatexWalkerError:
        logger.longdebug(
            "LatexWalkerError while attempting to remove curly braces around valud in %s",
            stripped
        )
        return stripped

    if group_pos + group_len == len(stripped):
        # the leading group spans the whole field: the braces are redundant
        return stripped[1:-1]

    return stripped
Return the textual representation of the given `node`.
If `prev_node_hint` is specified, then the current node is formatted
suitably as following the node given in `prev_node_hint`. This might
affect how much space we keep/discard, etc.
"""
if node is None:
return ""
# ### It doesn't look like we use prev_node_hint at all. Eliminate at
# ### some point?
if node.isNodeType(latexwalker.LatexCharsNode):
return self.chars_node_to_text(node, textcol=textcol)
if node.isNodeType(latexwalker.LatexCommentNode):
return self.comment_node_to_text(node)
if node.isNodeType(latexwalker.LatexGroupNode):
return self.group_node_to_text(node)
if node.isNodeType(latexwalker.LatexMacroNode):
return self.macro_node_to_text(node)
if node.isNodeType(latexwalker.LatexEnvironmentNode):
return self.environment_node_to_text(node)
if node.isNodeType(latexwalker.LatexSpecialsNode):
return self.specials_node_to_text(node)
if node.isNodeType(latexwalker.LatexMathNode):
return self.math_node_to_text(node)
# now, combine them smartly into words.
cur_nodelist = []
need_protection_hint = False
for n, sep in split_nodelist:
logger.longdebug("node to consider for chunk: %r, sep=%r", n, sep)
if sep: # has separator
cur_nodelist += [n]
# flush what we've accumulated so far
yield cur_nodelist, sep, need_protection_hint
cur_nodelist = []
need_protection_hint = False
else:
# if there is anything else than a chars, macro or group node
# (group node for e.g., {\'e}), then this chunk will require
# protection (e.g. inline math)
if not n.isNodeType(latexwalker.LatexCharsNode) and \
not n.isNodeType(latexwalker.LatexMacroNode) and \
not n.isNodeType(latexwalker.LatexGroupNode):
need_protection_hint = True
# add this node to the current chunk
cur_nodelist += [n]
# flush last nodes
yield cur_nodelist, '', need_protection_hint
existing code run as it did in `pylatexenc 1.x`. Its use is however
not recommended for new code. You should use "latex specials"
instead for characters that have special LaTeX meaning.
"""
# perform suitable replacements
for pattern, replacement in text_replacements:
if hasattr(pattern, 'sub'):
s = pattern.sub(replacement, s)
else:
s = s.replace(pattern, replacement)
return s
class _PushEquationContext(latexwalker._PushPropOverride):
    """
    Override of the `strict_latex_spaces` property of a converter object
    `l2t`, using the value configured under its ``'in-equations'`` key.

    If ``l2t.strict_latex_spaces['in-equations']`` is `None`, `None` is
    handed to the base class as the new value; otherwise the entry is first
    run through `_parse_strict_latex_spaces_dict()`.

    NOTE(review): presumably the base class treats a `None` value as "leave
    the property untouched" -- confirm in `latexwalker._PushPropOverride`.
    """

    def __init__(self, l2t):
        in_equations = l2t.strict_latex_spaces['in-equations']
        if in_equations is None:
            override_value = None
        else:
            override_value = _parse_strict_latex_spaces_dict(in_equations)

        super(_PushEquationContext, self).__init__(
            l2t, 'strict_latex_spaces', override_value
        )
if node is None:
return ""
# ### It doesn't look like we use prev_node_hint at all. Eliminate at
# ### some point?
if node.isNodeType(latexwalker.LatexCharsNode):
return self.chars_node_to_text(node, textcol=textcol)
if node.isNodeType(latexwalker.LatexCommentNode):
return self.comment_node_to_text(node)
if node.isNodeType(latexwalker.LatexGroupNode):
return self.group_node_to_text(node)
if node.isNodeType(latexwalker.LatexMacroNode):
return self.macro_node_to_text(node)
if node.isNodeType(latexwalker.LatexEnvironmentNode):
return self.environment_node_to_text(node)
if node.isNodeType(latexwalker.LatexSpecialsNode):
return self.specials_node_to_text(node)
if node.isNodeType(latexwalker.LatexMathNode):
return self.math_node_to_text(node)
logger.warning("LatexNodes2Text.node_to_text(): Unknown node: %r", node)
# discard anything else.
return ""
def parse_args(self, w, pos, parsing_state=None):
    r"""
    Parse verbatim content out of the walker `w`, starting at index `pos`
    of the string `w.s`.

    When `self.verbatim_arg_type` is ``'verbatim-environment'``, everything
    from `pos` up to (but excluding) the next literal ``\end{verbatim}`` is
    wrapped in a single `LatexCharsNode` created via `w.make_node()`.

    Returns a tuple ``(argd, pos, len)`` where `argd` is a
    `ParsedVerbatimArgs` holding that chars node, `pos` is the start
    position that was passed in and `len` is the length of the verbatim
    content.  In the code visible here, no value is returned for any other
    `verbatim_arg_type`.

    Raises `latexwalker.LatexWalkerParseError` if no ``\end{verbatim}`` is
    found at or after `pos`.
    """
    from .. import latexwalker

    if self.verbatim_arg_type == 'verbatim-environment':
        # LaTeX itself simply scans forward for the literal closing tag,
        # so we do exactly the same.
        end_pos = w.s.find(r'\end{verbatim}', pos)
        if end_pos == -1:
            raise latexwalker.LatexWalkerParseError(
                s=w.s,
                pos=pos,
                msg=r"Cannot find matching \end{verbatim}"
            )

        # The closing "\end{verbatim}" is deliberately left unconsumed:
        # latexwalker expects to see it afterwards.
        content_len = end_pos - pos
        chars_node = w.make_node(
            latexwalker.LatexCharsNode,
            parsing_state=parsing_state,
            chars=w.s[pos:pos+content_len],
            pos=pos,
            len=content_len
        )
        parsed = ParsedVerbatimArgs(verbatim_chars_node=chars_node)
        return (parsed, pos, content_len)
need_protection_hint = False
for n, sep in split_nodelist:
logger.longdebug("node to consider for chunk: %r, sep=%r", n, sep)
if sep: # has separator
cur_nodelist += [n]
# flush what we've accumulated so far
yield cur_nodelist, sep, need_protection_hint
cur_nodelist = []
need_protection_hint = False
else:
# if there is anything else than a chars, macro or group node
# (group node for e.g., {\'e}), then this chunk will require
# protection (e.g. inline math)
if not n.isNodeType(latexwalker.LatexCharsNode) and \
not n.isNodeType(latexwalker.LatexMacroNode) and \
not n.isNodeType(latexwalker.LatexGroupNode):
need_protection_hint = True
# add this node to the current chunk
cur_nodelist += [n]
# flush last nodes
yield cur_nodelist, '', need_protection_hint
if title[newi-2:newi] != '}}':
# expression must be closed by '}}', i.e. we used
# get_latex_expression to get the inner {...} braced group,
# but the outer group must be closed immediately after the
# expression we read. Otherwise it's not Zotero-protected
# and we skip this group.
newtitle += title[oldi:newi]
oldi = newi
continue
# we got a very-probably-Zotero-protected "{{...}}" group
newtitle += title[oldi:i]
#protected_expression = title[i+2:newi-2]
# go through each top-level node in the protected content and
# see individually if it requires protection. Split char nodes
# at spaces.
assert len(n.nodelist) == 1 and n.nodelist[0].isNodeType(latexwalker.LatexGroupNode)
nodelist = n.nodelist[0].nodelist
new_expression = process_protection_for_expression(nodelist, l2t)
#logger.longdebug("Zotero protect block: protected_expression=%r, new_expression=%r",
# protected_expression, new_expression)
newtitle += new_expression
oldi = newi
# last remaining part of the title
newtitle += title[oldi:]
return newtitle
def _input_node_simplify_repl(self, n):
    r"""
    Produce the text for an ``\input{}`` macro node by recursing into the
    referenced file.

    `n` is the macro node; its first argument (``n.nodeargs[0]``) is
    converted to text with `nodelist_to_text()` and stripped to obtain the
    name that is passed to `self.read_input_file()`.  Whatever that call
    returns is parsed as LaTeX with a new `LatexWalker` (constructed with
    `self.latex_walker_init_args`), and the resulting node list is
    converted to text.

    Returns the converted text, or ``''`` when `read_input_file()` yields
    nothing or when the macro has no argument at all.

    A warning is logged whenever the macro does not have exactly one
    argument.  With several arguments the first one is still used; with
    none there is no file name to resolve, so ``''`` is returned instead
    of failing with an `IndexError`.
    """
    if len(n.nodeargs) != 1:
        logger.warning(u"Expected exactly one argument for '\\input' ! Got = %r", n.nodeargs)
        if not n.nodeargs:
            # no argument to take a file name from
            return ''

    input_name = self.nodelist_to_text([n.nodeargs[0]]).strip()
    inputtex = self.read_input_file(input_name)
    if not inputtex:
        return ''

    walker = latexwalker.LatexWalker(inputtex, **self.latex_walker_init_args)
    # get_latex_nodes() returns a tuple; its first element is the node list
    return self.nodelist_to_text(walker.get_latex_nodes()[0])