How to use the pylatexenc.latexwalker module in pylatexenc

To help you get started, we’ve selected a few pylatexenc examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github phfaist / pylatexenc / pylatexenc / latex2text / __init__.py View on Github external
def nodelist_to_text(self, nodelist):
        """
        Extracts text from a node list. `nodelist` is a list of `latexwalker` nodes,
        typically returned by
        :py:meth:`pylatexenc.latexwalker.LatexWalker.get_latex_nodes()`.

        This function basically applies `node_to_text()` to each node and
        concatenates the results into one string.  (But not quite actually,
        since we take some care as to where we add whitespace according to the
        class options.)
        """

        s = ''
        prev_node = None
        for node in nodelist:
            if self._is_bare_macro_node(prev_node) and node.isNodeType(latexwalker.LatexCharsNode):
                if not self.strict_latex_spaces['between-macro-and-chars']:
                    # after a macro with absolutely no arguments, include
                    # post_space in output by default if there are other chars
                    # that follow.  This is for more breathing space (especially
                    # in equations(?)), and for compatibility with earlier
                    # versions of pylatexenc (<= 1.3).  This is NOT LaTeX'
                    # default behavior (see issue #11), so only do this if the
                    # corresponding `strict_latex_spaces=` flag is set.
                    s += prev_node.macro_post_space

            last_nl_pos = s.rfind('\n')
            if last_nl_pos != -1:
                textcol = len(s)-last_nl_pos-1
            else:
                textcol = len(s)
github phfaist / bibolamazi / bibolamazi / filters / fixes.py View on Github external
def remove_full_braces(val):
    """
    Strip one pair of curly braces that encloses the whole of `val`.

    Leading and trailing whitespace is always stripped first.  The outer
    braces are removed only if the braced group that opens at position 0
    ends exactly at the last character of the string.  This guards against
    values such as "{Maxwell}'s demon versus {Szilard}", where blindly
    dropping the first and last characters would leave the invalid LaTeX
    string "Maxwell}'s demon versus {Szilard".

    Returns the stripped string, without its outer braces when they form a
    single block around the full value.  If parsing the braced group raises
    `latexwalker.LatexWalkerError`, the problem is logged at "longdebug"
    level and the stripped value is returned with its braces untouched.
    """
    val = val.strip()

    # nothing to do unless the value both starts and ends with a brace
    if not (val.startswith('{') and val.endswith('}')):
        return val

    try:
        # only the parser call can raise, so keep the try body minimal
        (_node, pos, length) = \
            latexwalker.LatexWalker(val, tolerant_parsing=True).get_latex_braced_group(0)
    except latexwalker.LatexWalkerError:
        logger.longdebug(
            "LatexWalkerError while attempting to remove curly braces around value in %s",
            val
        )
        return val

    if pos + length == len(val):
        # yes, all fine: the braces are one block for the field
        return val[1:-1]

    return val
github phfaist / pylatexenc / pylatexenc / latex2text / __init__.py View on Github external
Return the textual representation of the given `node`.

        If `prev_node_hint` is specified, then the current node is formatted
        suitably as following the node given in `prev_node_hint`.  This might
        affect how much space we keep/discard, etc.
        """
        if node is None:
            return ""

        # ### It doesn't look like we use prev_node_hint at all.  Eliminate at
        # ### some point?
        
        if node.isNodeType(latexwalker.LatexCharsNode):
            return self.chars_node_to_text(node, textcol=textcol)
        
        if node.isNodeType(latexwalker.LatexCommentNode):
            return self.comment_node_to_text(node)
        
        if node.isNodeType(latexwalker.LatexGroupNode):
            return self.group_node_to_text(node)
        
        if node.isNodeType(latexwalker.LatexMacroNode):
            return self.macro_node_to_text(node)
        
        if node.isNodeType(latexwalker.LatexEnvironmentNode):
            return self.environment_node_to_text(node)

        if node.isNodeType(latexwalker.LatexSpecialsNode):
            return self.specials_node_to_text(node)

        if node.isNodeType(latexwalker.LatexMathNode):
            return self.math_node_to_text(node)
github phfaist / bibolamazi / bibolamazi / filters / fixes.py View on Github external
# now, combine them smartly into words.
        cur_nodelist = []
        need_protection_hint = False
        for n, sep in split_nodelist:
            logger.longdebug("node to consider for chunk: %r, sep=%r", n, sep)
            if sep: # has separator
                cur_nodelist += [n]
                # flush what we've accumulated so far
                yield cur_nodelist, sep, need_protection_hint
                cur_nodelist = []
                need_protection_hint = False
            else:
                # if there is anything else than a chars, macro or group node
                # (group node for e.g., {\'e}), then this chunk will require
                # protection (e.g. inline math)
                if not n.isNodeType(latexwalker.LatexCharsNode) and \
                   not n.isNodeType(latexwalker.LatexMacroNode) and \
                   not n.isNodeType(latexwalker.LatexGroupNode):
                    need_protection_hint = True
                # add this node to the current chunk
                cur_nodelist += [n]
        # flush last nodes
        yield cur_nodelist, '', need_protection_hint
github phfaist / pylatexenc / pylatexenc / latex2text / __init__.py View on Github external
existing code run as it did in `pylatexenc 1.x`.  Its use is however
           not recommended for new code.  You should use "latex specials"
           instead for characters that have special LaTeX meaning.
        """
        
        # perform suitable replacements
        for pattern, replacement in text_replacements:
            if hasattr(pattern, 'sub'):
                s = pattern.sub(replacement, s)
            else:
                s = s.replace(pattern, replacement)

        return s


class _PushEquationContext(latexwalker._PushPropOverride):
    """
    Override of the 'strict_latex_spaces' property of a converter object,
    using the value configured for equations.

    NOTE(review): how and when the override is applied is decided by the
    base class `latexwalker._PushPropOverride`, which is not visible here;
    presumably it swaps the attribute temporarily -- confirm there.
    """

    def __init__(self, l2t):
        """
        Set up the override for `l2t`.

        `l2t.strict_latex_spaces['in-equations']` is looked up.  When it is
        not `None`, it is converted by `_parse_strict_latex_spaces_dict()`
        and passed to the base class as the new 'strict_latex_spaces' value;
        when it is `None`, `None` is passed instead.
        """
        equation_setting = l2t.strict_latex_spaces['in-equations']

        if equation_setting is None:
            override_value = None
        else:
            override_value = _parse_strict_latex_spaces_dict(equation_setting)

        super(_PushEquationContext, self).__init__(
            l2t, 'strict_latex_spaces', override_value
        )





github phfaist / pylatexenc / pylatexenc / latex2text / __init__.py View on Github external
if node is None:
            return ""

        # ### It doesn't look like we use prev_node_hint at all.  Eliminate at
        # ### some point?
        
        if node.isNodeType(latexwalker.LatexCharsNode):
            return self.chars_node_to_text(node, textcol=textcol)
        
        if node.isNodeType(latexwalker.LatexCommentNode):
            return self.comment_node_to_text(node)
        
        if node.isNodeType(latexwalker.LatexGroupNode):
            return self.group_node_to_text(node)
        
        if node.isNodeType(latexwalker.LatexMacroNode):
            return self.macro_node_to_text(node)
        
        if node.isNodeType(latexwalker.LatexEnvironmentNode):
            return self.environment_node_to_text(node)

        if node.isNodeType(latexwalker.LatexSpecialsNode):
            return self.specials_node_to_text(node)

        if node.isNodeType(latexwalker.LatexMathNode):
            return self.math_node_to_text(node)

        logger.warning("LatexNodes2Text.node_to_text(): Unknown node: %r", node)

        # discard anything else.
        return ""
github phfaist / pylatexenc / pylatexenc / macrospec / _argparsers.py View on Github external
def parse_args(self, w, pos, parsing_state=None):
        r"""
        Read the verbatim content that starts at position `pos` of `w.s`.

        For `self.verbatim_arg_type == 'verbatim-environment'`, everything
        from `pos` up to (but excluding) the next ``\end{verbatim}`` is
        wrapped in a chars node created via `w.make_node()`, and the tuple
        `(ParsedVerbatimArgs, pos, length)` is returned.

        Raises `latexwalker.LatexWalkerParseError` if no ``\end{verbatim}``
        is found at or after `pos`.

        No other value of `verbatim_arg_type` is handled in this block.
        """

        from .. import latexwalker

        if self.verbatim_arg_type == 'verbatim-environment':
            # LaTeX itself just scans ahead for the literal '\end{verbatim}',
            # so a plain substring search is the right tool here.
            end_marker_pos = w.s.find(r'\end{verbatim}', pos)
            if end_marker_pos == -1:
                raise latexwalker.LatexWalkerParseError(
                    s=w.s,
                    pos=pos,
                    msg=r"Cannot find matching \end{verbatim}"
                )

            # The "\end{verbatim}" itself is left unconsumed; latexwalker
            # will expect to see it.
            content_len = end_marker_pos - pos
            verbatim_text = w.s[pos:end_marker_pos]

            chars_node = w.make_node(
                latexwalker.LatexCharsNode,
                parsing_state=parsing_state,
                chars=verbatim_text,
                pos=pos,
                len=content_len
            )
            return (ParsedVerbatimArgs(verbatim_chars_node=chars_node), pos, content_len)
github phfaist / bibolamazi / bibolamazi / filters / fixes.py View on Github external
need_protection_hint = False
        for n, sep in split_nodelist:
            logger.longdebug("node to consider for chunk: %r, sep=%r", n, sep)
            if sep: # has separator
                cur_nodelist += [n]
                # flush what we've accumulated so far
                yield cur_nodelist, sep, need_protection_hint
                cur_nodelist = []
                need_protection_hint = False
            else:
                # if there is anything else than a chars, macro or group node
                # (group node for e.g., {\'e}), then this chunk will require
                # protection (e.g. inline math)
                if not n.isNodeType(latexwalker.LatexCharsNode) and \
                   not n.isNodeType(latexwalker.LatexMacroNode) and \
                   not n.isNodeType(latexwalker.LatexGroupNode):
                    need_protection_hint = True
                # add this node to the current chunk
                cur_nodelist += [n]
        # flush last nodes
        yield cur_nodelist, '', need_protection_hint
github phfaist / bibolamazi / bibolamazi / filters / fixes.py View on Github external
if title[newi-2:newi] != '}}':
            # expression must be closed by '}}', i.e. we used
            # get_latex_expression to get the inner {...} braced group,
            # but the outer group must be closed immediately after the
            # expression we read. Otherwise it's not Zotero-protected
            # and we skip this group.
            newtitle += title[oldi:newi]
            oldi = newi
            continue
        # we got a very-probably-Zotero-protected "{{...}}" group
        newtitle += title[oldi:i]
        #protected_expression = title[i+2:newi-2]
        # go through each top-level node in the protected content and
        # see individually if it requires protection.  Split char nodes
        # at spaces.
        assert len(n.nodelist) == 1 and n.nodelist[0].isNodeType(latexwalker.LatexGroupNode)
        nodelist = n.nodelist[0].nodelist
        new_expression = process_protection_for_expression(nodelist, l2t)
        #logger.longdebug("Zotero protect block: protected_expression=%r, new_expression=%r",
        #                 protected_expression, new_expression)
        newtitle += new_expression
        oldi = newi

    # last remaining part of the title
    newtitle += title[oldi:]
    return newtitle
github phfaist / pylatexenc / pylatexenc / latex2text / __init__.py View on Github external
def _input_node_simplify_repl(self, n):
        r"""
        Produce the text that replaces an ``\input`` macro node `n` by
        recursing into the referenced file.

        The first entry of `n.nodeargs` is converted to text and stripped,
        then passed to `self.read_input_file()`.  The content that comes
        back is parsed with a new `LatexWalker` (constructed with
        `self.latex_walker_init_args`) and converted to text.

        Returns the converted text of the input file, or '' if
        `read_input_file()` yields nothing or the macro has no argument at
        all.  A warning is logged whenever the number of arguments is not
        exactly one.
        """

        if len(n.nodeargs) != 1:
            logger.warning(u"Expected exactly one argument for '\\input' ! Got = %r", n.nodeargs)
            if not n.nodeargs:
                # No argument to read a file name from; indexing [0] below
                # would raise IndexError, so produce no text instead.
                return ''

        # with surplus arguments we still go ahead using the first one
        inputtex = self.read_input_file(self.nodelist_to_text([n.nodeargs[0]]).strip())

        if not inputtex:
            return ''

        return self.nodelist_to_text(
            latexwalker.LatexWalker(inputtex, **self.latex_walker_init_args)
            .get_latex_nodes()[0]
        )