Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def __init__(self, tok, arg, pos, len, pre_space, post_space=''):
    """Store the token's attributes and record which of them are fields.

    Every argument is saved unchanged on the instance under its own name.
    ``_fields`` lists the attribute names that describe the token;
    ``post_space`` is only listed there when `tok` is ``'macro'`` or
    ``'comment'`` (it is still stored on the instance in every case).
    """
    # NOTE: the `len` parameter shadows the builtin inside this method; it
    # is part of the public signature, so the name is kept.
    field_names = ['tok', 'arg', 'pos', 'len', 'pre_space']
    if tok in ('macro', 'comment'):
        field_names.append('post_space')

    self.tok, self.arg = tok, arg
    self.pos, self.len = pos, len
    self.pre_space, self.post_space = pre_space, post_space
    self._fields = field_names

    # Run the base-class initializer last, once all attributes are set.
    super(LatexToken, self).__init__()
tok=('begin_environment' if macro == 'begin' else 'end_environment'),
arg=envmatch.group(1),
pos=pos,
len=i+envmatch.end(), # !!envmatch.end() counts from pos+i
pre_space=space
)
# get the following whitespace, and store it in the macro's post_space
post_space = ''
if isalphamacro:
# important, LaTeX does not consume space after non-alpha macros, like \&
while pos+i\s*)').search(s, pos)
mlen = None
if m is not None:
if m.group('extraspace').startswith( ('\n', '\r', '\n\r',) ):
# special case where there is a \n immediately following the
# first one -- this is a new paragraph
arglen = m.start()-pos
mlen = m.start()-pos
mspace = ''
else:
arglen = m.start()-pos
mlen = m.end()-pos
if m is not None:
if m.group('extraspace').startswith( ('\n', '\r', '\n\r',) ):
# special case where there is a \n immediately following the
# first one -- this is a new paragraph
arglen = m.start()-pos
mlen = m.start()-pos
mspace = ''
else:
arglen = m.start()-pos
mlen = m.end()-pos
mspace = m.group()
else:
arglen = len(s)-pos# [ ==len(s[pos:]) ]
mlen = arglen
mspace = ''
return LatexToken(tok='comment', arg=s[pos+1:pos+arglen], pos=pos, len=mlen,
pre_space=space, post_space=mspace)
# see https://stackoverflow.com/a/19343/1694896
openbracechars, closebracechars = zip(*brace_chars)
if s[pos] in openbracechars:
return LatexToken(tok='brace_open', arg=s[pos], pos=pos, len=1, pre_space=space)
if s[pos] in closebracechars:
return LatexToken(tok='brace_close', arg=s[pos], pos=pos, len=1, pre_space=space)
# check for math-mode dollar signs. Using python syntax "string.startswith(pattern, pos)"
if s.startswith('$$', pos):
return LatexToken(tok='mathmode_display', arg='$$', pos=pos, len=2, pre_space=space)
if s.startswith('$', pos):
return LatexToken(tok='mathmode_inline', arg='$', pos=pos, len=1, pre_space=space)
if include_brace_chars:
brace_chars += include_brace_chars
if 'brackets_are_chars' in kwargs:
if not kwargs.pop('brackets_are_chars'):
brace_chars += [('[', ']')]
s = self.s # shorthand
space = ''
while pos < len(s) and s[pos].isspace():
space += s[pos]
pos += 1
if space.endswith('\n\n'): # two \n's indicate new paragraph.
return LatexToken(tok='char', arg='\n\n', pos=pos-2, len=2, pre_space=space[:-2])
if pos >= len(s):
raise LatexWalkerEndOfStream(final_space=space)
if s[pos] == '\\':
# escape sequence
if pos+1 >= len(s):
raise LatexWalkerEndOfStream()
macro = s[pos+1] # next char is necessarily part of macro
# following chars part of macro only if all are alphabetical
isalphamacro = False
i = 2
if s[pos+1].isalpha():
isalphamacro = True
while pos+i
return LatexToken(tok='mathmode_inline', arg='\\'+macro,
pos=pos, len=i, pre_space=space)
# see if we have a begin/end environment
if environments and macro in ['begin', 'end']:
# \begin{environment} or \end{environment}
envmatch = re.match(r'^\s*\{([\w*]+)\}', s[pos+i:])
if envmatch is None:
raise LatexWalkerParseError(
s=s,
pos=pos,
msg=r"Bad \{} macro: expected {{}}".format(macro),
**self.pos_to_lineno_colno(pos, as_dict=True)
)
return LatexToken(
tok=('begin_environment' if macro == 'begin' else 'end_environment'),
arg=envmatch.group(1),
pos=pos,
len=i+envmatch.end(), # !!envmatch.end() counts from pos+i
pre_space=space
)
# get the following whitespace, and store it in the macro's post_space
post_space = ''
if isalphamacro:
# important, LaTeX does not consume space after non-alpha macros, like \&
while pos+i