Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def create_asttokens(source):
    """Build an ASTTokens object for *source*, asking it to parse the code itself."""
    atok = ASTTokens(source, parse=True)
    return atok
def compile(self, source, filename, flags=0):
    """Compile *source* via the parent tracer, then attach token metadata.

    The returned traced file gains a ``tokens`` attribute: an ASTTokens
    built from *source* against the already-parsed tree in
    ``traced_file.root``.
    """
    traced = super(BirdsEye, self).compile(source, filename, flags)
    traced.tokens = ASTTokens(source, tree=traced.root)
    return traced
def verify_all_nodes(self, test_case):
    """
    Generically test atok.get_text() on the ast tree: for each statement and expression in the
    tree, we extract the text, parse it, and see if it produces an equivalent tree. Returns the
    number of nodes that were tested this way.
    """
    test_case.longMessage = True
    tested_nodes = 0
    for node in self.all_nodes:
        if not (util.is_stmt(node) or util.is_expr(node) or util.is_module(node)):
            continue
        text = self.atok.get_text(node)
        # await is not allowed outside async functions below 3.7;
        # parsing again would give a syntax error.
        if 'await' in text and 'async def' not in text and sys.version_info < (3, 7):
            continue
        # `elif:` is really just `else: if:` to the AST, so get_text can return
        # text starting with elif when given an If node. This is generally
        # harmless and there's probably no good alternative, but in isolation
        # it's invalid syntax.
        # BUG FIX: re.MULTILINE was previously passed as the 4th positional
        # argument of re.sub, which is `count` (re.MULTILINE == 8), silently
        # capping substitutions at 8 and never enabling multiline anchoring.
        # It must be passed by keyword as `flags`.
        text = re.sub(r'^(\s*)elif(\W)', r'\1if\2', text, flags=re.MULTILINE)
        rebuilt_node = test_case.parse_snippet(text, node)
        # NOTE(review): this method appears truncated in the visible source;
        # the comparison of rebuilt_node against node (and the increment of
        # tested_nodes) presumably follows — confirm against the full file.
# NOTE(review): orphan fragment — this looks like a duplicated copy of
# parse_snippet whose `def` line and docstring opening were lost when this
# file was assembled; a complete parse_snippet appears later in the file.
# Left byte-identical pending confirmation of what it belongs to.
Returns the parsed AST tree for the given text, handling issues with indentation and newlines
when text is really an extracted part of larger code.
"""
# If text is indented, it's a statement, and we need to put in a scope for indents to be valid
# (using textwrap.dedent is insufficient because some lines may not indented, e.g. comments or
# multiline strings). If text is an expression but has newlines, we parenthesize it to make it
# parsable.
# For expressions and statements, we add a dummy statement '_' before it because if it's just a
# string contained in an astroid.Const or astroid.Expr it will end up in the doc attribute and be
# a pain to extract for comparison
indented = re.match(r'^[ \t]+\S', text)
if indented:
return self.module.parse('def dummy():\n' + text).body[0].body[0]
if util.is_expr(node):
return self.module.parse('_\n(' + text + ')').body[1].value
if util.is_module(node):
return self.module.parse(text)
return self.module.parse('_\n' + text).body[1]
def collect_nodes_preorder(root):
    """Returns a list of all nodes using pre-order traversal (i.e. parent before children)."""
    collected = []

    def visit(node, par_value):  # pylint: disable=unused-argument
        # Record the node on the way down; the (None, None) pair is the
        # per-node value protocol expected by util.visit_tree.
        collected.append(node)
        return (None, None)

    util.visit_tree(root, visit, None)
    return collected
def parse_snippet(self, text, node):
    """
    Returns the parsed AST tree for the given text, handling issues with indentation and newlines
    when text is really an extracted part of larger code.
    """
    # Indented text must be a statement; wrap it in a function body so the
    # indentation is valid. (textwrap.dedent would be insufficient because
    # some lines — comments, multiline strings — may not be indented.)
    if re.match(r'^[ \t]+\S', text):
        return self.module.parse('def dummy():\n' + text).body[0].body[0]
    # For expressions and statements we prepend a dummy statement '_' so that
    # a bare string doesn't become a docstring (it would land in the doc
    # attribute of astroid.Const/astroid.Expr and be a pain to extract).
    # A multiline expression is additionally parenthesized to stay parsable.
    if util.is_expr(node):
        return self.module.parse('_\n(' + text + ')').body[1].value
    if util.is_module(node):
        return self.module.parse(text)
    return self.module.parse('_\n' + text).body[1]
def test_mark_tokens_simple(self):
    """Spot-check get_text() on fixture nodes located by (line, col, node class)."""
    source = read_fixture('astroid', 'module.py')
    tree = ast.parse(source)
    code = asttokens.CodeText(source)
    code.mark_tokens(tree)
    nodes = list(asttokens.walk(tree))

    def text_at(line, col, type_name):
        # Find the node of the given class whose first token starts at (line, col).
        start = code.get_token(line, col)
        for candidate in nodes:
            if candidate.first_token == start and candidate.__class__.__name__ == type_name:
                return code.get_text(candidate)

    # Line 14 is: [indent 4] MY_DICT[key] = val
    self.assertEqual(text_at(14, 4, 'Name'), 'MY_DICT')
    self.assertEqual(text_at(14, 4, 'Subscript'), 'MY_DICT[key]')
    self.assertEqual(text_at(14, 4, 'Assign'), 'MY_DICT[key] = val')
    # Line 35 is: [indent 12] raise XXXError()
    self.assertEqual(text_at(35, 12, 'Raise'), 'raise XXXError()')
def test_mark_tokens_multiline(self):
    """Check that get_text() preserves newlines/comments across a multiline expression."""
    source = (
        """( # line1
a, # line2
b + # line3
c + # line4
d # line5
)""")
    tree = ast.parse(source)
    code = asttokens.CodeText(source)
    code.mark_tokens(tree)
    texts = {code.get_text(node) for node in ast.walk(tree)}
    self.assertEqual(texts, {
        None, # nodes we don't care about
        source,
        'a', 'b', 'c', 'd',
        # All other expressions preserve newlines and comments but are parenthesized.
        '(b + # line3\n c)',
        '(b + # line3\n c + # line4\n d)',
        '(a, # line2\nb + # line3\n c + # line4\n d)',
    })
def test_codetext_simple(self):
    """Check CodeText on a tiny module: text round-trip and the full token stream."""
    source = "import re # comment\n\nfoo = 'bar'\n"
    code = asttokens.CodeText(source)
    self.assertEqual(code.text, source)
    expected_tokens = [
        "NAME:'import'",
        "NAME:'re'",
        "COMMENT:'# comment'",
        "NEWLINE:'\\n'",
        "NL:'\\n'",
        "NAME:'foo'",
        "OP:'='",
        'STRING:"\'bar\'"',
        "NEWLINE:'\\n'",
        "ENDMARKER:''"
    ]
    self.assertEqual([str(tok) for tok in code.tokens], expected_tokens)
    # Token 5 is the NAME token for 'foo'.
    self.assertEqual(code.tokens[5].type, token.NAME)
    self.assertEqual(code.tokens[5].string, 'foo')
def test_foo(self):
    """Dump tokens, AST node dumps, and extracted text for a small snippet.

    NOTE(review): this only prints and asserts nothing — it looks like
    scratch debugging left in the suite; consider removing or converting
    to real assertions.
    """
    source = "(a,\na + b\n + c + d)"
    tree = ast.parse(source)
    code = asttokens.CodeText(source)
    # BUG FIX: the original used Python 2 `print` statements, which are
    # syntax errors under Python 3 (this file elsewhere targets py3, e.g.
    # sys.version_info checks). Converted to the print() function, which
    # behaves the same for single-argument calls.
    for tok in code.tokens:
        print(tok)
    code.mark_tokens(tree)
    for node in ast.walk(tree):
        print(repr(node), ast.dump(node, include_attributes=True))
        print(code.get_text(node))