left_curly +
multi_choice_answers.setParseAction(set_multi_choice_answers) +
right_curly
).setParseAction(set_multi_choice_question)
multi_choice_question.ignore(comment)
# True-false questions.
# Sample:
# // question: 0 name: TrueStatement using {T} style
# ::TrueStatement about Grant::Grant was buried in a tomb in NY.{T}
#
# // question: 0 name: FalseStatement using {FALSE} style
# ::FalseStatement about sun::The sun rises in the West.{FALSE}
true_false_feedback = Combine(
pound +
SkipTo(right_curly).setParseAction(strip_spaces))
true_false_answer = (
left_curly +
boolean('answer') +
Optional(true_false_feedback, default='')('feedback') +
right_curly)
true_false_question = (
Optional(title, default='') +
task +
true_false_answer
).setParseAction(set_true_false_question)
true_false_question.ignore(comment)
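# A minimal, self-contained sketch (not the parser above; names such as _title and
# _answer are illustrative) of how a GIFT-style true/false line like
# "::Name::Statement{T}" can be picked apart with pyparsing.
from pyparsing import CaselessKeyword, Optional, SkipTo, Suppress

_title = Suppress("::") + SkipTo("::")("name") + Suppress("::")
_answer = Suppress("{") + (CaselessKeyword("TRUE") | CaselessKeyword("T")
                           | CaselessKeyword("FALSE") | CaselessKeyword("F"))("answer") + Suppress("}")
_question = Optional(_title) + SkipTo("{")("task") + _answer

# prints the name, task and answer fields as a dict
print(_question.parseString(
    "::TrueStatement about Grant::Grant was buried in a tomb in NY.{T}").asDict())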
OBJECT = CaselessLiteral("object").suppress()
attribute_value_pair = Forward() # this is recursed in item_list_entry
simple_identifier = Word(alphas, alphanums + "_")
identifier = Combine( simple_identifier + ZeroOrMore( Literal(".") + simple_identifier ))
object_name = identifier
object_type = identifier
# Integer and floating point values are converted to Python ints and floats, respectively.
int_value = Combine(Optional("-") + Word(nums)).setParseAction(lambda s,l,t: [ int(t[0]) ] )
float_value = Combine(Optional("-") + Optional(Word(nums)) + "." + Word(nums)).setParseAction(lambda s,l,t: [ float(t[0]) ] )
number_value = float_value | int_value
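# Standalone sketch (names are illustrative) of the same conversion idea:
# the parse action swaps the matched text for a real Python int or float.
from pyparsing import Combine, Optional, Word, nums

_int = Combine(Optional("-") + Word(nums)).setParseAction(lambda t: int(t[0]))
_float = Combine(Optional("-") + Optional(Word(nums)) + "." + Word(nums)
                 ).setParseAction(lambda t: float(t[0]))
print((_float | _int).parseString("-3.25"))  # -> [-3.25]
print((_float | _int).parseString("42"))     # -> [42]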
# Base16 constants are left in string form, including the surrounding braces.
base16_value = Combine(Literal("{") + OneOrMore(Word("0123456789ABCDEFabcdef")) + Literal("}"), adjacent=False)
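# Standalone illustration (assumed input) of the Combine(..., adjacent=False)
# behaviour used above: the braces and hex digit groups come back as one string.
from pyparsing import Combine, Literal, OneOrMore, Word

_b16 = Combine(Literal("{") + OneOrMore(Word("0123456789ABCDEFabcdef")) + Literal("}"),
               adjacent=False)
print(_b16.parseString("{DEAD BEEF 0123}"))  # -> ['{DEADBEEF0123}']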
# This is the first part of a hack to convert the various Delphi partial sglQuotedStrings
# into a single sglQuotedString equivalent. The gist of it is to combine
# all sglQuotedStrings (with their surrounding quotes suppressed)
# with sequences of #xyz character constants, where the "strings" are
# concatenated with a '+' sign.
unquoted_sglQuotedString = Combine( Literal("'").suppress() + ZeroOrMore( CharsNotIn("'\n\r") ) + Literal("'").suppress() )
# The parse action on this production converts repetitions of constants into a single string.
pound_char = Combine(
OneOrMore((Literal("#").suppress()+Word(nums)
).setParseAction( lambda s, l, t: to_chr(int(t[0]) ))))
# This is the second part of the hack. It combines the various "unquoted"
# partial strings into a single one. Then, the parse action puts
# a single matched pair of quotes around it.
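# A minimal standalone sketch (assumed names, not the original grammar) of the
# two-part hack described above: unquoted string pieces and #nnn character
# constants are concatenated, and a parse action re-wraps the result in quotes.
from pyparsing import CharsNotIn, Combine, Literal, OneOrMore, Word, ZeroOrMore, nums

_unquoted = Combine(Literal("'").suppress()
                    + ZeroOrMore(CharsNotIn("'\n\r"))
                    + Literal("'").suppress())
_pound = Combine(OneOrMore(
    (Literal("#").suppress() + Word(nums)).setParseAction(lambda t: chr(int(t[0])))))
_delphi_string = Combine(
    OneOrMore(_unquoted | _pound | Literal("+").suppress()), adjacent=False
).setParseAction(lambda t: "'%s'" % t[0])

print(_delphi_string.parseString("'Hello'#44' world'"))  # -> ["'Hello, world'"]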
pyparsing.Word(pyparsing.nums)
+ ','
+ pyparsing.Optional(
pyparsing.delimitedList(pyparsing.Word(pyparsing.nums), delim=',')
+ pyparsing.Optional(',')
)
)
+ ')'
)
tgrep_node_label = pyparsing.Regex('[A-Za-z0-9]+')
tgrep_node_label_use = pyparsing.Combine('=' + tgrep_node_label)
# see _tgrep_segmented_pattern_action
tgrep_node_label_use_pred = tgrep_node_label_use.copy()
macro_name = pyparsing.Regex('[^];:.,&|<>()[$!@%\'^=\r\t\n ]+')
macro_name.setWhitespaceChars('')
macro_use = pyparsing.Combine('@' + macro_name)
tgrep_node_expr = (
tgrep_node_label_use_pred
| macro_use
| tgrep_nltk_tree_pos
| tgrep_qstring_icase
| tgrep_node_regex_icase
| tgrep_qstring
| tgrep_node_regex
| '*'
| tgrep_node_literal
)
tgrep_node_expr2 = (
tgrep_node_expr
+ pyparsing.Literal('=').setWhitespaceChars('')
+ tgrep_node_label.copy().setWhitespaceChars('')
) | tgrep_node_expr
return Constant(name=v[0], type=v[1])
def app_handler(v):
v = v.asList()
args = v[1:]
return App(predicate=v[0], args=args, type=None)
def lambda_handler(v):
v = v.asList()
var = v[0]
var.type = v[1]
return Lambda(var=var, body=v[2], type=None)
intexpr = Word(nums)
varnameexpr = Word(nums+"P")
realexpr = Combine(Word(nums) + "." + Word(nums))
colon = Suppress(":")
typeexpr = Forward()
typeexpr << (Combine(Word(alphanums+"_") + Optional("*")) |
(Suppress("<") + typeexpr + Suppress(",") + typeexpr + Suppress(">")))
typeexpr.setParseAction(type_handler)
varexpr = Combine("$"+varnameexpr)
varexpr.setParseAction(var_handler)
identifierexpr = Word(alphanums+"-"+"_"+"<"+">"+"=")
constexpr = (intexpr + colon + typeexpr) | \
(realexpr + colon + typeexpr) | \
+ Optional(Combine('/' + Optional(url_path)))('url_path')
+ (Optional(Combine('?' + url_query)('url_query')) & Optional(Combine('#' + url_fragment)('url_fragment')))
)
scheme_less_url = alphanum_word_start + Combine(
Or(
[
Combine(
url_scheme('url_scheme')
+ '://'
+ url_authority('url_authority')
+ Optional(Combine('/' + Optional(url_path)))('url_path')
),
Combine(url_authority('url_authority') + Combine('/' + Optional(url_path))('url_path')),
]
)
+ (Optional(Combine('?' + url_query)('url_query')) & Optional(Combine('#' + url_fragment)('url_fragment')))
)
# This allows matching file hashes preceded by an 'x' or 'X' (https://github.com/fhightower/ioc-finder/issues/41).
file_hash_word_start = WordStart(wordChars=alphanums.replace('x', '').replace('X', ''))
md5 = file_hash_word_start + Word(hexnums, exact=32).setParseAction(downcaseTokens) + alphanum_word_end
imphash = Combine(
Or(['imphash', 'import hash']) + Optional(Word(printables, excludeChars=alphanums)) + md5('hash'),
joinString=' ',
adjacent=False,
)
sha1 = file_hash_word_start + Word(hexnums, exact=40).setParseAction(downcaseTokens) + alphanum_word_end
sha256 = file_hash_word_start + Word(hexnums, exact=64).setParseAction(downcaseTokens) + alphanum_word_end
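# Standalone sketch (names are illustrative) of the trick noted above: dropping
# 'x'/'X' from the WordStart character set lets an MD5 written as "x<hash>" match.
from pyparsing import Word, WordEnd, WordStart, alphanums, hexnums

_start = WordStart(wordChars=alphanums.replace('x', '').replace('X', ''))
_end = WordEnd(wordChars=alphanums)
_md5 = _start + Word(hexnums, exact=32) + _end

_text = "hash is x0cc175b9c0f1b6a831c399e269772661 here"
print([t[0] for t, s, e in _md5.scanString(_text)])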
authentihash = Combine(
Or(['authentihash']) + Optional(Word(printables, excludeChars=alphanums)) + sha256('hash'),
joinString=' ',
adjacent=False,
# Define words as Unicode alphanumerics and/or one of "-\'"
word = Regex(r"[\w\-\']+", re.UNICODE).setName("word")
words = OneOrMore(word).setName("literal")
# Define a parser for reserved names.
reserved_names = Combine(PPLiteral("NULL") ^ PPLiteral("VOID"))
# This will match one or more alphanumeric Unicode characters and/or any of the
# following special characters: +-:;|/\()[]@#%!^&~$
base_name = Regex(r"[\w\+\-;:\|/\\\(\)\[\]@#%!\^&~\$]+", re.UNICODE)\
.setName("base name")
# A qualified name is a base name plus one or more base names joined by dots,
# i.e. Java package syntax.
qualified_name = Combine(base_name + OneOrMore("." + base_name))\
.setName("qualified name")
# An optionally qualified name is either a base name or a qualified name. This is
# used for rule references.
optionally_qualified_name = Combine(base_name ^ qualified_name)
# Import names are similar, except that they can have wildcards on the end for
# importing all public rules in a grammar
import_name = Combine((qualified_name + Optional(".*")) ^ (base_name + ".*"))
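# Small self-contained sketch (illustrative names, simplified regex) of the dotted
# names described above: a base name plus dot-joined parts, optionally ending in ".*".
from pyparsing import Combine, OneOrMore, Optional, Regex

_base = Regex(r"[A-Za-z_]\w*")
_qualified = Combine(_base + OneOrMore("." + _base))
_import = Combine((_qualified | _base) + Optional(".*"))

print(_import.parseString("com.example.grammar.*"))  # -> ['com.example.grammar.*']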
# Grammar names cannot include semicolons because the declared grammar name parser
# will gobble any semicolon after the name that isn't separated by whitespace,
# leading to a parser error.
_grammar_base_name = Regex(r"[\w\+\-:\|/\\\(\)\[\]@#%!\^&~\$]+", re.UNICODE)\
.setName("base name")
grammar_name = Combine(_grammar_base_name ^ Combine(
def __init__(self, debug=False):
self.debug = debug
self.logger = logging.getLogger('StaSh.Parser')
escaped = pp.Combine("\\" + pp.Word(pp.printables + ' ', exact=1)).setParseAction(self.escaped_action)
escaped_oct = pp.Combine(
"\\" + pp.Word('01234567', max=3)
).setParseAction(self.escaped_oct_action)
escaped_hex = pp.Combine(
"\\x" + pp.Word('0123456789abcdefABCDEF', exact=2)
).setParseAction(self.escaped_hex_action)
# A special uq_word is needed, e.g. &3 for the file descriptor of the Pythonista interactive prompt
uq_word = (pp.Literal('&3') | pp.Word(_WORD_CHARS)).setParseAction(self.uq_word_action)
bq_word = pp.QuotedString('`', escChar='\\', unquoteResults=False).setParseAction(self.bq_word_action)
dq_word = pp.QuotedString('"', escChar='\\', unquoteResults=False).setParseAction(self.dq_word_action)
sq_word = pp.QuotedString("'", escChar='\\', unquoteResults=False).setParseAction(self.sq_word_action)
# The ^ operator means longest match (as opposed to | which means first match)
word = pp.Combine(pp.OneOrMore(escaped ^ escaped_oct ^ escaped_hex
^ uq_word ^ bq_word ^ dq_word ^ sq_word))\
.setParseAction(self.word_action)
identifier = pp.Word(pp.alphas + '_', pp.alphas + pp.nums + '_').setParseAction(self.identifier_action)
assign_op = pp.Literal('=').setParseAction(self.assign_op_action)
assignment_word = pp.Combine(identifier + assign_op + word).setParseAction(self.assignment_word_action)
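# Standalone illustration (toy expressions, not part of the parser above) of the
# '^' vs '|' note earlier: '^' (Or) picks the longest matching alternative, while
# '|' (MatchFirst) picks the first alternative that matches.
from pyparsing import Word, nums

longest_match = Word(nums) ^ (Word(nums) + '.' + Word(nums))
first_match = Word(nums) | (Word(nums) + '.' + Word(nums))
print(longest_match.parseString("3.14"))  # -> ['3', '.', '14']
print(first_match.parseString("3.14"))    # -> ['3']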
def _quoted(expr):
return Combine(Suppress(Literal("'")) + expr + Suppress(Literal("'")))
def __init__(self):
"""
expop :: '^'
multop :: '*' | '/'
addop :: '+' | '-'
integer :: ['+' | '-'] '0'..'9'+
atom :: PI | E | real | fn '(' expr ')' | '(' expr ')'
factor :: atom [ expop factor ]*
term :: factor [ multop factor ]*
expr :: term [ addop term ]*
"""
point = Literal(".")
e = CaselessLiteral("E")
fnumber = Combine(Word("+-" + nums, nums) +
Optional(point + Optional(Word(nums))) +
Optional(e + Word("+-" + nums, nums)))
ident = Word(alphas, alphas + nums + "_$")
plus = Literal("+")
minus = Literal("-")
mult = Literal("*")
div = Literal("/")
lpar = Literal("(").suppress()
rpar = Literal(")").suppress()
addop = plus | minus
multop = mult | div
expop = Literal("^")
pi = CaselessLiteral("PI")
expr = Forward()
atom = ((Optional(oneOf("- +")) +
(pi | e | fnumber | ident + lpar + expr + rpar).setParseAction(self.pushFirst))
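# A compact standalone sketch (an assumption, not the class above) of the same
# precedence ladder described in the docstring, using pyparsing's infixNotation
# helper instead of the hand-rolled Forward-based grammar.
import math
from pyparsing import CaselessLiteral, Regex, infixNotation, opAssoc

_number = Regex(r"\d+(\.\d*)?([eE][+-]?\d+)?").setParseAction(lambda t: float(t[0]))
_pi = CaselessLiteral("PI").setParseAction(lambda: math.pi)
_expr = infixNotation(_number | _pi, [
    ("^", 2, opAssoc.RIGHT),             # expop
    (Regex(r"[*/]"), 2, opAssoc.LEFT),   # multop
    (Regex(r"[+-]"), 2, opAssoc.LEFT),   # addop
])
print(_expr.parseString("2*3+4"))  # -> [[[2.0, '*', 3.0], '+', 4.0]]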
def __init__(self):
# speed up infixNotation considerably at the price of some cache memory
ParserElement.enablePackrat()
boolean = Keyword('True') | Keyword('False')
none = Keyword('None')
integer = Word(nums)
real = Combine(Word(nums) + "." + Word(nums))
string = (QuotedString('"', escChar='\\')
| QuotedString("'", escChar='\\'))
regex = QuotedString('/', escChar='\\')
identifier = Word(alphas, alphanums + '_')
dereference = infixNotation(identifier, [
(Literal('.'), 2, opAssoc.LEFT, EvalArith),
])
result = (Keyword('bad') | Keyword('fail') | Keyword('good')
| Keyword('ignore') | Keyword('unknown'))
rval = boolean | none | real | integer | string | regex | result | dereference
rvallist = Group(Suppress('[') + delimitedList(rval) + Suppress(']'))
rvalset = Group(Suppress('{') + delimitedList(rval) + Suppress('}'))
operand = rval | rvallist | rvalset
# parse actions replace the parsed tokens with an instantiated object
# which we can later call into for evaluation of its content
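# Minimal standalone illustration (hypothetical Num class) of the pattern the
# comment describes: a parse action swaps the raw tokens for an object that
# knows how to evaluate itself later.
from pyparsing import Word, nums

class Num:
    def __init__(self, tokens):
        self.value = int(tokens[0])
    def eval(self):
        return self.value

number = Word(nums).setParseAction(Num)
parsed = number.parseString("42")[0]
print(parsed.eval())  # -> 42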