Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _redundantFree(self, blocks):
    """
    Annotate each record of each block with the earlier blocks that
    also contain it.

    Redundant-free Comparisons from Kolb et al, "Dedoop:
    Efficient Deduplication with Hadoop"
    http://dbs.uni-leipzig.de/file/Dedoop.pdf

    `blocks` is iterated twice (once to build the coverage index, once
    to produce results), so a one-shot iterator would already be
    exhausted when the second pass starts.

    Yields one list per block, in block order, of
    `(record_id, record, smaller_ids)` tuples, where `smaller_ids` is the
    set of indices of lower-numbered blocks that hold `record_id`.
    """
    # First pass: record id -> indices of every block containing it.
    coverage = defaultdict(list)
    for block_index, members in enumerate(blocks):
        for member_id, _ in viewitems(members):
            coverage[member_id].append(block_index)

    # Second pass: attach to each record the blocks that precede this one.
    for block_index, members in enumerate(blocks):
        if not block_index % 10000:
            logger.info("%s blocks" % block_index)

        annotated = []
        for member_id, member in viewitems(members):
            earlier = set()
            for seen_in in coverage[member_id]:
                if seen_in < block_index:
                    earlier.add(seen_in)
            annotated.append((member_id, member, earlier))

        yield annotated
def eval_assignblk(self, assignblk):
    """
    Evaluate each assignment of an AssignBlock using the current state
    Returns a dictionary mapping every modified destination to its
    evaluated source
    @assignblk: AssignBlock instance
    """
    # One cache shared by every eval_expr call made for this block
    cache = {}
    results = {}
    for dst, src in viewitems(assignblk):
        value = self.eval_expr(src, cache)
        if dst.is_mem():
            # Memory destination: the pointer is evaluated too, and the
            # result is keyed by a memory expression built on it
            address = self.eval_expr(dst.ptr, cache)
            results[ExprMem(address, dst.size)] = value
            continue
        if dst.is_id():
            results[dst] = value
            continue
        raise ValueError("Unknown destination type", str(dst))
    return results
def run_seq2seq_beam_decoder(args, model_params, decoding_params):
# Build source/target vocabularies, construct a one-model
# Seq2SeqModelCaffe2EnsembleDecoder, load it, then iterate over stdin lines.
#
# args: object whose `source_corpus`, `target_corpus`, `unk_threshold` and
#       `checkpoint` attributes are read below.
# model_params, decoding_params: passed through unchanged to the decoder.
#
# NOTE(review): the body of the final `for line in sys.stdin:` loop is not
# part of this excerpt, so per-line handling is not documented here.

# Vocabulary generated from the source corpus, using args.unk_threshold.
source_vocab = seq2seq_util.gen_vocab(
args.source_corpus,
args.unk_threshold,
)
logger.info('Source vocab size {}'.format(len(source_vocab)))
# Vocabulary generated from the target corpus, using the same threshold.
target_vocab = seq2seq_util.gen_vocab(
args.target_corpus,
args.unk_threshold,
)
# Reverse lookup of target_vocab (its values become the keys).
# Not used in the visible part of this function.
inversed_target_vocab = {v: k for (k, v) in viewitems(target_vocab)}
logger.info('Target vocab size {}'.format(len(target_vocab)))
# The "ensemble" holds exactly one model, read from args.checkpoint.
decoder = Seq2SeqModelCaffe2EnsembleDecoder(
translate_params=dict(
ensemble_models=[dict(
source_vocab=source_vocab,
target_vocab=target_vocab,
model_params=model_params,
model_file=args.checkpoint,
)],
decoding_params=decoding_params,
),
)
decoder.load_models()
# Input is consumed line by line from standard input.
for line in sys.stdin:
# NOTE(review): fragment -- the enclosing definition's header is not visible,
# so `out`, `graph`, `cluster` and `utils` come from outside this excerpt.
# Writes a `digraph traceroute` description of `graph` to `out`: one line per
# node, one line per distinct edge, and (when `cluster` is given) one
# `subgraph cluster_*` section per cluster key.
# Nodes are formatted with %d and passed to utils.int2ip, so they are
# presumably integer-encoded IP addresses -- confirm against the caller.
out.write('digraph traceroute {\n')
# Nodes and (source, destination) pairs already handled, to avoid duplicates.
nodes = set()
edges = set()
if cluster is None:
# No clustering: a node is written the first time it is seen.
def _add_node(node):
if node not in nodes:
nodes.add(node)
out.write('\t%d [label="%s"];\n' % (node, utils.int2ip(node)))
else:
# Clustering: nodes are only grouped here, keyed by cluster(node);
# they are written after all edges have been emitted.
clusters = {}
def _add_node(node):
if node not in nodes:
nodes.add(node)
clusters.setdefault(cluster(node), set()).update([node])
# `graph` is iterated as a mapping from a node to its destination nodes.
for node, node_edges in viewitems(graph):
_add_node(node)
for destnode in node_edges:
_add_node(destnode)
# Each directed edge is written only once.
if (node, destnode) not in edges:
out.write("\t%d -> %d;\n" % (node, destnode))
edges.add((node, destnode))
if cluster is not None:
# Nodes whose cluster key is None are written outside any subgraph.
if None in clusters:
for node in clusters.pop(None):
out.write('\t%d [label="%s"];\n' % (node, utils.int2ip(node)))
# NOTE(review): the loop variable below rebinds `nodes`, the name the
# `_add_node` closures test membership against. This appears harmless only
# because `_add_node` is not called after this point in the visible code --
# confirm.
for clu, nodes in viewitems(clusters):
# A plain string key serves as both the subgraph id and its label;
# otherwise clu[0] is the id and clu[1] the label.
# NOTE(review): `basestring` is not a Python 3 builtin; presumably supplied
# by a compatibility import outside this excerpt -- confirm.
if isinstance(clu, basestring):
clu = (clu, clu)
out.write('\tsubgraph cluster_%s {\n' % clu[0])
out.write('\t\tlabel = "%s";\n' % clu[1])
# The body of this loop is not part of this excerpt.
for node in nodes:
def unindex(self, data):  # pragma: no cover
    """
    Remove the records in `data` from the blocker's indexes and from
    `self.blocked_records`.

    `data` is consumed through `viewvalues` / `viewitems`, so it is
    expected to be a mapping of record id to record. Entries that are
    already missing from `self.blocked_records` are skipped silently.
    """
    for field in self.blocker.index_fields:
        # Lazily stream this field's value out of every record.
        field_values = (record[field] for record in viewvalues(data))
        self.blocker.unindex(field_values, field)

    for block_key, record_id in self.blocker(viewitems(data)):
        try:
            del self.blocked_records[block_key][record_id]
        except KeyError:
            # Nothing stored under this key / record id: nothing to remove.
            pass
def get_var_definitions(self, ssa):
    """
    Return a dictionary mapping each assigned variable to the
    (node, assignblk index) pair where it is assigned
    Non-identifier destinations and ids listed in ssa.immutable_ids are
    left out
    @ssa: SSADiGraph instance
    """
    ircfg = ssa.graph
    definitions = {}
    for loc in ircfg.nodes():
        for position, assignblk in enumerate(ircfg.blocks[loc]):
            for dst, _ in viewitems(assignblk):
                if not dst.is_id() or dst in ssa.immutable_ids:
                    continue
                # Each destination is expected to be assigned only once
                assert dst not in definitions
                definitions[dst] = (loc, position)
    return definitions
def get_children(self):
    """Return the (key, value) pairs of `self.fields` as a new list."""
    children = list(viewitems(self.fields))
    return children
def write_script_rule(self, inputs, outputs, parameters, shell_template, rule_name):
# Assemble the pieces of a `rule static_<rule_name>` text block (see the
# template below) from the given inputs, outputs, parameters and shell
# template.
# NOTE(review): the template string opened at `rule = """` is not closed
# within this excerpt, so the remainder of the function is not documented.

# Callers must not pass a '_bash_' key in `parameters`.
assert '_bash_' not in parameters
# NOTE(review): `outputs.items()[0]` requires items() to return a list
# (Python 2). On Python 3 dict views are not subscriptable and this raises
# TypeError -- confirm the target interpreter, or take the first pair with
# next(iter(...)).
first_output_name, first_output_fn = outputs.items()[0] # for rundir, since we cannot sub wildcards in shell
# With no rule name given, the first output's directory is used instead;
# the name is then passed through self.legalize and self.unique_rule_name.
if not rule_name:
rule_name = os.path.dirname(first_output_fn)
rule_name = self.unique_rule_name(self.legalize(rule_name))
wildcard_rundir = os.path.normpath(os.path.dirname(first_output_fn)) # unsubstituted
# We use snake_string_path b/c normpath drops leading ./, but we do NOT want abspath.
# Inputs and outputs are rendered as comma-separated `name=path` pairs,
# sorted so the generated text is deterministic.
input_kvs = ', '.join('%s=%s'%(k, snake_string_path(v)) for k,v in
sorted(viewitems(inputs)))
output_kvs = ', '.join('%s=%s'%(k, snake_string_path(v)) for k,v in
sorted(viewitems(outputs)))
# Parameters whose name starts with '_' are left out of the params section.
rule_parameters = {k: v for (k, v) in viewitems(parameters) if not k.startswith('_')}
#rule_parameters['reltopdir'] = os.path.relpath('.', wildcard_rundir) # in case we need this later
# Unlike inputs/outputs, params follow the dict's own iteration order (no sort).
params = ','.join('\n %s="%s"'%(k,v) for (k, v) in viewitems(rule_parameters))
shell = snake_shell(shell_template, wildcard_rundir)
# cd $(dirname '{output.%(first_output_name)s}')
rule = """
rule static_%(rule_name)s:
input: %(input_kvs)s
output: %(output_kvs)s
params:%(params)s
shell:
'''
outdir=$(dirname {output[0]})
#mkdir -p ${{outdir}}
cd ${{outdir}}
date
def guess_mnemo(cls, bs, attrib, pre_dis_info, offset):
candidates = []
candidates = set()
fname_values = pre_dis_info
todo = [
(dict(fname_values), branch, offset * 8)
for branch in list(viewitems(cls.bintree))
]
for fname_values, branch, offset_b in todo:
(l, fmask, fbits, fname, flen), vals = branch
if flen is not None:
l = flen(attrib, fname_values)
if l is not None:
try:
v = cls.getbits(bs, attrib, offset_b, l)
except IOError:
# Raised if offset is out of bound
continue
offset_b += l
if v & fmask != fbits:
continue
if fname is not None and not fname in fname_values: