def explode_directories(child_directories, already_split=False):
# get all directories including parents
# use already_split=True for the result of get_all_directories()
maybe_split = lambda x: x if already_split else x.split('/')
return set(concat(accumulate(join, maybe_split(directory)) for directory in child_directories))
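# Tiny standalone run of the pattern above (assuming `join` is os.path.join and that
# concat/accumulate come from toolz or an equivalent vendored copy, which is how this
# helper is normally wired up):
from os.path import join
from toolz import accumulate, concat

child_directories = ["a/b/c", "a/d"]
exploded = set(concat(accumulate(join, d.split("/")) for d in child_directories))
# exploded == {"a", "a/b", "a/b/c", "a/d"}   (on a "/"-separated platform)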
self.embedding_dimension = dimensions[0]
self.hidden_dimension = dimensions[-1]
# construct the encoder
encoder_units = build_units(self.dimensions[:-1], activation)
encoder_units.extend(
build_units([self.dimensions[-2], self.dimensions[-1]], None)
)
self.encoder = nn.Sequential(*encoder_units)
# construct the decoder
decoder_units = build_units(reversed(self.dimensions[1:]), activation)
decoder_units.extend(
build_units([self.dimensions[1], self.dimensions[0]], final_activation)
)
self.decoder = nn.Sequential(*decoder_units)
# initialise the weights and biases in the layers
for layer in concat([self.encoder, self.decoder]):
weight_init(layer[0].weight, layer[0].bias, gain)
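# build_units is not shown in this excerpt; the sketch below is only a plausible
# assumption about its shape, not the library's actual helper: one nn.Sequential per
# adjacent pair of dimensions, holding a Linear layer plus an optional activation,
# which is why layer[0] in the init loop above is the Linear layer.
import torch.nn as nn

def build_units(dimensions, activation):
    dims = list(dimensions)  # accept any iterable, e.g. reversed(...)
    def unit(in_dim, out_dim):
        layers = [nn.Linear(in_dim, out_dim)]
        if activation is not None:
            layers.append(activation)
        return nn.Sequential(*layers)
    return [unit(i, o) for i, o in zip(dims[:-1], dims[1:])]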
prepared_specs = set(concatv(
specs_to_remove,
specs_to_add,
itervalues(specs_from_history_map),
))
index, r = self._prepare(prepared_specs)
if specs_to_remove:
# In a previous implementation, we invoked SAT here via `r.remove()` to help with
# spec removal, and then later invoked SAT again via `r.solve()`. Rather than invoking
# SAT for spec removal determination, we can use the PrefixGraph and simple tree
# traversal if we're careful about how we handle features. We still invoke SAT via
# `r.solve()` later.
_track_fts_specs = (spec for spec in specs_to_remove if 'track_features' in spec)
feature_names = set(concat(spec.get_raw_value('track_features')
for spec in _track_fts_specs))
graph = PrefixGraph((index[dist] for dist in solution), itervalues(specs_map))
removed_records = []
for spec in specs_to_remove:
# If the spec was a track_features spec, then we need to also remove every
# package with a feature that matches the track_feature. The
# `graph.remove_spec()` method handles that for us.
log.trace("using PrefixGraph to remove records for %s", spec)
removed_records.extend(graph.remove_spec(spec))
for rec in removed_records:
# We keep specs (minus the feature part) for the non-provides_features packages
# if they're in the history specs. Otherwise, we pop them from the specs_map.
rec_has_a_feature = set(rec.features or ()) & feature_names
if rec_has_a_feature and rec.name in specs_from_history_map:
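# Purely illustrative, standalone version of the feature-name collection above:
# plain dicts stand in for MatchSpec objects (the real specs expose get_raw_value),
# and concat flattens the per-spec 'track_features' tuples into one set.
from toolz import concat

specs = [{"track_features": ("mkl",)}, {"track_features": ("debug", "mkl")}]
feature_names = set(concat(spec["track_features"] for spec in specs))
# feature_names == {"mkl", "debug"}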
Yields:
(:class:`spacy.tokens.Span`, :class:`spacy.tokens.Token`, :class:`spacy.tokens.Span`): next quotation in ``doc``
represented as a (speaker, reporting verb, quotation) 3-tuple
Notes:
Loosely inspired by Krestel, Bergler, Witte. "Minding the Source: Automatic
Tagging of Reported Speech in Newspaper Articles".
TODO: Better approach would use ML, but needs a training dataset.
"""
doc_lang = doc.vocab.lang
if doc_lang != "en":
raise NotImplementedError("sorry, English-language texts only :(")
quote_end_punct = {",", ".", "?", "!"}
quote_indexes = set(
itertoolz.concat(
(m.start(), m.end() - 1)
for m in re.finditer(r"(\".*?\")|(''.*?'')|(``.*?'')", doc.text)
)
)
quote_positions = list(
itertoolz.partition(2, sorted(tok.i for tok in doc if tok.idx in quote_indexes))
)
sents = list(doc.sents)
sent_positions = [(sent.start, sent.end) for sent in sents]
for q0, q1 in quote_positions:
quote = doc[q0 : q1 + 1]
# we're only looking for direct quotes, not indirect or mixed
if not any(char in quote_end_punct for char in quote.text[-4:]):
continue
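# Standalone illustration of the itertoolz.partition(2, ...) pairing used above
# (itertoolz is assumed to come from toolz/cytoolz): sorted quote-mark token
# positions are grouped into (start, end) pairs, one per quotation.
from toolz import itertoolz

positions = [3, 10, 25, 40]
pairs = list(itertoolz.partition(2, positions))
# pairs == [(3, 10), (25, 40)]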
"""
Args:
seq (Sequence)
n (int)
stratify (bool)
random_state (int)
Returns:
list
"""
random.seed(a=random_state)
if stratify is True:
grped = itertoolz.groupby(operator.itemgetter(1), seq)
n_per_grp = max(int(round(n / len(grped))), 1)
sample = list(
itertoolz.concat(
random.sample(examples, min(len(examples), n_per_grp))
for examples in grped.values()
)
)
random.shuffle(sample)
return sample[:n]
else:
return random.sample(seq, min(len(seq), n))
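# Quick look at the itertoolz.groupby call in the stratified branch above, with
# hypothetical (item, label) pairs; each label group is then sampled separately.
import operator
from toolz import itertoolz

grped = itertoolz.groupby(operator.itemgetter(1), [("a", 0), ("b", 0), ("c", 1)])
# grped == {0: [("a", 0), ("b", 0)], 1: [("c", 1)]}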
raise InvalidTypeError(self.name, value, match.source, value.__class__.__name__,
self._type.__name__)
# get individual lines from important_matches that were marked important
# these will be prepended to the final result
def get_marked_lines(match, marker, parameter_obj):
return tuple(line
for line, flag in zip(match.value(parameter_obj),
match.valueflags(parameter_obj))
if flag is marker) if match else ()
top_lines = concat(get_marked_lines(m, ParameterFlag.top, self) for m, _ in
relevant_matches_and_values)
# also get lines that were marked as bottom, but reverse the match order so that lines
# coming earlier will ultimately be last
bottom_lines = concat(get_marked_lines(m, ParameterFlag.bottom, self) for m, _ in
reversed(relevant_matches_and_values))
# now, concat all lines, while reversing the matches
# reverse because elements closer to the end of search path take precedence
all_lines = concat(v for _, v in reversed(relevant_matches_and_values))
# stack top_lines + all_lines, then de-dupe
top_deduped = tuple(unique(concatv(top_lines, all_lines)))
# concat the reversed bottom_lines with the reversed top-deduped lines
# this gives us the reverse of the order we want, so we're almost there
# NOTE: for a line value marked both top and bottom, the bottom marker will win out
# for the top marker to win out, we'd need one additional de-dupe step
bottom_deduped = unique(concatv(reversed(tuple(bottom_lines)), reversed(top_deduped)))
# just reverse, and we're good to go
return tuple(reversed(tuple(bottom_deduped)))
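# Plain-tuple sketch of the merge above (hypothetical values): lines marked 'top' are
# pinned to the front, lines marked 'bottom' to the back, and unique() de-dupes while
# keeping the first occurrence it sees in each pass.
from toolz import concatv, unique

top_lines = ("channel_a",)
all_lines = ("channel_c", "channel_b", "channel_a")
bottom_lines = ("channel_c",)
top_deduped = tuple(unique(concatv(top_lines, all_lines)))
bottom_deduped = unique(concatv(reversed(tuple(bottom_lines)), reversed(top_deduped)))
result = tuple(reversed(tuple(bottom_deduped)))
# result == ("channel_a", "channel_b", "channel_c")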
if tok.orth_ and not tok.is_stop
and not tok.is_punct if not tok.is_space).intersection(top_terms)
for spacy_doc in spacy_docs)
else:
raise ValueError()
# confusing bit of code, but slightly faster than a plain for loop over the sub-docs:
# for each sub-doc, pair a 1 with an iterable chaining its single terms and its
# sorted term-pair combinations; then unzip the two streams, summing the 1s to get
# the total number of sub-docs and concatenating all the term iterables to
# initialize a single counter in one pass... :shrug:
ones, sds = zip(*((1, itertoolz.concatv(subdoc, itertools.combinations(sorted(subdoc), 2)))
for subdoc in subdocs))
n_subdocs = sum(ones)
term_probs = collections.Counter(itertoolz.concat(sds))
if n_subdocs > 1:
term_probs = dicttoolz.valmap(lambda x: x / n_subdocs, term_probs,
factory=collections.Counter)
return term_probs
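# Bare-bones version of the zip(*...) trick above: pair each sub-doc with a 1, then
# unzip so the 1s count sub-docs while the chained term iterables feed one Counter.
import collections
import itertools
from toolz import itertoolz

subdocs = [["a", "b"], ["b", "c"]]
ones, sds = zip(*((1, itertoolz.concatv(sd, itertools.combinations(sorted(sd), 2)))
                  for sd in subdocs))
n_subdocs = sum(ones)                                    # 2
term_counts = collections.Counter(itertoolz.concat(sds))
# term_counts == Counter({"b": 2, "a": 1, "c": 1, ("a", "b"): 1, ("b", "c"): 1})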
def files_from_action(link_path_action):
if isinstance(link_path_action, CompileMultiPycAction):
return link_path_action.target_short_paths
else:
return (link_path_action.target_short_path, )
def paths_from_action(link_path_action):
if isinstance(link_path_action, CompileMultiPycAction):
return link_path_action.prefix_paths_data
else:
if link_path_action.prefix_path_data is None:
return ()
else:
return (link_path_action.prefix_path_data, )
files = concat((files_from_action(x) for x in self.all_link_path_actions if x))
paths_data = PathsData(
paths_version=1,
paths=concat((paths_from_action(x) for x in self.all_link_path_actions if x)),
)
self.prefix_record = PrefixRecord.from_objects(
self.package_info.repodata_record,
self.package_info.index_json_record,
self.package_info.package_metadata,
requested_spec=text_type(self.requested_spec),
paths_data=paths_data,
files=files,
link=link,
url=self.package_info.url,
extracted_package_dir=extracted_package_dir,
package_tarball_full_path=package_tarball_full_path,
)
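# Minimal illustration of the concat-based flattening above (made-up paths): each
# action contributes a tuple of short paths, and concat collapses them into one stream.
from toolz import concat

per_action = [("lib/foo.py",), ("lib/__pycache__/foo.cpython-38.pyc",), ()]
files = list(concat(per_action))
# files == ["lib/foo.py", "lib/__pycache__/foo.cpython-38.pyc"]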