    layer followed by an activation layer.

    :param dimensions: iterable of dimensions for the chain
    :param activation: activation layer to use e.g. nn.ReLU, set to None to disable
    :return: list of instances of Sequential
    """
    def single_unit(in_dimension: int, out_dimension: int) -> torch.nn.Module:
        unit = [("linear", nn.Linear(in_dimension, out_dimension))]
        if activation is not None:
            unit.append(("activation", activation))
        return nn.Sequential(OrderedDict(unit))

    return [
        single_unit(embedding_dimension, hidden_dimension)
        for embedding_dimension, hidden_dimension in sliding_window(2, dimensions)
    ]
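# Illustrative sketch (not from the source above): how sliding_window pairs
# consecutive dimensions to drive the list comprehension; sliding_window is
# assumed to come from cytoolz/toolz, and the dimension values are invented.
from cytoolz.itertoolz import sliding_window

dimensions = [300, 128, 64, 10]
print(list(sliding_window(2, dimensions)))
# [(300, 128), (128, 64), (64, 10)] -> one Linear (+ optional activation) unit per pair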
        return nx.Graph()

    # if len(terms) < window_width, cytoolz throws a StopIteration error
    # which we don't want
    if len(terms) < window_width:
        LOGGER.info(
            "`terms` has fewer items (%s) than the specified `window_width` (%s); "
            "setting window width to %s",
            len(terms),
            window_width,
            len(terms),
        )
        window_width = len(terms)

    if isinstance(terms[0], compat.unicode_):
        windows = itertoolz.sliding_window(window_width, terms)
    elif isinstance(terms[0], Token):
        if normalize == "lemma":
            windows = (
                (tok.lemma_ for tok in window)
                for window in itertoolz.sliding_window(window_width, terms)
            )
        elif normalize == "lower":
            windows = (
                (tok.lower_ for tok in window)
                for window in itertoolz.sliding_window(window_width, terms)
            )
        elif not normalize:
            windows = (
                (tok.text for tok in window)
                for window in itertoolz.sliding_window(window_width, terms)
            )
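# Standalone sketch (invented terms) of the plain-string branch above: each
# window of `window_width` adjacent terms later contributes co-occurrence pairs.
from cytoolz import itertoolz

terms = ["cat", "sat", "on", "the", "mat"]
window_width = 3
print(list(itertoolz.sliding_window(window_width, terms)))
# [('cat', 'sat', 'on'), ('sat', 'on', 'the'), ('on', 'the', 'mat')]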
        doc (:class:`spacy.tokens.Doc`)
        ns (int or Tuple[int]): One or more n values for which to generate n-grams.
            For example, ``2`` gets bigrams; ``(2, 3)`` gets bigrams and trigrams.
        include_pos (str or Set[str]): One or more POS tags with which to filter ngrams.
            If None, include tokens of all POS tags.

    Yields:
        Tuple[:class:`spacy.tokens.Token`]: Next ngram candidate,
        as a tuple of constituent Tokens.

    See Also:
        :func:`textacy.extract.ngrams()`
    """
    ns = t_utils.to_collection(ns, int, tuple)
    include_pos = t_utils.to_collection(include_pos, compat.unicode_, set)
    ngrams = itertoolz.concat(itertoolz.sliding_window(n, doc) for n in ns)
    ngrams = (
        ngram
        for ngram in ngrams
        if not (ngram[0].is_stop or ngram[-1].is_stop)
        and not any(word.is_punct or word.is_space for word in ngram)
    )
    if include_pos:
        ngrams = (
            ngram
            for ngram in ngrams
            if all(word.pos_ in include_pos for word in ngram)
        )
    for ngram in ngrams:
        yield ngram
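# Minimal sketch (plain strings standing in for spaCy tokens, invented values)
# of the core enumeration above: concatenate sliding windows of each size in `ns`.
from cytoolz import itertoolz

tokens = ["fast", "neural", "networks", "rock"]
ns = (2, 3)
print(list(itertoolz.concat(itertoolz.sliding_window(n, tokens) for n in ns)))
# [('fast', 'neural'), ('neural', 'networks'), ('networks', 'rock'),
#  ('fast', 'neural', 'networks'), ('neural', 'networks', 'rock')]
# the real generator then filters out candidates containing stop words,
# punctuation, whitespace, or (optionally) disallowed POS tags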
    words = (
        word for word in words
        if word.pos_ in include_pos
    )
    for word in words:
        wid = word.lower
        if wid in stop_words or wid in seen_candidates:
            continue
        else:
            seen_candidates.add(wid)
        # NOTE: here I've modified the YAKE algorithm to put less emphasis on term freq
        # term_scores[word.lower_] = word_scores[wid] / (word_freqs[wid] * (1 + word_scores[wid]))
        term_scores[word.lower_] = word_scores[wid] / (math.log2(1 + word_freqs[wid]) * (1 + word_scores[wid]))
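        # Worked example with invented numbers: if word_freqs[wid] = 8 and
        # word_scores[wid] = 1.0, the commented-out original formula divides by
        # 8 * 2 = 16 (score 0.0625), while the modified one divides by
        # log2(1 + 8) * 2 ~= 6.34 (score ~0.158), so a high raw frequency
        # drives the score down much less aggressively.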
    # now compute combined scores for (valid) bigram and trigram candidates
    ngrams = itertoolz.concatv(*(itertoolz.sliding_window(n, doc) for n in ngrams if n > 1))
    ngrams = [
        ngram
        for ngram in ngrams
        if not (ngram[0].is_stop or ngram[-1].is_stop)
        and not any(w.is_punct or w.is_space for w in ngram)
    ]
    if include_pos:
        ngrams = [
            ngram
            for ngram in ngrams
            if all(w.pos_ in include_pos for w in ngram)
        ]
    ngram_freqs = itertoolz.frequencies(
        " ".join(word.lower_ for word in ngram) for ngram in ngrams
    )
    for ngram in ngrams:
        stop_words (Set[str])
        window_size (int)

    Returns:
        Dict[int, Dict[str, list]]
    """
    word_occ_vals = collections.defaultdict(lambda: collections.defaultdict(list))

    def _is_upper_cased(tok):
        return tok.is_upper or (tok.is_title and not tok.is_sent_start)

    attr_name = _get_attr_name(normalize, False)
    padding = [None] * window_size
    for sent_idx, sent in enumerate(doc.sents):
        sent_padded = itertoolz.concatv(padding, sent, padding)
        for window in itertoolz.sliding_window(1 + (2 * window_size), sent_padded):
            lwords, word, rwords = window[:window_size], window[window_size], window[window_size + 1:]
            w_id = getattr(word, attr_name)
            if word.is_stop:
                stop_words.add(w_id)
            word_occ_vals[w_id]["is_uc"].append(_is_upper_cased(word))
            word_occ_vals[w_id]["sent_idx"].append(sent_idx)
            word_occ_vals[w_id]["l_context"].extend(
                getattr(w, attr_name) for w in lwords
                if not (w is None or w.is_punct or w.is_space)
            )
            word_occ_vals[w_id]["r_context"].extend(
                getattr(w, attr_name) for w in rwords
                if not (w is None or w.is_punct or w.is_space)
            )
    return word_occ_vals
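# Standalone sketch (invented sentence, plain strings instead of spaCy tokens)
# of the padded-window trick above: each word is yielded with window_size
# neighbours on each side, with None filling in past the sentence boundary.
from cytoolz import itertoolz

window_size = 2
sent = ["solar", "panels", "cut", "costs"]
padding = [None] * window_size
sent_padded = itertoolz.concatv(padding, sent, padding)
for window in itertoolz.sliding_window(1 + (2 * window_size), sent_padded):
    lwords, word, rwords = window[:window_size], window[window_size], window[window_size + 1:]
    print(word, [w for w in lwords if w is not None], [w for w in rwords if w is not None])
# solar [] ['panels', 'cut']
# panels ['solar'] ['cut', 'costs']
# cut ['solar', 'panels'] ['costs']
# costs ['panels', 'cut'] []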
    def __init__(self, dimensions: List[int]):
        super(Classifier, self).__init__()
        units = []
        # pair consecutive dimensions: (d0, d1), (d1, d2), ... -> one Linear + ReLU per pair
        for from_dimension, to_dimension in sliding_window(2, dimensions):
            units.append(nn.Linear(from_dimension, to_dimension))
            units.append(nn.ReLU())
        # drop the trailing ReLU so the final Linear layer feeds the log-softmax directly
        self.classifier = nn.Sequential(*units[:-1])
        self.softmax = nn.LogSoftmax(dim=1)
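# Hypothetical usage of the constructor above (assumes the surrounding
# Classifier(nn.Module) class is defined; the dimension list is invented):
# sliding_window(2, [784, 256, 10]) yields (784, 256) and (256, 10), so the
# resulting stack is Linear(784, 256) -> ReLU -> Linear(256, 10).
model = Classifier(dimensions=[784, 256, 10])
print(model.classifier)
# Sequential(
#   (0): Linear(in_features=784, out_features=256, bias=True)
#   (1): ReLU()
#   (2): Linear(in_features=256, out_features=10, bias=True)
# )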
        return nx.Graph()

    # if len(terms) < window_size, cytoolz throws a StopIteration error; prevent it
    if len(terms) < window_size:
        LOGGER.info(
            "`terms` has fewer items (%s) than `window_size` (%s); "
            "setting window width to %s",
            len(terms),
            window_size,
            len(terms),
        )
        window_size = len(terms)

    first_term, terms = itertoolz.peek(terms)
    if isinstance(first_term, compat.unicode_):
        windows = itertoolz.sliding_window(window_size, terms)
    elif isinstance(first_term, (Span, Token)):
        windows = itertoolz.sliding_window(
            window_size, utils.normalize_terms(terms, normalize))
    else:
        raise TypeError(
            "items in `terms` must be strings or spacy tokens, not {}".format(
                type(first_term)
            )
        )
    graph = nx.Graph()

    if edge_weighting == "count":
        cooc_mat = collections.Counter(
            w1_w2
            for window in windows
            for w1_w2 in itertools.combinations(sorted(window), 2)
        )
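# Sketch of the co-occurrence counting step with made-up windows standing in
# for the sliding windows over `terms` built above.
import collections
import itertools

windows = [("cat", "sat", "mat"), ("sat", "mat", "flat")]
cooc_mat = collections.Counter(
    w1_w2
    for window in windows
    for w1_w2 in itertools.combinations(sorted(window), 2)
)
print(cooc_mat)
# Counter({('mat', 'sat'): 2, ('cat', 'mat'): 1, ('cat', 'sat'): 1,
#          ('flat', 'mat'): 1, ('flat', 'sat'): 1})
# each pair and its count become a weighted edge in the co-occurrence graph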