Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if self.index is None: # use the less efficient default way
avg = vecs.mean(axis=0, keepdims=True)
result_keys, _, scores = self.vectors.most_similar(
avg, n=n_similar, batch_size=batch_size
)
result = list(zip(result_keys.flatten(), scores.flatten()))
result = [(self.strings[key], score) for key, score in result if key]
return [(key, score) for key, score in result if key not in keys]
else: # index is built, use annoy
avg = vecs.mean(axis=0, keepdims=False)
nns = self.index.get_nns_by_vector(avg, n_similar, include_distances=True)
result = []
for row, dist in zip(*nns):
key = self.strings[self.vectors.find(row=row)[0]]
if key not in keys:
score = 1.0 if dist == 0.0 else get_similarity(avg, self[key])
result.append((key, score))
return result
keys_b: Union[Sequence[Union[str, int]], str, int],
) -> float:
"""Make a semantic similarity estimate of two keys or two sets of keys.
The default estimate is cosine similarity using an average of vectors.
keys_a (str / int / iterable): The string or integer key(s).
keys_b (str / int / iterable): The other string or integer key(s).
RETURNS (float): The similarity score.
"""
if isinstance(keys_a, (str, int)):
keys_a = [keys_a]
if isinstance(keys_b, (str, int)):
keys_b = [keys_b]
average_a = numpy.vstack([self[key] for key in keys_a]).mean(axis=0)
average_b = numpy.vstack([self[key] for key in keys_b]).mean(axis=0)
return get_similarity(average_a, average_b)