import srsly
from spacy.language import Language
from spacy.pipeline import EntityRuler


def test_entity_ruler_existing_bytes_old_format_safe(patterns, en_vocab):
    # `patterns` and `en_vocab` are pytest fixtures from spaCy's test suite.
    nlp = Language(vocab=en_vocab)
    ruler = EntityRuler(nlp, patterns=patterns, overwrite_ents=True)
    # Old-style serialization: the patterns list dumped directly with msgpack.
    bytes_old_style = srsly.msgpack_dumps(ruler.patterns)
    new_ruler = EntityRuler(nlp)
    new_ruler = new_ruler.from_bytes(bytes_old_style)
    assert len(new_ruler) == len(ruler)
    for pattern in ruler.patterns:
        assert pattern in new_ruler.patterns
    # The old format stores no config, so the overwrite flag is not restored.
    assert new_ruler.overwrite is not ruler.overwrite
# Excerpt from a thinc model's to_bytes(): it begins inside the loop over the
# layer's parameter table (which yields `id_`, `name`, `start` and `shape`),
# collects each parameter array into a list of dicts, and finally serializes
# the whole structure with msgpack. The enclosing loops are not shown here.
continue
param = layer._mem.get((id_, name))
# Copy the parameter to host memory when the weights live on the GPU;
# cupy arrays expose .get() to return a numpy copy.
if not isinstance(layer._mem.weights, numpy.ndarray):
    param = param.get()
weights[-1][b"params"].append(
    {
        b"name": name,
        b"offset": start,
        b"shape": shape,
        b"value": param,
    }
)
i += 1
if hasattr(layer, "_layers"):
    queue.extend(layer._layers)
return srsly.msgpack_dumps({b"weights": weights})
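The excerpt relies on srsly's msgpack backend handling numpy arrays and bytes keys directly. A minimal, self-contained round trip with made-up parameter values (the names and shapes below are illustrative, not from the excerpt) could look like this:

import numpy
import srsly

# A hypothetical payload shaped like the one built in the excerpt above.
param = {
    b"name": b"W",
    b"offset": 0,
    b"shape": (2, 3),
    b"value": numpy.zeros((2, 3), dtype="float32"),
}
data = srsly.msgpack_dumps({b"weights": [{b"params": [param]}]})

# msgpack_loads restores the nested structure, including the numpy array.
loaded = srsly.msgpack_loads(data)
value = loaded[b"weights"][0][b"params"][0][b"value"]
assert numpy.array_equal(value, param[b"value"])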
def to_bytes(self, exclude: Sequence[str] = tuple()) -> bytes:
    """Serialize a Sense2Vec object to a bytestring.

    exclude (list): Names of serialization fields to exclude.
    RETURNS (bytes): The serialized Sense2Vec object.
    """
    vectors_bytes = self.vectors.to_bytes()
    freqs = list(self.freqs.items())
    data = {"vectors": vectors_bytes, "cfg": self.cfg, "freqs": freqs}
    if "strings" not in exclude:
        data["strings"] = self.strings.to_bytes()
    return srsly.msgpack_dumps(data)
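The matching from_bytes is not part of this excerpt. As a rough sketch, a hypothetical helper could unpack such a payload with srsly.msgpack_loads; only the field names are taken from the method above, everything else is assumed:

import srsly

def unpack_sense2vec_bytes(bytes_data, exclude=tuple()):
    # Hypothetical counterpart to to_bytes() above: split the msgpack payload
    # back into its components. Rebuilding the vectors/strings objects from
    # their own bytestrings is left to the caller.
    data = srsly.msgpack_loads(bytes_data)
    cfg = data.get("cfg", {})
    freqs = dict(data.get("freqs", []))
    vectors_bytes = data.get("vectors")
    strings_bytes = data.get("strings") if "strings" not in exclude else None
    return cfg, freqs, vectors_bytes, strings_bytes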
def to_bytes(self, exclude=tuple(), **kwargs):
    # Collect every registered serialization field into an ordered mapping and
    # dump the whole thing as a single msgpack blob.
    self.prepare_for_serialization()
    msg = OrderedDict()
    for field in self.serialization_fields:
        msg[field] = getattr(self, field, None)
    return srsly.msgpack_dumps(msg)
def to_bytes(self, **kwargs):
    """Serialize the lookups to a bytestring.

    RETURNS (bytes): The serialized Lookups.

    DOCS: https://spacy.io/api/lookups#to_bytes
    """
    return srsly.msgpack_dumps(self._tables)
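The method above simply hands the internal tables to msgpack. A short round trip through spaCy's public Lookups API (assuming spaCy v2.2 or later, where Lookups is available) could look like this:

from spacy.lookups import Lookups

# Build a small lookups container, serialize it, and restore it again.
lookups = Lookups()
lookups.add_table("lemma_exc", {"dogs": "dog", "geese": "goose"})
data = lookups.to_bytes()

new_lookups = Lookups()
new_lookups.from_bytes(data)
assert new_lookups.has_table("lemma_exc")
assert new_lookups.get_table("lemma_exc")["geese"] == "goose"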
# Excerpt from a corpus save method: `attrs`, `filepath` and the accumulator
# lists (`tokens`, `lengths`, `strings`, `user_datas`) are set up earlier in
# the method, before this loop.
for doc in self:
    tokens.append(doc.to_array(attrs))
    lengths.append(len(doc))
    strings.update(tok.text for tok in doc)
    user_datas.append(doc.user_data)
# Pack everything into one dict and write it to disk as a single msgpack payload.
msg = {
    "meta": self.spacy_lang.meta,
    "attrs": attrs,
    "tokens": np.vstack(tokens).tobytes("C"),
    "lengths": np.asarray(lengths, dtype="int32").tobytes("C"),
    "strings": list(strings),
    "user_datas": user_datas,
}
with tio.open_sesame(filepath, mode="wb") as f:
    f.write(srsly.msgpack_dumps(msg))
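A matching reader is not shown in the excerpt. The sketch below is a hypothetical loader that unpacks the same payload, assuming the file was written uncompressed and that the token arrays use spaCy's default uint64 dtype:

import numpy as np
import srsly

def peek_saved_corpus(filepath):
    # Hypothetical reader for the payload written above: load the msgpack blob
    # and rebuild the per-doc token arrays from the flat bytes buffers.
    with open(filepath, mode="rb") as f:
        msg = srsly.msgpack_loads(f.read())
    lengths = np.frombuffer(msg["lengths"], dtype="int32")
    n_attrs = len(msg["attrs"])
    tokens = np.frombuffer(msg["tokens"], dtype="uint64").reshape(-1, n_attrs)
    # Split the stacked token matrix back into one array per document.
    return np.split(tokens, np.cumsum(lengths)[:-1]), msg["meta"]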
def add(self, doc):
    """Add a Doc's annotations to the DocBin.

    doc (Doc): The Doc object to add.
    DOCS: https://spacy.io/api/docbin#add
    """
    array = doc.to_array(self.attrs)
    if len(array.shape) == 1:
        array = array.reshape((array.shape[0], 1))
    self.tokens.append(array)
    spaces = doc.to_array(SPACY)
    assert array.shape[0] == spaces.shape[0]  # this should never happen
    spaces = spaces.reshape((spaces.shape[0], 1))
    self.spaces.append(numpy.asarray(spaces, dtype=bool))
    self.strings.update(w.text for w in doc)
    self.cats.append(doc.cats)
    if self.store_user_data:
        # User data is serialized per-doc with msgpack.
        self.user_data.append(srsly.msgpack_dumps(doc.user_data))
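The per-doc user data above is msgpack-encoded as it is added. A short round trip through DocBin's public API (assuming spaCy v2.2+, where DocBin lives in spacy.tokens) might look like this:

import spacy
from spacy.tokens import DocBin

# Round-trip usage of the add() method above.
nlp = spacy.blank("en")
doc_bin = DocBin(store_user_data=True)
doc_bin.add(nlp("Serialize me with msgpack."))
data = doc_bin.to_bytes()  # the payload is msgpack under the hood

restored = DocBin().from_bytes(data)
docs = list(restored.get_docs(nlp.vocab))
assert docs[0].text == "Serialize me with msgpack."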
# Another excerpt of the same weight-serialization pattern as the earlier
# thinc example; the only difference is that each parameter entry is built as
# an OrderedDict rather than a plain dict. As before, the enclosing loops over
# the layers and the parameter table are not part of the excerpt.
if not isinstance(layer._mem.weights, numpy.ndarray):
    param = param.get()
weights[-1][b"params"].append(
    OrderedDict(
        (
            (b"name", name),
            (b"offset", start),
            (b"shape", shape),
            (b"value", param),
        )
    )
)
i += 1
if hasattr(layer, "_layers"):
    queue.extend(layer._layers)
return srsly.msgpack_dumps({b"weights": weights})
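For reference, the OrderedDict and plain-dict variants above serialize to equivalent msgpack content on Python 3.7+, where plain dicts are also insertion-ordered; a tiny illustration (the keys below are just examples):

from collections import OrderedDict

import srsly

plain = {b"name": b"W", b"offset": 0}
ordered = OrderedDict(((b"name", b"W"), (b"offset", 0)))
# Both mappings round-trip to the same decoded content.
assert srsly.msgpack_loads(srsly.msgpack_dumps(plain)) == srsly.msgpack_loads(
    srsly.msgpack_dumps(ordered)
)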