How to use the srsly.msgpack_dumps function in srsly

To help you get started, we’ve selected a few srsly examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github explosion / spaCy / tests / regression / test_issue3526.py View on Github external
def test_entity_ruler_existing_bytes_old_format_safe(patterns, en_vocab):
    """Restore an EntityRuler from a payload that holds only its patterns.

    The "old style" bytes are produced here by msgpack-dumping the source
    ruler's patterns directly. The restored ruler must report the same
    length and contain every source pattern, while its ``overwrite`` value
    must not be the same object as the source ruler's.
    """
    pipeline = Language(vocab=en_vocab)
    source_ruler = EntityRuler(pipeline, patterns=patterns, overwrite_ents=True)
    legacy_payload = srsly.msgpack_dumps(source_ruler.patterns)

    # Build a fresh ruler and load the legacy payload in one step.
    restored_ruler = EntityRuler(pipeline).from_bytes(legacy_payload)

    assert len(restored_ruler) == len(source_ruler)
    for expected in source_ruler.patterns:
        assert expected in restored_ruler.patterns
    # NOTE(review): presumably the legacy payload carries no overwrite flag,
    # so the restored ruler keeps its own default -- confirm against EntityRuler.
    assert restored_ruler.overwrite is not source_ruler.overwrite
github explosion / thinc / thinc / layers / base.py View on Github external
continue
                    param = layer._mem.get((id_, name))
                    if not isinstance(layer._mem.weights, numpy.ndarray):
                        param = param.get()
                    weights[-1][b"params"].append(
                        {
                            b"name": name,
                            b"offset": start,
                            b"shape": shape,
                            b"value": param,
                        }
                    )
                i += 1
            if hasattr(layer, "_layers"):
                queue.extend(layer._layers)
        return srsly.msgpack_dumps({b"weights": weights})
github explosion / sense2vec / sense2vec / sense2vec.py View on Github external
def to_bytes(self, exclude: Sequence[str] = tuple()) -> bytes:
        """Pack this Sense2Vec object into a msgpack bytestring.

        The payload always carries the "vectors", "cfg" and "freqs" entries;
        "strings" is added unless it is named in `exclude`.

        exclude (list): Names of serialization fields to leave out. Only
            "strings" is checked by this method.
        RETURNS (bytes): The serialized Sense2Vec object.
        """
        serialized_vectors = self.vectors.to_bytes()
        # Frequencies are stored as a list of (key, value) pairs.
        freq_pairs = [*self.freqs.items()]
        payload = dict(vectors=serialized_vectors, cfg=self.cfg, freqs=freq_pairs)
        if "strings" in exclude:
            return srsly.msgpack_dumps(payload)
        payload["strings"] = self.strings.to_bytes()
        return srsly.msgpack_dumps(payload)
github explosion / spacy-transformers / spacy_pytorch_transformers / _tokenizers.py View on Github external
def to_bytes(self, exclude=tuple(), **kwargs):
        """Serialize the fields listed in `serialization_fields` to msgpack bytes.

        `prepare_for_serialization()` is called first. Each field name is then
        looked up on the instance, with None stored for attributes that are
        missing. `exclude` and any extra keyword arguments are accepted but
        not used by this method.

        RETURNS (bytes): The msgpack-encoded mapping of field name to value.
        """
        self.prepare_for_serialization()
        # Keep the fields in the order given by serialization_fields.
        payload = OrderedDict(
            (name, getattr(self, name, None)) for name in self.serialization_fields
        )
        return srsly.msgpack_dumps(payload)
github explosion / spaCy / spacy / lookups.py View on Github external
def to_bytes(self, **kwargs):
        """Dump the tables held on this object to a msgpack bytestring.

        Keyword arguments are accepted but not used by this method.

        RETURNS (bytes): The serialized Lookups.

        DOCS: https://spacy.io/api/lookups#to_bytes
        """
        tables = self._tables
        serialized = srsly.msgpack_dumps(tables)
        return serialized
github chartbeat-labs / textacy / textacy / corpus.py View on Github external
for doc in self:
            tokens.append(doc.to_array(attrs))
            lengths.append(len(doc))
            strings.update(tok.text for tok in doc)
            user_datas.append(doc.user_data)

        msg = {
            "meta": self.spacy_lang.meta,
            "attrs": attrs,
            "tokens": np.vstack(tokens).tobytes("C"),
            "lengths": np.asarray(lengths, dtype="int32").tobytes("C"),
            "strings": list(strings),
            "user_datas": user_datas,
        }
        with tio.open_sesame(filepath, mode="wb") as f:
            f.write(srsly.msgpack_dumps(msg))
github explosion / spaCy / spacy / tokens / _serialize.py View on Github external
doc (Doc): The Doc object to add.

        DOCS: https://spacy.io/api/docbin#add
        """
        array = doc.to_array(self.attrs)
        if len(array.shape) == 1:
            array = array.reshape((array.shape[0], 1))
        self.tokens.append(array)
        spaces = doc.to_array(SPACY)
        assert array.shape[0] == spaces.shape[0]  # this should never happen
        spaces = spaces.reshape((spaces.shape[0], 1))
        self.spaces.append(numpy.asarray(spaces, dtype=bool))
        self.strings.update(w.text for w in doc)
        self.cats.append(doc.cats)
        if self.store_user_data:
            self.user_data.append(srsly.msgpack_dumps(doc.user_data))
github explosion / thinc / thinc / neural / _classes / model.py View on Github external
if not isinstance(layer._mem.weights, numpy.ndarray):
                        param = param.get()
                    weights[-1][b"params"].append(
                        OrderedDict(
                            (
                                (b"name", name),
                                (b"offset", start),
                                (b"shape", shape),
                                (b"value", param),
                            )
                        )
                    )
                i += 1
            if hasattr(layer, "_layers"):
                queue.extend(layer._layers)
        return srsly.msgpack_dumps({b"weights": weights})