How to use the thinc.v2v.Model.define_operators function in thinc

To help you get started, we’ve selected a few examples showing how thinc’s Model.define_operators is used in popular open-source projects.

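Before diving into the project snippets, here is a minimal, self-contained sketch of the pattern. It uses the thinc v7-style API that these examples are written against; the layer names and sizes are illustrative, not taken from any project below.

from thinc.v2v import Model, Affine, ReLu, Softmax
from thinc.api import chain, clone

# Bind ">>" to chain and "**" to clone for the duration of the block.
with Model.define_operators({">>": chain, "**": clone}):
    # Affine maps 784 -> 64, two cloned ReLu layers keep the width at 64,
    # and Softmax maps 64 -> 10. Note "**" binds tighter than ">>", so the
    # clone applies only to the ReLu layer.
    model = Affine(64, 784) >> ReLu(64, 64) ** 2 >> Softmax(10, 64)
# Outside the "with" block the operators are no longer bound.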

github explosion / spaCy / spacy / _ml.py
def build_text_classifier(nr_class, width=64, **cfg):
    depth = cfg.get("depth", 2)
    nr_vector = cfg.get("nr_vector", 5000)
    pretrained_dims = cfg.get("pretrained_dims", 0)
    with Model.define_operators({">>": chain, "+": add, "|": concatenate, "**": clone}):
        if cfg.get("low_data") and pretrained_dims:
            model = (
                SpacyVectors
                >> flatten_add_lengths
                >> with_getitem(0, Affine(width, pretrained_dims))
                >> ParametricAttention(width)
                >> Pooling(sum_pool)
                >> Residual(ReLu(width, width)) ** 2
                >> zero_init(Affine(nr_class, width, drop_factor=0.0))
                >> logistic
            )
            return model

        lower = HashEmbed(width, nr_vector, column=1)
        prefix = HashEmbed(width // 2, nr_vector, column=2)
        suffix = HashEmbed(width // 2, nr_vector, column=3)
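
The operators here are pure syntactic sugar for the functions bound in the table above. For instance, inside this block the following two lines would build functionally equivalent networks (a hypothetical fragment reusing the embedding layers just defined):

merged = lower | prefix | suffix                          # operator form
merged = concatenate(concatenate(lower, prefix), suffix)  # plain-function form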

github explosion / thinc / examples / imdb_attention.py
def build_model(nr_class, width, depth, conv_depth, vectors_name, **kwargs):
    with Model.define_operators({"|": concatenate, ">>": chain, "**": clone}):
        embed = (
            HashEmbed(width, 5000, column=1)
            | StaticVectors(vectors_name, width, column=5)
            | HashEmbed(width // 2, 750, column=2)
            | HashEmbed(width // 2, 750, column=3)
            | HashEmbed(width // 2, 750, column=4)
        ) >> LN(Maxout(width))

        sent2vec = (
            with_flatten(embed)
            >> Residual(
                prepare_self_attention(Affine(width * 3, width), nM=width, nH=4)
                >> MultiHeadedAttention()
                >> with_flatten(Maxout(width, width, pieces=3))
            )
            >> flatten_add_lengths

github explosion / thinc / examples / lstm_tagger.py
    min_batch_size=16,
    max_batch_size=16,
    learn_rate=0.001,
    momentum=0.9,
    dropout=0.5,
    dropout_decay=1e-4,
    nb_epoch=20,
    L2=1e-6,
):
    prefer_gpu()
    cfg = dict(locals())
    print(cfg)
    train_data, check_data, nr_tag = ancora_pos_tags()

    extracter = FeatureExtracter("es", attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    with Model.define_operators({"**": clone, ">>": chain, "+": add, "|": concatenate}):
        lower_case = HashEmbed(width, 100, column=0)
        shape = HashEmbed(width // 2, 200, column=1)
        prefix = HashEmbed(width // 2, 100, column=2)
        suffix = HashEmbed(width // 2, 100, column=3)

        model = (
            with_flatten(
                (lower_case | shape | prefix | suffix) >> Maxout(width, pieces=3)
            )
            >> BiLSTM(width, width) ** depth
            >> with_flatten(Softmax(nr_tag))
        )

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)
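
The snippet stops just before the training loop; the full example goes on to train with thinc's Trainer. A rough sketch of that v7-era pattern (the trainer settings, batch slice, and gradient expression are plausible reconstructions, not copied from the file):

with model.begin_training(train_X[:5000], train_y[:5000]) as (trainer, optimizer):
    trainer.batch_size = min_batch_size
    trainer.nb_epoch = nb_epoch
    trainer.dropout = dropout
    for X, y in trainer.iterate(train_X, train_y):
        # begin_update returns the predictions and a backprop callback.
        yh, backprop = model.begin_update(X, drop=trainer.dropout)
        backprop([yh[i] - y[i] for i in range(len(yh))], optimizer)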

github explosion / spaCy / bin / wiki_entity_linking / train_descriptions.py
def _build_network(self, orig_width, hidden_width):
        with Model.define_operators({">>": chain}):
            # very simple encoder-decoder model
            self.encoder = Affine(hidden_width, orig_width)
            self.model = self.encoder >> zero_init(
                Affine(orig_width, hidden_width, drop_factor=0.0)
            )
        self.sgd = create_default_optimizer(self.model.ops)
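
Elsewhere in the same class, this encoder-decoder is trained to reconstruct its input vectors (an autoencoder over entity descriptions). A hedged sketch of such an update step with the v7 API (the method name and dropout value are assumptions):

def _update(self, vectors):
    predictions, backprop = self.model.begin_update(vectors, drop=0.0)
    # Reconstruction error: the decoder should reproduce its own input.
    backprop(predictions - vectors, sgd=self.sgd)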

github explosion / spaCy / examples / pipeline / wiki_entity_linking / train_el.py
def _encoder(in_width, hidden_width, end_width):
        conv_depth = 2
        cnn_maxout_pieces = 3

        with Model.define_operators({">>": chain, "**": clone}):
            convolution = Residual(
                ExtractWindow(nW=1)
                >> LN(Maxout(hidden_width, hidden_width * 3, pieces=cnn_maxout_pieces))
            )

            encoder = (
                SpacyVectors
                >> with_flatten(
                    LN(Maxout(hidden_width, in_width)) >> convolution ** conv_depth,
                    pad=conv_depth,
                )
                >> flatten_add_lengths
                >> ParametricAttention(hidden_width)
                >> Pooling(mean_pool)
                >> Residual(zero_init(Maxout(hidden_width, hidden_width)))
                >> zero_init(Affine(end_width, hidden_width, drop_factor=0.0))
            )

            # TODO: ReLu or LN(Maxout)  ?
            # sum_pool or mean_pool ?

        return encoder
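
A hypothetical call, sized for spaCy's 300-dimensional pretrained vectors (all three widths are illustrative):

encoder = _encoder(in_width=300, hidden_width=128, end_width=64)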

github explosion / thinc / examples / text-pair / glove_mwe_multipool_siamese.py
    nlp = get_spacy("en_vectors_web_lg")

    if use_gpu:
        Model.ops = CupyOps()

    print("Construct model")
    # Bind operators for the scope of the block:
    # * chain (>>): Compose models in a 'feed forward' style,
    #   i.e. chain(f, g)(x) -> g(f(x))
    # * clone (**): Create n copies of a model, and chain them, i.e.
    #   (f ** 3)(x) -> f''(f'(f(x))), where f, f' and f'' have distinct weights.
    # * concatenate (|): Merge the outputs of two models into a single vector,
    #   i.e. (f|g)(x) -> hstack(f(x), g(x))
    Model.lsuv = True
    # Model.ops = CupyOps()
    with Model.define_operators({">>": chain, "**": clone, "|": concatenate, "+": add}):
        mwe_encode = ExtractWindow(nW=1) >> LN(
            Maxout(width, drop_factor=0.0, pieces=pieces)
        )

        sent2vec = (
            flatten_add_lengths
            >> with_getitem(
                0,
                (HashEmbed(width, 3000) | StaticVectors("en_vectors_web_lg", width))
                >> LN(Maxout(width, width * 2))
                >> Residual(mwe_encode) ** depth,
            )  # : word_ids{T}
            >> Pooling(mean_pool, max_pool)
            >> Residual(LN(Maxout(width * 2, pieces=pieces), nO=width * 2)) ** 2
            >> logistic
        )
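
Note the width arithmetic: Pooling(mean_pool, max_pool) concatenates a mean-pooled and a max-pooled vector for each sequence, which is why the layers after it are sized width * 2. The same shape logic in plain numpy (sizes are illustrative):

import numpy

words = numpy.random.rand(7, 64).astype("float32")  # one 7-word sentence, width=64
pooled = numpy.hstack([words.mean(axis=0), words.max(axis=0)])
assert pooled.shape == (128,)  # width * 2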

github explosion / spaCy / examples / training / pretrain_textcat.py
def build_textcat_model(tok2vec, nr_class, width):
    from thinc.v2v import Model, Softmax, Maxout
    from thinc.api import flatten_add_lengths, chain
    from thinc.t2v import Pooling, sum_pool, mean_pool, max_pool
    from thinc.misc import Residual, LayerNorm
    from spacy._ml import logistic, zero_init

    with Model.define_operators({">>": chain}):
        model = (
            tok2vec
            >> flatten_add_lengths
            >> Pooling(mean_pool)
            >> Softmax(nr_class, width)
        )
    model.tok2vec = tok2vec
    return model
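
A hypothetical call, wiring in spaCy's Tok2Vec layer as the pretrained encoder (the Tok2Vec settings are assumptions; any tok2vec layer whose output width matches the width argument would do):

from spacy._ml import Tok2Vec

tok2vec = Tok2Vec(width=96, embed_size=2000)
textcat = build_textcat_model(tok2vec, nr_class=2, width=96)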