How to use the thinc.misc.LayerNorm function in thinc

To help you get started, we've selected a few thinc examples, based on popular ways it is used in public projects.

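Before the project examples, here is a minimal sketch of the pattern they all share, assuming the thinc 7.x API used on this page: LayerNorm wraps a child layer (typically a Maxout) and layer-normalizes the child's output. The width, batch size, and random input below are illustrative only, not taken from any of the projects.

    from thinc.v2v import Maxout
    from thinc.misc import LayerNorm

    width = 128  # illustrative width, not from any project below
    model = LayerNorm(Maxout(width, width * 2, pieces=3))

    # A dummy batch of eight float32 vectors matching the Maxout's input size.
    X = model.ops.xp.random.uniform(-1, 1, (8, width * 2)).astype("float32")
    with model.begin_training(X):
        pass  # run the on-data weight initializers, as the benchmark below does
    Y, get_dX = model.begin_update(X)  # forward pass; Y has shape (8, width)

In the snippets that follow, LayerNorm typically appears under the alias LN and is composed with the >> (chain) and | (concatenate) operators registered through Model.define_operators.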

github explosion / spaCy / spacy / _ml.py View on Github
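        # From spaCy's Tok2Vec: each mix of embedding tables is fed through a
        # Maxout layer wrapped in LN, i.e. thinc.misc.LayerNorm.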
            shape = HashEmbed(
                width, embed_size // 2, column=cols.index(SHAPE), name="embed_shape"
            )
        else:
            prefix, suffix, shape = (None, None, None)
        if pretrained_vectors is not None:
            glove = StaticVectors(pretrained_vectors, width, column=cols.index(ID))

            if subword_features:
                embed = uniqued(
                    (glove | norm | prefix | suffix | shape)
                    >> LN(Maxout(width, width * 5, pieces=3)),
                    column=cols.index(ORTH),
                )
            else:
                embed = uniqued(
                    (glove | norm) >> LN(Maxout(width, width * 2, pieces=3)),
                    column=cols.index(ORTH),
                )
        elif subword_features:
            embed = uniqued(
                (norm | prefix | suffix | shape)
                >> LN(Maxout(width, width * 4, pieces=3)),
                column=cols.index(ORTH),
            )
        elif char_embed:
            embed = concatenate_lists(
                CharacterEmbed(nM=64, nC=8),
                FeatureExtracter(cols) >> with_flatten(norm),
            )
            reduce_dimensions = LN(
                Maxout(width, 64 * 8 + width, pieces=cnn_maxout_pieces)
            )
github explosion / thinc / examples / imdb_cnn.py View on Github
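    # IMDB text classifier: the concatenated hash embeddings are mixed by a
    # layer-normalized Maxout, and the Maxout inside each residual block is
    # wrapped in LN as well.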
def build_model(nr_class, width, depth, conv_depth, vectors_name, **kwargs):
    with Model.define_operators({"|": concatenate, ">>": chain, "**": clone}):
        embed = (
            HashEmbed(width, 5000, column=1)
            | StaticVectors(vectors_name, width, column=5)
            | HashEmbed(width // 2, 750, column=2)
            | HashEmbed(width // 2, 750, column=3)
            | HashEmbed(width // 2, 750, column=4)
        ) >> LN(Maxout(width))

        sent2vec = (
            flatten_add_lengths
            >> with_getitem(
                0,
                embed
                >> Residual(ExtractWindow(nW=1) >> LN(Maxout(width))) ** conv_depth,
            )
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(LN(Maxout(width))) ** depth
        )

        model = (
            foreach(sent2vec, drop_factor=2.0)
            >> flatten_add_lengths
            # This block would allow the model to learn some cross-sentence
            # features. It's not useful on this problem. It might make more
            # sense to use a BiLSTM here, following Liang et al (2016).
            # >> with_getitem(0,
            #    Residual(ExtractWindow(nW=1) >> LN(Maxout(width))) ** conv_depth
            # )
            >> ParametricAttention(width, hard=False)
github explosion / thinc / bin / benchmark_layer_norm.py View on Github
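# Micro-benchmark for LayerNorm: wrap a dummy child layer, then time ten sweeps
# of forward passes (begin_update) over pre-generated data batches.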
def main(nr_col=128):
    Model().ops.xp.random.seed(0)
    model = LayerNorm(DummyChild(nr_col))
    Xs = list(create_data(model.ops, nr_col))
    with model.begin_training(Xs[0]):
        pass
    total_Y = 0.
    start = time.time()
    for i in range(10):
        for X in Xs:
            Y, get_dX = model.begin_update(X)
            total_Y += Y.sum()
    end = time.time()
    print(end-start, total_Y)
github explosion / spaCy / spacy / _ml.py View on Github
        # Continuation of the Tok2Vec construction from the first _ml.py
        # snippet above: the fallback embedding branch and the CNN layers.
        else:
            embed = norm

        convolution = Residual(
            ExtractWindow(nW=1)
            >> LN(Maxout(width, width * 3, pieces=cnn_maxout_pieces))
        )
        if char_embed:
            tok2vec = embed >> with_flatten(
                reduce_dimensions >> convolution ** conv_depth, pad=conv_depth
            )
        else:
            tok2vec = FeatureExtracter(cols) >> with_flatten(
                embed >> convolution ** conv_depth, pad=conv_depth
            )
github explosion / thinc / examples / transformer_tagger.py View on Github
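    # Spanish POS tagger (AnCora corpus): the concatenated feature embeddings
    # are passed through LayerNorm(Maxout(...)) before self-attention.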
    train_data, check_data, nr_tag = ancora_pos_tags()

    extracter = FeatureExtracter('es', attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    Model.lsuv = True
    with Model.define_operators({'**': clone, '>>': chain, '+': add,
                                 '|': concatenate}):
        lower_case = HashEmbed(width, 100, column=0)
        shape      = HashEmbed(width//2, 200, column=1)
        prefix     = HashEmbed(width//2, 100, column=2)
        suffix     = HashEmbed(width//2, 100, column=3)

        model = (
            flatten_add_lengths
            >> with_getitem(0,
                (lower_case | shape | prefix | suffix )
                >> LayerNorm(Maxout(width, pieces=3))
            )
            >> SelfAttention(nK=32, nO=width, nI=width, nL=1, nR=1)
            >> with_getitem(0, Softmax(nr_tag))
            >> unflatten
        )

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X[:5000], train_y[:5000], **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
github explosion / thinc / examples / imdb_attention.py View on Github
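            # Attention-based IMDB model: LN wraps both the Affine projection
            # after multi-headed attention and the Maxout layers in the
            # residual blocks.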
            >> ParametricAttention(width, hard=False)
            >> Pooling(mean_pool)
            >> Residual(LN(Maxout(width)))
        )

        model = (
            foreach(sent2vec, drop_factor=2.0)
            >> Residual(
                prepare_self_attention(Affine(width*3, width), nM=width, nH=4)
                >> MultiHeadedAttention()
                >> with_flatten(LN(Affine(width, width)))
            )
            >> flatten_add_lengths
            >> ParametricAttention(width, hard=False)
            >> Pooling(mean_pool)
            >> Residual(LN(Maxout(width))) ** 2
            >> Softmax(nr_class)
        )
    model.lsuv = False
    return model