How to use the thinc.api.chain function in thinc

To help you get started, we've selected a few thinc examples based on popular ways thinc.api.chain is used in public projects.

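At its core, chain composes two or more layers into a single pipeline, so the output of each layer feeds the next. Below is a minimal sketch, assuming the legacy thinc 7 API (thinc.api and thinc.v2v) that the snippets on this page use; the layer widths are arbitrary. It shows the two forms you will see throughout these examples: chain called directly, and chain bound to the ">>" operator with Model.define_operators.

from thinc.api import chain
from thinc.v2v import Model, ReLu, Softmax

# Direct call: data flows ReLu -> ReLu -> Softmax.
model = chain(ReLu(64), ReLu(64), Softmax())

# The same model, with ">>" bound to chain inside the context manager.
with Model.define_operators({">>": chain}):
    model = ReLu(64) >> ReLu(64) >> Softmax()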

github explosion / thinc / examples / pytorch_lstm_tagger.py (View on Github)
def main(  # function name assumed; the snippet starts mid-signature and earlier arguments are omitted
    dropout=0.5,
    dropout_decay=1e-4,
    nb_epoch=20,
    L2=1e-6,
):
    using_gpu = prefer_gpu()
    if using_gpu:
        torch.set_default_tensor_type("torch.cuda.FloatTensor")
    cfg = dict(locals())
    print(cfg)
    train_data, check_data, nr_tag = ancora_pos_tags()
    train_data = list(train_data)
    check_data = list(check_data)

    extracter = FeatureExtracter("es", attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    with Model.define_operators({"**": clone, ">>": chain, "+": add, "|": concatenate}):
        lower_case = HashEmbed(width, 100, column=0)
        shape = HashEmbed(width // 2, 200, column=1)
        prefix = HashEmbed(width // 2, 100, column=2)
        suffix = HashEmbed(width // 2, 100, column=3)

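        # ">>" is chain: embed each token, run the features through the BiLSTM, then predict the tag with a softmax layer.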
        model = (
            with_flatten(
                (lower_case | shape | prefix | suffix) >> Maxout(width, pieces=3)
            )
            >> PyTorchBiLSTM(width, width, depth)
            >> with_flatten(Softmax(nr_tag))
        )

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

github explosion / spaCy / spacy / ml / _legacy_tok2vec.py (View on Github)
def Tok2Vec(width, embed_size, **kwargs):
    # Circular imports :(
    from .._ml import CharacterEmbed
    from .._ml import PyTorchBiLSTM

    pretrained_vectors = kwargs.get("pretrained_vectors", None)
    cnn_maxout_pieces = kwargs.get("cnn_maxout_pieces", 3)
    subword_features = kwargs.get("subword_features", True)
    char_embed = kwargs.get("char_embed", False)
    if char_embed:
        subword_features = False
    conv_depth = kwargs.get("conv_depth", 4)
    bilstm_depth = kwargs.get("bilstm_depth", 0)
    cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
    with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
        norm = HashEmbed(width, embed_size, column=cols.index(NORM), name="embed_norm")
        if subword_features:
            prefix = HashEmbed(
                width, embed_size // 2, column=cols.index(PREFIX), name="embed_prefix"
            )
            suffix = HashEmbed(
                width, embed_size // 2, column=cols.index(SUFFIX), name="embed_suffix"
            )
            shape = HashEmbed(
                width, embed_size // 2, column=cols.index(SHAPE), name="embed_shape"
            )
        else:
            prefix, suffix, shape = (None, None, None)
        if pretrained_vectors is not None:
            glove = StaticVectors(pretrained_vectors, width, column=cols.index(ID))

github explosion / thinc / examples / ngram_bow.py (View on Github)
def build_model(nr_class, width, **kwargs):
    with Model.define_operators({"|": concatenate, ">>": chain, "**": clone}):
        model = (
            FeatureExtracter([ORTH])
            >> flatten_add_lengths
            >> with_getitem(0, uniqued(HashEmbed(width, 10000, column=0)))
            >> Pooling(mean_pool)
            >> Softmax(nr_class)
        )
    model.lsuv = False
    return model

github explosion / spaCy / spacy / _ml.py (View on Github)
def concatenate_lists(*layers, **kwargs):  # pragma: no cover
    """Compose two or more models `f`, `g`, etc, such that their outputs are
    concatenated, i.e. `concatenate(f, g)(x)` computes `hstack(f(x), g(x))`
    """
    if not layers:
        return noop()
    drop_factor = kwargs.get("drop_factor", 1.0)
    ops = layers[0].ops
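    # chain each layer with flatten so every output is a single contiguous array before concatenation.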
    layers = [chain(layer, flatten) for layer in layers]
    concat = concatenate(*layers)

    def concatenate_lists_fwd(Xs, drop=0.0):
        if drop is not None:
            drop *= drop_factor
        lengths = ops.asarray([len(X) for X in Xs], dtype="i")
        flat_y, bp_flat_y = concat.begin_update(Xs, drop=drop)
        ys = ops.unflatten(flat_y, lengths)

        def concatenate_lists_bwd(d_ys, sgd=None):
            return bp_flat_y(ops.flatten(d_ys), sgd=sgd)

        return ys, concatenate_lists_bwd

    model = wrap(concatenate_lists_fwd, concat)
    return model

github honnibal / spacy-pretrain-polyaxon / lmao-imdb-1k / pretrain_textcat.py (View on Github)
def build_textcat_model(tok2vec, nr_class, width):
    from thinc.v2v import Model, Softmax
    from thinc.api import flatten_add_lengths, chain
    from thinc.t2v import Pooling, mean_pool

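    # ">>" is bound to chain: pool the tok2vec output over each document, then classify it.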
    with Model.define_operators({">>": chain}):
        model = (
            tok2vec
            >> flatten_add_lengths
            >> Pooling(mean_pool)
            >> Softmax(nr_class, width)
        )
    model.tok2vec = chain(tok2vec, flatten)
    return model

github explosion / spaCy / spacy / ml / tok2vec.py (View on Github)
def Tok2Vec(config):
    doc2feats = make_layer(config["@doc2feats"])
    embed = make_layer(config["@embed"])
    encode = make_layer(config["@encode"])
    field_size = getattr(encode, "receptive_field", 0)
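    # Two chains: the inner one composes embed and encode over flattened token arrays, the outer one prepends the feature extractor.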
    tok2vec = chain(doc2feats, with_flatten(chain(embed, encode), pad=field_size))
    tok2vec.cfg = config
    tok2vec.nO = encode.nO
    tok2vec.embed = embed
    tok2vec.encode = encode
    return tok2vec

github explosion / thinc / examples / mnist_mlp.py (View on Github)
def main(depth=2, width=512, nb_epoch=30):
    prefer_gpu()
    # Configuration here isn't especially good, but it's fine for a demo.
    with Model.define_operators({"**": clone, ">>": chain}):
        model = ReLu(width) >> ReLu(width) >> Softmax()

    train_data, dev_data, _ = datasets.mnist()
    train_X, train_y = model.ops.unzip(train_data)
    dev_X, dev_y = model.ops.unzip(dev_data)

    dev_y = to_categorical(dev_y)
    with model.begin_training(train_X, train_y, L2=1e-6) as (trainer, optimizer):
        epoch_loss = [0.0]

        def report_progress():
            with model.use_params(optimizer.averages):
                print(epoch_loss[-1], model.evaluate(dev_X, dev_y), trainer.dropout)
            epoch_loss.append(0.0)

        trainer.each_epoch.append(report_progress)

github explosion / thinc / examples / spacy_tagger.py (View on Github)
def main(nr_epoch=20, nr_sent=0, width=128, depth=3, max_batch_size=32, dropout=0.3):
    print("Loading spaCy model")
    nlp = spacy.load("en_core_web_md", pipeline=[])
    train_sents, dev_sents, _ = datasets.ewtb_pos_tags()
    train_sents, dev_sents, nr_class = spacy_preprocess(nlp, train_sents, dev_sents)
    if nr_sent >= 1:
        train_sents = train_sents[:nr_sent]

    print("Building the model")
    with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
        model = (
            SpacyVectors
            >> with_flatten(
                Affine(width)
                >> (ExtractWindow(nW=1) >> BatchNorm(Maxout(width))) ** depth
                >> Softmax(nr_class)
            )
        )
    print("Preparing training")
    dev_X, dev_y = zip(*dev_sents)
    dev_y = model.ops.flatten(dev_y)
    dev_y = to_categorical(dev_y, nb_classes=50)
    train_X, train_y = zip(*train_sents)
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.nb_epoch = nr_epoch
        trainer.dropout = dropout

github explosion / spaCy / spacy / cli / train_from_config.py (View on Github)
def create_tb_parser_model(
    tok2vec: Model,
    nr_feature_tokens: StrictInt = 3,
    hidden_width: StrictInt = 64,
    maxout_pieces: StrictInt = 3,
):
    from thinc.v2v import Affine, Model
    from thinc.api import chain
    from spacy._ml import flatten
    from spacy._ml import PrecomputableAffine
    from spacy.syntax._parser_model import ParserModel

    token_vector_width = tok2vec.nO
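    # chain tok2vec with flatten so the parser receives one contiguous array of token vectors.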
    tok2vec = chain(tok2vec, flatten)
    tok2vec.nO = token_vector_width

    lower = PrecomputableAffine(
        hidden_width, nF=nr_feature_tokens, nI=tok2vec.nO, nP=maxout_pieces
    )
    lower.nP = maxout_pieces
    with Model.use_device("cpu"):
        upper = Affine()
    # Initialize weights at zero, as it's a classification layer.
    for desc in upper.descriptions.values():
        if desc.name == "W":
            desc.init = None
    return ParserModel(tok2vec, lower, upper)