How to use the thinc.neural.util.to_categorical function in thinc

To help you get started, we've selected a few thinc examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github explosion / thinc / examples / imdb_attention.py View on Github external
def main(use_gpu=False, nb_epoch=100):
    """Load and preprocess the IMDB data for the attention example (excerpt).

    Args:
        use_gpu: when true, ``require_gpu()`` is called before any data is
            loaded.
        nb_epoch: not referenced in the part of the function shown here.
    """
    # Fix the random seed to 0.
    fix_random_seed(0)
    if use_gpu:
        require_gpu()
    # NOTE(review): limit=2000 presumably caps the number of examples that
    # are loaded -- confirm against datasets.imdb.
    train, test = datasets.imdb(limit=2000)
    print("Load data")
    # Transpose the (input, label) pairs into parallel tuples.
    train_X, train_y = zip(*train)
    test_X, test_y = zip(*test)
    # Encode the labels with to_categorical over 2 classes, then convert the
    # result with Model.ops.asarray.
    train_y = Model.ops.asarray(to_categorical(train_y, nb_classes=2))
    test_y = Model.ops.asarray(to_categorical(test_y, nb_classes=2))

    nlp = spacy.load("en_vectors_web_lg")
    # Put a sentencizer first in the pipeline; doc.sents is read below.
    nlp.add_pipe(nlp.create_pipe("sentencizer"), first=True)
    # Hand the loaded vocab's vector table (name and data) to register_vectors.
    register_vectors(Model.ops, nlp.vocab.vectors.name, nlp.vocab.vectors.data)

    preprocessor = FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID])
    # Run every text through the pipeline and apply the extractor to the
    # list of sentences of each document; tqdm wraps the pipe iterator.
    train_X = [preprocessor(list(doc.sents)) for doc in tqdm.tqdm(nlp.pipe(train_X))]
    test_X = [preprocessor(list(doc.sents)) for doc in tqdm.tqdm(nlp.pipe(test_X))]

    # Hold out the last 1000 training examples as the dev set.
    dev_X = train_X[-1000:]
    dev_y = train_y[-1000:]
    train_X = train_X[:-1000]
    train_y = train_y[:-1000]
    # NOTE(review): this message is printed after the parsing above has
    # already finished.
    print("Parse data")
    # Sum of the lengths of the preprocessed training items; presumably a
    # sentence count -- confirm what FeatureExtracter returns.
    n_sent = sum([len(list(sents)) for sents in train_X])
github explosion / thinc / examples / quora_similarity.py View on Github external
def preprocess(ops, nlp, rows):
    """Run ``nlp`` over each text pair and encode the labels.

    Every row is unpacked as ``((text1, text2), label)``. Returns a tuple of
    the list of ``(nlp(text1), nlp(text2))`` pairs and the result of
    ``to_categorical`` applied to the labels (one-hot vectors) after they
    have been converted with ``ops.asarray``.
    """
    parsed_pairs, labels = [], []
    for texts, label in rows:
        first, second = texts
        # Parse the first text before the second, one pair at a time.
        parsed_pairs.append((nlp(first), nlp(second)))
        labels.append(label)
    label_array = ops.asarray(labels)
    return parsed_pairs, to_categorical(label_array)
github explosion / thinc / examples / pytorch_mnist_mlp.py View on Github external
def main(depth=2, width=512, nb_epoch=30):
    """Build a wrapped PyTorch feed-forward model for MNIST (excerpt).

    Args:
        depth: forwarded to ``PyTorchFeedForward`` as ``depth``.
        width: forwarded to ``PyTorchFeedForward`` as ``width``.
        nb_epoch: not referenced in the part of the function shown here.
    """
    prefer_gpu()
    # Restrict PyTorch to a single thread.
    torch.set_num_threads(1)

    # The third value returned by datasets.mnist() is discarded.
    train_data, dev_data, _ = datasets.mnist()
    train_X, train_y = Model.ops.unzip(train_data)
    dev_X, dev_y = Model.ops.unzip(dev_data)

    # Only dev_y is passed through to_categorical here; train_y goes to
    # begin_training exactly as it was unzipped.
    dev_y = to_categorical(dev_y)
    # Layer sizes are read from the data: the second dimension of train_X for
    # the input and the second dimension of the encoded dev_y for the output.
    model = PyTorchWrapper(
        PyTorchFeedForward(
            depth=depth,
            width=width,
            input_size=train_X.shape[1],
            output_size=dev_y.shape[1],
        )
    )
    with model.begin_training(train_X, train_y, L2=1e-6) as (trainer, optimizer):
        # A list, so the nested callback can read the last entry and append
        # a new one without rebinding the name.
        epoch_loss = [0.0]

        def report_progress():
            """Print the latest loss, the dev evaluation and the dropout."""
            # with model.use_params(optimizer.averages):
            print(epoch_loss[-1], model.evaluate(dev_X, dev_y), trainer.dropout)
            # Open a fresh 0.0 slot for the next epoch.
            epoch_loss.append(0.0)
github explosion / thinc / examples / attention_tagger.py View on Github external
def preprocess(ops, get_feats, data, nr_tag, npad=4):
    """Turn ``(input, labels)`` pairs into feature arrays and label arrays.

    ``data`` is unzipped into inputs and labels. All inputs are handed to
    ``get_feats`` in one call and each result is converted with
    ``ops.asarray``. Each label entry is encoded by ``to_categorical`` with
    ``nr_tag`` classes and converted the same way. ``npad`` is accepted but
    not used in this function.

    Returns a ``(features, labels)`` tuple of two lists.
    """
    inputs, labels = zip(*data)

    def encode(tags):
        # Encode one label entry and move it onto the ops backend.
        return ops.asarray(to_categorical(tags, nb_classes=nr_tag))

    features = [ops.asarray(feats) for feats in get_feats(inputs)]
    return features, [encode(tags) for tags in labels]
github explosion / thinc / examples / pytorch_lstm_tagger.py View on Github external
def preprocess(ops, get_feats, data, nr_tag, npad=4):
    """Convert ``(input, labels)`` pairs into two lists of arrays.

    The inputs are featurized together by a single ``get_feats`` call and
    each result goes through ``ops.asarray``. Each label entry goes through
    ``to_categorical`` (with ``nb_classes=nr_tag``) and then
    ``ops.asarray``. ``npad`` is part of the signature but is not read here.
    """
    inputs, labels = zip(*data)

    features = []
    for feats in get_feats(inputs):
        features.append(ops.asarray(feats))

    encoded = []
    for tags in labels:
        categorical = to_categorical(tags, nb_classes=nr_tag)
        encoded.append(ops.asarray(categorical))

    return features, encoded
github explosion / thinc / examples / linear_bow.py View on Github external
def main():
    """Prepare IMDB data and start training a ``LinearModel`` (excerpt)."""
    train, dev = datasets.imdb()
    # Transpose the (input, label) pairs into parallel tuples.
    train_X, train_y = zip(*train)
    dev_X, dev_y = zip(*dev)
    model = LinearModel(2)
    # Encode the labels with to_categorical over 2 classes.
    train_y = to_categorical(train_y, nb_classes=2)
    dev_y = to_categorical(dev_y, nb_classes=2)

    nlp = spacy.load("en")
    # Replace each text with a uint64 array holding the `orth` value of
    # every token in the parsed document.
    train_X = [
        model.ops.asarray([tok.orth for tok in doc], dtype="uint64")
        for doc in nlp.pipe(train_X)
    ]
    dev_X = [
        model.ops.asarray([tok.orth for tok in doc], dtype="uint64")
        for doc in nlp.pipe(dev_X)
    ]
    # Only dev_X is passed through `preprocess` in the part shown here;
    # `preprocess` is defined outside this function.
    dev_X = preprocess(model.ops, dev_X)
    with model.begin_training(train_X, train_y, L2=1e-6) as (trainer, optimizer):
        # Trainer settings: no dropout, batch size 512, 3 epochs.
        trainer.dropout = 0.0
        trainer.batch_size = 512
        trainer.nb_epoch = 3
github explosion / thinc / examples / imdb_cnn.py View on Github external
def main(gpu_id=0, nb_epoch=100):
    """Load and preprocess the IMDB data for the CNN example (excerpt).

    Args:
        gpu_id: when ``>= 0``, ``require_gpu(gpu_id=gpu_id)`` is called. The
            default of 0 therefore requires a GPU; pass a negative value to
            skip the call.
        nb_epoch: not referenced in the part of the function shown here.
    """
    # Fix the random seed to 0.
    fix_random_seed(0)
    if gpu_id >= 0:
        require_gpu(gpu_id=gpu_id)
    # NOTE(review): limit=0 presumably means "no limit" -- confirm against
    # datasets.imdb.
    train, test = datasets.imdb(limit=0)
    print("Load data")
    # Transpose the (input, label) pairs into parallel tuples.
    train_X, train_y = zip(*train)
    test_X, test_y = zip(*test)
    # Encode the labels with to_categorical over 2 classes, then convert the
    # result with Model.ops.asarray.
    train_y = Model.ops.asarray(to_categorical(train_y, nb_classes=2))
    test_y = Model.ops.asarray(to_categorical(test_y, nb_classes=2))

    nlp = spacy.load("en_vectors_web_lg")
    # Put a sentencizer first in the pipeline; doc.sents is read below.
    nlp.add_pipe(nlp.create_pipe("sentencizer"), first=True)
    # Hand the loaded vocab's vector table (name and data) to register_vectors.
    register_vectors(Model.ops, nlp.vocab.vectors.name, nlp.vocab.vectors.data)

    preprocessor = FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID])
    # Run every text through the pipeline and apply the extractor to the
    # list of sentences of each document; tqdm wraps the pipe iterator.
    train_X = [preprocessor(list(doc.sents)) for doc in tqdm.tqdm(nlp.pipe(train_X))]
    test_X = [preprocessor(list(doc.sents)) for doc in tqdm.tqdm(nlp.pipe(test_X))]

    # Hold out the last 1000 training examples as the dev set.
    dev_X = train_X[-1000:]
    dev_y = train_y[-1000:]
    train_X = train_X[:-1000]
    train_y = train_y[:-1000]
    # NOTE(review): this message is printed after the parsing above has
    # already finished.
    print("Parse data")
    # Sum of the lengths of the preprocessed training items; reported below
    # as a sentence count.
    n_sent = sum([len(list(sents)) for sents in train_X])
    print("%d sentences" % n_sent)
github explosion / thinc / examples / cnn_twitter_ner.py View on Github external
def preprocess(ops, get_feats, data, nr_tag):
    """Build feature arrays and encoded label arrays from ``data``.

    ``data`` is an iterable of ``(input, labels)`` pairs. The inputs are
    featurized by one ``get_feats`` call and each result is converted with
    ``ops.asarray``. Each label entry is encoded by ``to_categorical`` using
    ``nr_tag`` classes and converted with ``ops.asarray`` as well.

    Returns a ``(features, labels)`` tuple of two lists.
    """
    inputs, labels = zip(*data)
    features = [ops.asarray(feats) for feats in get_feats(inputs)]

    encoded = []
    for tags in labels:
        categorical = to_categorical(tags, nb_classes=nr_tag)
        encoded.append(ops.asarray(categorical))
    return features, encoded
github explosion / thinc / examples / ngram_bow.py View on Github external
def main(use_gpu=False, nb_epoch=50):
    if use_gpu:
        Model.ops = CupyOps()
        Model.Ops = CupyOps
    train, test = datasets.imdb()
    print("Load data")
    train_X, train_y = zip(*train)
    test_X, test_y = zip(*test)
    train_y = to_categorical(train_y, nb_classes=2)
    test_y = to_categorical(test_y, nb_classes=2)

    nlp = Language()

    dev_X = train_X[-1000:]
    dev_y = train_y[-1000:]
    train_X = train_X[:-1000]
    train_y = train_y[:-1000]
    print("Parse data")
    train_X = [nlp.make_doc(x) for x in train_X]
    dev_X = [nlp.make_doc(x) for x in dev_X]

    model = build_model(2, 1)

    print("Begin training")
    with model.begin_training(train_X, train_y, L2=1e-6) as (trainer, optimizer):
        epoch_loss = [0.0]