How to use thinc.describe.Dimension in thinc

To help you get started, we've selected a few thinc examples based on popular ways the library is used in public projects.


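Every snippet below follows the same pattern: describe.attributes attaches named descriptors to a Model subclass, and Dimension declares a dimension that is then read and written like an ordinary attribute. Here is a minimal sketch of just that pattern, assuming the thinc 7.x API the snippets use (the Toy class is ours, not thinc's):

from thinc import describe
from thinc.describe import Dimension
from thinc.neural._classes.model import Model


@describe.attributes(
    nO=Dimension("Output size"),  # declares model.nO as a managed dimension
)
class Toy(Model):
    name = "toy"

    def __init__(self, nO=None, **kwargs):
        # Model.__init__ must run first, so the dimension storage exists.
        Model.__init__(self, **kwargs)
        self.nO = nO  # stored through the Dimension descriptor


model = Toy(nO=32)
assert model.nO == 32
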
explosion/thinc · thinc/neural/_classes/hash_embed.py (view on GitHub)
def LSUVinit(model, X, y=None):
    do_lsuv(model.ops, model.vectors, model, X)
    return X


def _uniform_init(lo, hi):
    def wrapped(W, ops):
        if (W ** 2).sum() == 0.0:
            copy_array(W, ops.xp.random.uniform(lo, hi, W.shape))

    return wrapped


# @describe.on_data(LSUVinit)
@describe.attributes(
    nO=Dimension("Vector dimensions"),
    nV=Dimension("Number of vectors"),
    vectors=Weights(
        "Embedding table", lambda obj: (obj.nV, obj.nO), _uniform_init(-0.1, 0.1)
    ),
    d_vectors=Gradient("vectors"),
)
class HashEmbed(Model):
    name = "hash-embed"

    def __init__(self, nO, nV, seed=None, **kwargs):
        Model.__init__(self, **kwargs)
        self.column = kwargs.get("column", 0)
        self.nO = nO
        self.nV = nV
        
        if seed is not None:
            self.seed = seed
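One detail worth noting in the hash_embed.py snippet: _uniform_init returns a closure, and the closure only fills W while the table is still all zeros, so weights restored from disk are never clobbered by re-initialisation. The same behaviour in plain numpy (treating copy_array as roughly numpy.copyto, which is our assumption):

import numpy


def _uniform_init(lo, hi):
    def wrapped(W, ops):
        if (W ** 2).sum() == 0.0:  # only initialise an untouched, all-zero table
            numpy.copyto(W, numpy.random.uniform(lo, hi, W.shape))

    return wrapped


W = numpy.zeros((4, 3))
_uniform_init(-0.1, 0.1)(W, None)  # fills the zeroed table
before = W.copy()
_uniform_init(-0.1, 0.1)(W, None)  # no-op: W is no longer all zeros
assert (W == before).all()
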
explosion/thinc · thinc/neural/_classes/attention.py (view on GitHub)
# coding: utf8
from __future__ import unicode_literals

from ... import describe
from ...describe import Dimension, Synapses, Gradient
from .model import Model


@describe.attributes(
    nO=Dimension("Output size"),
    Q=Synapses(
        "Learned 'query' vector",
        lambda obj: (obj.nO, 1),
        lambda Q, ops: ops.normal_init(Q, Q.shape[0]),
    ),
    dQ=Gradient("Q"),
)
class ParametricAttention(Model):
    """Weight inputs by similarity to a learned vector"""

    name = "para-attn"

    def __init__(self, nO=None, hard=False, **kwargs):
        Model.__init__(self, **kwargs)
        self.nO = nO
        self.hard = hard
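The shape argument to Synapses is a callback, lambda obj: (obj.nO, 1), rather than a tuple. It is evaluated lazily against the model once its dimensions are known, which is why nO=None is a safe constructor default. The idea in isolation (the names here are illustrative):

# The shape callback only runs once the model's dimensions are filled in.
query_shape = lambda obj: (obj.nO, 1)


class DimsKnown:  # stand-in for a model whose nO has been set
    nO = 64


assert query_shape(DimsKnown) == (64, 1)
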
explosion/spaCy · spacy/_ml.py (view on GitHub)
        # The dtype here matches what thinc is expecting -- which differs per
        # platform (by int definition). This should be fixed once the problem
        # is fixed on Thinc's side.
        lengths = self.ops.asarray(
            [arr.shape[0] for arr in batch_keys], dtype=numpy.int_
        )
        batch_keys = self.ops.xp.concatenate(batch_keys)
        batch_vals = self.ops.asarray(self.ops.xp.concatenate(batch_vals), dtype="f")
        return (batch_keys, batch_vals, lengths), None


@describe.on_data(
    _set_dimensions_if_needed, lambda model, X, y: model.init_weights(model)
)
@describe.attributes(
    nI=Dimension("Input size"),
    nF=Dimension("Number of features"),
    nO=Dimension("Output size"),
    nP=Dimension("Maxout pieces"),
    W=Synapses("Weights matrix", lambda obj: (obj.nF, obj.nO, obj.nP, obj.nI)),
    b=Biases("Bias vector", lambda obj: (obj.nO, obj.nP)),
    pad=Synapses(
        "Pad",
        lambda obj: (1, obj.nF, obj.nO, obj.nP),
        lambda M, ops: ops.normal_init(M, 1.0),
    ),
    d_W=Gradient("W"),
    d_pad=Gradient("pad"),
    d_b=Gradient("b"),
)
class PrecomputableAffine(Model):
    def __init__(self, nO=None, nI=None, nF=None, nP=None, **kwargs):
        Model.__init__(self, **kwargs)
        self.nO = nO
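describe.on_data takes any number of hooks and runs them in order the first time the model sees data: the dimensions are inferred first, then init_weights allocates against them. A toy illustration of that ordering (every name below is ours, not thinc's):

import numpy


def infer_dims(model, X, y=None):
    # First hook: fill any dimension that is still None from the batch shape.
    if model.nI is None:
        model.nI = X.shape[1]


def init_weights(model, X, y=None):
    # Second hook: by now nI is known, so allocation is safe.
    model.W = numpy.zeros((4, model.nI))


class Stub:
    nI = None


model = Stub()
X = numpy.zeros((10, 300))
for hook in (infer_dims, init_weights):  # describe.on_data preserves this order
    hook(model, X)
assert model.W.shape == (4, 300)
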
explosion/thinc · thinc/neural/_classes/selu.py (view on GitHub)
def _set_dimensions_if_needed(model, X, y=None):
    if model.nI is None:
        model.nI = X.shape[1]
    if model.nO is None and y is not None:
        if len(y.shape) == 2:
            model.nO = y.shape[1]
        else:
            model.nO = int(y.max()) + 1


@describe.on_data(_set_dimensions_if_needed)
@describe.attributes(
    nB=Dimension("Batch size"),
    nI=Dimension("Input size"),
    nO=Dimension("Output size"),
    W=Synapses(
        "Weights matrix",
        lambda obj: (obj.nO, obj.nI),
        lambda W, ops: ops.normal_init(W, W.shape[-1]),
    ),
    b=Biases("Bias vector", lambda obj: (obj.nO,)),
    d_W=Gradient("W"),
    d_b=Gradient("b"),
)
class SELU(Model):
    name = "selu"

    @property
    def input_shape(self):
        return (self.nB, self.nI)
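_set_dimensions_if_needed infers nO in two ways: for 2-D (one-hot) targets it takes the number of columns, and for 1-D integer labels it takes the largest label plus one. A quick numpy check of both branches:

import numpy


def infer_nO(y):
    # Mirrors the branch in _set_dimensions_if_needed above.
    if len(y.shape) == 2:
        return y.shape[1]
    return int(y.max()) + 1


assert infer_nO(numpy.zeros((8, 5))) == 5        # one-hot targets: column count
assert infer_nO(numpy.array([0, 2, 4, 1])) == 5  # class labels: max label + 1
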
explosion/thinc · thinc/neural/_classes/static_vectors.py (view on GitHub)
from __future__ import unicode_literals

from ... import describe
from ...describe import Dimension, Synapses, Gradient
from .model import Model
from ...extra.load_nlp import get_vectors

try:
    import cupy
except ImportError:
    cupy = None


# @describe.on_data(LSUVinit)
@describe.attributes(
    nM=Dimension("Vector dimensions"),
    nO=Dimension("Size of output"),
    W=Synapses(
        "A projection matrix, to change vector dimensionality",
        lambda obj: (obj.nO, obj.nM),
        lambda W, ops: ops.xavier_uniform_init(W),
    ),
    d_W=Gradient("W"),
)
class StaticVectors(Model):
    """Load a static embedding table, and learn a linear projection from it.

    Out-of-vocabulary items are modded into the table, receiving an arbitrary
    vector (but the same word will always receive the same vector).
    """

    name = "static-vectors"
explosion/thinc · thinc/neural/_classes/embed.py (view on GitHub)
    @contextlib.contextmanager
    def use_params(self, params):
        backup = None
        weights = self._mem.weights
        if self.id in params:
            param = params[self.id]
            backup = weights.copy()
            weights[:] = param
        yield
        if backup is not None:
            weights[:] = backup


@describe.on_data(LSUVinit)
@describe.attributes(
    nM=Dimension("Vector dimensions"),
    nV=Dimension("Number of vectors"),
    nO=Dimension("Size of output"),
    W=Synapses(
        "A projection matrix, to change vector dimensionality",
        lambda obj: (obj.nO, obj.nM),
        lambda W, ops: ops.xavier_uniform_init(W),
    ),
    vectors=Weights(
        "Embedding table", lambda obj: (obj.nV, obj.nM), _uniform_init(-0.1, 0.1)
    ),
    d_W=Gradient("W"),
    d_vectors=Gradient("vectors"),
)
class Embed(Model):
    name = "embed"
explosion/thinc · thinc/neural/_classes/difference.py (view on GitHub)
    def on_data(self, X, y):
        input1, input2 = zip(*X)
        for hook in layer.on_data_hooks:
            hook(layer, input1, y)

    model.on_data_hooks.append(on_data)
    return model


def unit_init(W, ops):
    W.fill(1)


@describe.attributes(
    nO=Dimension("Output size"),
    W=Synapses("Weights matrix", lambda obj: (obj.nO,), unit_init),
    d_W=Gradient("W"),
)
class CauchySimilarity(Model):
    # From Chen (2013)
    def __init__(self, length):
        Model.__init__(self)
        self.nO = length

    def begin_update(self, vec1_vec2, drop=0.0):
        weights = self.W
        vec1, vec2 = vec1_vec2
        diff = vec1 - vec2
        square_diff = diff ** 2
        total = (weights * square_diff).sum(axis=1)
        sim, bp_sim = inverse(total)
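The inverse helper is not shown in this excerpt. Assuming it implements the usual Cauchy kernel from Chen (2013), 1 / (1 + x), the forward pass reduces to a weighted squared distance squashed into (0, 1]:

import numpy


def cauchy_similarity(W, vec1, vec2):
    # Weighted squared difference, squashed to (0, 1] by 1 / (1 + total).
    total = (W * (vec1 - vec2) ** 2).sum(axis=1)
    return 1.0 / (1.0 + total)


W = numpy.ones((3,))
v1 = numpy.zeros((2, 3))
v2 = numpy.zeros((2, 3))
assert (cauchy_similarity(W, v1, v2) == 1.0).all()  # identical vectors -> 1.0
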
explosion/thinc · thinc/neural/_classes/convolution.py (view on GitHub)
# coding: utf8
from __future__ import unicode_literals

from .model import Model
from ... import describe
from ...describe import Dimension, AttributeDescription


@describe.attributes(
    nW=Dimension("Number of surrounding tokens on each side to extract"),
    gap=AttributeDescription("Number of nearest tokens to skip, to offset the window"),
)
class ExtractWindow(Model):
    """Add context to vectors in a sequence by concatenating n surrounding
    vectors.

    If the input is (10, 32) and nW=1, the output will be (10, 96), with
    output[i] made up of (input[i-1], input[i], input[i+1]).
    """

    name = "extract_window"

    def __init__(self, nW=2, gap=0):
        assert gap == 0
        Model.__init__(self)
        self.nW = nW
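The docstring's shape arithmetic is easy to verify: with nW=1, each output row concatenates the row with one zero-padded neighbour on each side, tripling the width. A numpy sketch of the windowing (thinc's real implementation lives in its ops; this is only illustrative):

import numpy


def extract_window(X, nW=1):
    # Concatenate each row with its nW neighbours per side, zero-padded at the edges.
    pad = numpy.zeros((nW, X.shape[1]), dtype=X.dtype)
    padded = numpy.vstack([pad, X, pad])
    cols = [padded[i : i + len(X)] for i in range(2 * nW + 1)]
    return numpy.hstack(cols)


X = numpy.zeros((10, 32))
assert extract_window(X, nW=1).shape == (10, 96)  # (10, 32 * 3)
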
explosion/thinc · thinc/neural/_classes/maxout.py (view on GitHub)
    xp = get_array_module(W)
    scale = xp.sqrt(1.0 / W.shape[-1])
    shape = (W.shape[0], W.shape[-1])
    size = xp.prod(shape)
    for i in range(W.shape[1]):
        xp.copyto(
            W[:, i], xp.random.normal(loc=0, scale=scale, size=size).reshape(shape)
        )


@describe.on_data(_set_dimensions_if_needed)
@describe.output(("nO",))
@describe.input(("nI",))
@describe.attributes(
    nI=Dimension("Size of input"),
    nP=Dimension("Number of pieces"),
    nO=Dimension("Size of output"),
    W=Synapses(
        "The weights matrix", lambda obj: (obj.nO, obj.nP, obj.nI), xavier_uniform_init
    ),
    b=Biases("Bias parameter", lambda obj: (obj.nO, obj.nP)),
    d_W=Gradient("W"),
    d_b=Gradient("b"),
)
class Maxout(Model):
    name = "maxout"

    def __init__(self, nO=None, nI=None, pieces=2, **kwargs):
        Model.__init__(self, **kwargs)
        self.nO = nO
        self.nI = nI
        self.nP = pieces
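The weight shape (nO, nP, nI) declared above fixes the maxout computation: project the input through each of the nP pieces, add the per-piece bias, and keep the best piece per output unit. A sketch of that forward pass (ours, not thinc's kernel):

import numpy


def maxout_forward(W, b, X):
    # (nO, nP, nI) x (nB, nI) -> (nB, nO, nP), then take the best piece.
    acts = numpy.einsum("opi,bi->bop", W, X) + b
    return acts.max(axis=-1)


nB, nI, nO, nP = 4, 8, 5, 2
W = numpy.random.normal(size=(nO, nP, nI))
b = numpy.zeros((nO, nP))
X = numpy.random.normal(size=(nB, nI))
assert maxout_forward(W, b, X).shape == (nB, nO)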