def build_text_classifier(nr_class, width=64, **cfg):
depth = cfg.get("depth", 2)
nr_vector = cfg.get("nr_vector", 5000)
pretrained_dims = cfg.get("pretrained_dims", 0)
with Model.define_operators({">>": chain, "+": add, "|": concatenate, "**": clone}):
if cfg.get("low_data") and pretrained_dims:
model = (
SpacyVectors
>> flatten_add_lengths
>> with_getitem(0, Affine(width, pretrained_dims))
>> ParametricAttention(width)
>> Pooling(sum_pool)
>> Residual(ReLu(width, width)) ** 2
>> zero_init(Affine(nr_class, width, drop_factor=0.0))
>> logistic
)
return model
lower = HashEmbed(width, nr_vector, column=1)
prefix = HashEmbed(width // 2, nr_vector, column=2)
suffix = HashEmbed(width // 2, nr_vector, column=3)
shape = HashEmbed(width // 2, nr_vector, column=4)
trained_vectors = FeatureExtracter(
[ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID]
) >> with_flatten(
uniqued(
(lower | prefix | suffix | shape)
>> LN(Maxout(width, width + (width // 2) * 3)),
column=0,
)
)
if pretrained_dims:
static_vectors = SpacyVectors >> with_flatten(
Affine(width, pretrained_dims)
)
# TODO Make concatenate support lists
vectors = concatenate_lists(trained_vectors, static_vectors)
vectors_width = width * 2
else:
vectors = trained_vectors
vectors_width = width
static_vectors = None
tok2vec = vectors >> with_flatten(
momentum=0.9,
dropout=0.5,
dropout_decay=1e-4,
nb_epoch=20,
L2=1e-6,
):
cfg = dict(locals())
print(cfg)
prefer_gpu()
train_data, check_data, nr_tag = ancora_pos_tags()
extracter = FeatureExtracter("es", attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
Model.lsuv = True
with Model.define_operators({"**": clone, ">>": chain, "+": add, "|": concatenate}):
lower_case = HashEmbed(width, 100, column=0)
shape = HashEmbed(width // 2, 200, column=1)
prefix = HashEmbed(width // 2, 100, column=2)
suffix = HashEmbed(width // 2, 100, column=3)
model = with_flatten(
(lower_case | shape | prefix | suffix)
>> Maxout(width, pieces=3)
>> Residual(ExtractWindow(nW=1) >> Maxout(width, pieces=3)) ** depth
>> Softmax(nr_tag),
pad=depth,
)
train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)
n_train = float(sum(len(x) for x in train_X))
global epoch_train_acc
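# Standalone sketch, not part of the tagger script above: it assumes spaCy 2.x
# (the version these thinc 7 snippets target) and shows roughly what the
# FeatureExtracter("es", attrs=[LOWER, SHAPE, PREFIX, SUFFIX]) step feeds the
# embedding table. Each Doc becomes an (n_tokens, 4) array of attribute IDs,
# and the column= argument of each HashEmbed above picks one of those columns
# (column=0 -> LOWER, column=1 -> SHAPE, column=2 -> PREFIX, column=3 -> SUFFIX).
import spacy
from spacy.attrs import LOWER, SHAPE, PREFIX, SUFFIX

nlp = spacy.blank("es")
doc = nlp("Una frase corta")
features = doc.to_array([LOWER, SHAPE, PREFIX, SUFFIX])
print(features.shape)  # (3, 4): three tokens, four attribute columns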
def MultiHashEmbed(config):
    # For backwards compatibility with models trained before the architecture
    # registry, we have to be careful to reproduce exactly the same model
    # structure. One subtle point is that the concatenation operator is binary,
    # so (a | b | c) is parsed as concatenate(concatenate(a, b), c) rather than
    # a single three-way concatenate. That's why the implementation below is a
    # bit ugly.
cols = config["columns"]
width = config["width"]
rows = config["rows"]
norm = HashEmbed(width, rows, column=cols.index("NORM"), name="embed_norm")
if config["use_subwords"]:
prefix = HashEmbed(
width, rows // 2, column=cols.index("PREFIX"), name="embed_prefix"
)
suffix = HashEmbed(
width, rows // 2, column=cols.index("SUFFIX"), name="embed_suffix"
)
shape = HashEmbed(
width, rows // 2, column=cols.index("SHAPE"), name="embed_shape"
)
if config.get("@pretrained_vectors"):
glove = make_layer(config["@pretrained_vectors"])
mix = make_layer(config["@mix"])
with Model.define_operators({">>": chain, "|": concatenate}):
if config["use_subwords"] and config["@pretrained_vectors"]:
mix._layers[0].nI = width * 5
layer = uniqued(
(glove | norm | prefix | suffix | shape) >> mix,
column=cols.index("ORTH"),
)
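# Standalone sketch (assuming thinc 7.x, as used by the snippets here) of the
# point made in the MultiHashEmbed comment: "|" is a binary operator, so
# (a | b | c) builds concatenate(concatenate(a, b), c) rather than a single
# three-way concatenate. The output width is the same either way; only the
# internal nesting differs, which is what older serialized models depend on.
from thinc.api import chain, concatenate
from thinc.i2v import HashEmbed
from thinc.v2v import Model

width, rows = 64, 1000
with Model.define_operators({">>": chain, "|": concatenate}):
    a = HashEmbed(width, rows, column=0)
    b = HashEmbed(width, rows, column=1)
    c = HashEmbed(width, rows, column=2)
    via_operator = a | b | c
    explicit = concatenate(concatenate(a, b), c)
# Both layers map an (n, 3) array of IDs to an (n, width * 3) output.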
def build_model(nr_class, width, depth, conv_depth, vectors_name, **kwargs):
with Model.define_operators({"|": concatenate, ">>": chain, "**": clone}):
embed = (
HashEmbed(width, 5000, column=1)
| StaticVectors(vectors_name, width, column=5)
| HashEmbed(width // 2, 750, column=2)
| HashEmbed(width // 2, 750, column=3)
| HashEmbed(width // 2, 750, column=4)
) >> LN(Maxout(width))
sent2vec = (
with_flatten(embed)
>> Residual(
prepare_self_attention(Affine(width*3, width), nM=width, nH=4)
>> MultiHeadedAttention()
>> with_flatten(Maxout(width, width, pieces=3))
)
>> flatten_add_lengths
>> ParametricAttention(width, hard=False)
>> Pooling(mean_pool)
>> Residual(LN(Maxout(width)))
)
model = (
momentum=0.9,
dropout=0.5,
dropout_decay=1e-4,
nb_epoch=20,
L2=1e-6,
):
cfg = dict(locals())
print(cfg)
prefer_gpu()
train_data, check_data, nr_tag = ancora_pos_tags()
extracter = FeatureExtracter("es", attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
Model.lsuv = True
with Model.define_operators({"**": clone, ">>": chain, "+": add, "|": concatenate}):
lower_case = HashEmbed(width, 100, column=0)
shape = HashEmbed(width // 2, 200, column=1)
prefix = HashEmbed(width // 2, 100, column=2)
suffix = HashEmbed(width // 2, 100, column=3)
model = (
with_flatten(
(lower_case | shape | prefix | suffix)
>> Maxout(width, width+(width//2)*3, pieces=3))
>> PositionEncode(1000, width)
>> Residual(
prepare_self_attention(Affine(width*3, width), nM=width, nH=4)
>> MultiHeadedAttention()
>> with_flatten(Affine(width, width)))
>> with_flatten(Softmax(nr_tag, width))
)
train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
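# Standalone sketch (assuming thinc 7.x) of the with_flatten wrapper used by the
# tagger models above: it concatenates a batch of variable-length sequences into
# one array, applies the wrapped layer once, and splits the result back into
# per-sequence arrays.
import numpy
from thinc.api import with_flatten
from thinc.v2v import Affine

layer = with_flatten(Affine(8, 4))
X = [numpy.zeros((5, 4), dtype="f"), numpy.zeros((2, 4), dtype="f")]
Y = layer(X)
print([y.shape for y in Y])  # [(5, 8), (2, 8)]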
max_batch_size=16,
learn_rate=0.001,
momentum=0.9,
dropout=0.5,
dropout_decay=1e-4,
nb_epoch=20,
L2=1e-6,
):
prefer_gpu()
cfg = dict(locals())
print(cfg)
train_data, check_data, nr_tag = ancora_pos_tags()
extracter = FeatureExtracter("es", attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
with Model.define_operators({"**": clone, ">>": chain, "+": add, "|": concatenate}):
lower_case = HashEmbed(width, 100, column=0)
shape = HashEmbed(width // 2, 200, column=1)
prefix = HashEmbed(width // 2, 100, column=2)
suffix = HashEmbed(width // 2, 100, column=3)
model = (
with_flatten(
(lower_case | shape | prefix | suffix) >> Maxout(width, pieces=3)
)
>> BiLSTM(width, width) ** depth
>> with_flatten(Softmax(nr_tag))
)
train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)
n_train = float(sum(len(x) for x in train_X))
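# Standalone sketch (assuming thinc 7.x) of the "**" operator these scripts bind
# to clone(): layer ** depth stacks `depth` copies of the layer definition in
# sequence, which is how BiLSTM(width, width) ** depth and the
# Residual(...) ** depth blocks above get their depth.
from thinc.api import chain, clone
from thinc.v2v import Model, ReLu

width, depth = 64, 2
with Model.define_operators({">>": chain, "**": clone}):
    stacked = ReLu(width, width) ** depth
# Equivalent to clone(ReLu(width, width), depth), i.e. a chain of two ReLu layers.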
subword_features = False
conv_depth = kwargs.get("conv_depth", 4)
bilstm_depth = kwargs.get("bilstm_depth", 0)
cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
with Model.define_operators(
{">>": chain, "|": concatenate, "**": clone, "+": add, "*": reapply}
):
norm = HashEmbed(width, embed_size, column=cols.index(NORM), name="embed_norm")
if subword_features:
prefix = HashEmbed(
width, embed_size // 2, column=cols.index(PREFIX), name="embed_prefix"
)
suffix = HashEmbed(
width, embed_size // 2, column=cols.index(SUFFIX), name="embed_suffix"
)
shape = HashEmbed(
width, embed_size // 2, column=cols.index(SHAPE), name="embed_shape"
)
else:
prefix, suffix, shape = (None, None, None)
if pretrained_vectors is not None:
glove = StaticVectors(pretrained_vectors, width, column=cols.index(ID))
if subword_features:
embed = uniqued(
(glove | norm | prefix | suffix | shape)
>> LN(Maxout(width, width * 5, pieces=3)),
column=cols.index(ORTH),
)
else:
embed = uniqued(
(glove | norm) >> LN(Maxout(width, width * 2, pieces=3)),
column=cols.index(ORTH),
)
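# Standalone sketch (assuming thinc 7.x) of the uniqued() wrapper used above: it
# runs the wrapped embedding once per distinct key in the given column and
# scatters the results back over the batch, so repeated tokens (keyed on ORTH
# above) are only embedded once per batch.
import numpy
from thinc.api import uniqued
from thinc.i2v import HashEmbed

embed = uniqued(HashEmbed(64, 1000, column=0), column=0)
ids = numpy.asarray([[10], [10], [42]], dtype="uint64")  # token ID 10 repeated
vectors = embed(ids)
print(vectors.shape)  # (3, 64)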