Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def init(tasks_base_path) -> Tuple[TaggedCorpus, TextRegressor, ModelTrainer]:
    """Build the regression test fixtures: corpus, model, and trainer.

    Args:
        tasks_base_path: Base directory containing the REGRESSION task data.

    Returns:
        Tuple of (corpus, regressor model, trainer) wired together and ready
        for training.
    """
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.REGRESSION, tasks_base_path)

    glove_embedding: WordEmbeddings = WordEmbeddings("glove")
    # Small RNN document embedding: hidden size 128, 1 layer, 64-dim
    # reprojection.  NOTE(review): the positional flags mirror
    # DocumentRNNEmbeddings' signature — confirm against the installed
    # flair version.
    document_embeddings: DocumentRNNEmbeddings = DocumentRNNEmbeddings(
        [glove_embedding], 128, 1, False, 64, False, False
    )

    model = TextRegressor(document_embeddings)
    trainer = ModelTrainer(model, corpus)

    return corpus, model, trainer
# NOTE(review): truncated fragment — the opening of this call (presumably
# `corpus = ColumnCorpus(` or similar) lies above this chunk and is not
# visible here; all original indentation has also been lost.
data_folder=tasks_base_path / "fashion", column_format={0: "text", 2: "ner"}
)
# Build the NER tag dictionary from the corpus.
tag_dictionary = corpus.make_tag_dictionary("ner")
embeddings = WordEmbeddings("turian")
# Small BiLSTM tagger without a CRF output layer (use_crf=False).
tagger: SequenceTagger = SequenceTagger(
hidden_size=64,
embeddings=embeddings,
tag_dictionary=tag_dictionary,
tag_type="ner",
use_crf=False,
)
# initialize trainer
trainer: ModelTrainer = ModelTrainer(tagger, corpus)
# Short deterministic training run (2 epochs, no shuffling).
trainer.train(
results_base_path,
learning_rate=0.1,
mini_batch_size=2,
max_epochs=2,
shuffle=False,
)
# Reload the model saved by the training run above.
loaded_model: SequenceTagger = SequenceTagger.load(
results_base_path / "final-model.pt"
)
# Sample inputs, including an (almost) empty sentence edge case.
sentence = Sentence("I love Berlin")
sentence_empty = Sentence(" ")
# NOTE(review): truncated fragment — the opening of this call (presumably
# `corpus = ColumnCorpus(` or similar) is not visible in this chunk, and
# the original indentation has been lost.
data_folder=tasks_base_path / "fashion", column_format={0: "text", 2: "ner"}
)
# Build the NER tag dictionary from the corpus.
tag_dictionary = corpus.make_tag_dictionary("ner")
# Variant of the previous snippet using contextual string embeddings.
embeddings = FlairEmbeddings("news-forward-fast")
tagger: SequenceTagger = SequenceTagger(
hidden_size=64,
embeddings=embeddings,
tag_dictionary=tag_dictionary,
tag_type="ner",
use_crf=False,
)
# initialize trainer
trainer: ModelTrainer = ModelTrainer(tagger, corpus)
# Short deterministic training run (2 epochs, no shuffling).
trainer.train(
results_base_path,
learning_rate=0.1,
mini_batch_size=2,
max_epochs=2,
shuffle=False,
)
# Reload the model saved by the training run above.
loaded_model: SequenceTagger = SequenceTagger.load(
results_base_path / "final-model.pt"
)
# Sample inputs, including an (almost) empty sentence edge case.
sentence = Sentence("I love Berlin")
sentence_empty = Sentence(" ")
# NOTE(review): truncated fragment — the opening of this call (presumably
# `corpus = ColumnCorpus(` or similar) is not visible in this chunk, and
# the original indentation has been lost.
data_folder=tasks_base_path / "fashion", column_format={0: "text", 2: "ner"}
)
# Build the NER tag dictionary from the corpus.
tag_dictionary = corpus.make_tag_dictionary("ner")
embeddings = WordEmbeddings("turian")
tagger: SequenceTagger = SequenceTagger(
hidden_size=64,
embeddings=embeddings,
tag_dictionary=tag_dictionary,
tag_type="ner",
use_crf=False,
)
# initialize trainer
# Variant of the previous snippets: trains with the Adam optimizer
# instead of the trainer's default.
trainer: ModelTrainer = ModelTrainer(tagger, corpus, optimizer=Adam)
trainer.train(
results_base_path,
learning_rate=0.1,
mini_batch_size=2,
max_epochs=2,
shuffle=False,
)
# Reload the model saved by the training run above.
loaded_model: SequenceTagger = SequenceTagger.load(
results_base_path / "final-model.pt"
)
# Sample inputs, including an (almost) empty sentence edge case.
sentence = Sentence("I love Berlin")
sentence_empty = Sentence(" ")
def test_train_load_use_classifier(results_base_path, tasks_base_path):
    """Train a small text classifier on IMDB, then reload and reuse it.

    Args:
        results_base_path: Directory where the trainer writes checkpoints
            and the final model.
        tasks_base_path: Base directory containing the `imdb` dataset.
    """
    corpus = flair.datasets.ClassificationCorpus(tasks_base_path / "imdb")
    label_dict = corpus.make_label_dictionary()

    word_embedding: WordEmbeddings = WordEmbeddings("turian")
    document_embeddings: DocumentRNNEmbeddings = DocumentRNNEmbeddings(
        [word_embedding], 128, 1, False, 64, False, False
    )

    model: TextClassifier = TextClassifier(document_embeddings, label_dict, False)

    trainer = ModelTrainer(model, corpus)
    trainer.train(results_base_path, max_epochs=2, shuffle=False)

    sentence = Sentence("Berlin is a really nice city.")

    # NOTE(review): assumes predict() returns the annotated sentences
    # (older flair API) — every predicted label must carry a value and a
    # probability-like score in [0, 1].
    for s in model.predict(sentence):
        for l in s.labels:
            assert l.value is not None
            assert 0.0 <= l.score <= 1.0
            assert type(l.score) is float

    # Reload the model saved by the training run and predict again.
    loaded_model = TextClassifier.load(results_base_path / "final-model.pt")

    sentence = Sentence("I love Berlin")
    sentence_empty = Sentence(" ")

    loaded_model.predict(sentence)
# NOTE(review): truncated fragment — `word_embeddings`, `label_dict`,
# `checkpoint`, `file_path`, and `n_epochs` are defined above this chunk
# and not visible here; the original indentation has been lost.
# Initialize document embedding by passing list of word embeddings
document_embeddings = DocumentRNNEmbeddings(
word_embeddings,
hidden_size=512,
reproject_words=True,
reproject_words_dimension=256,
)
# Define classifier
classifier = TextClassifier(
document_embeddings,
label_dictionary=label_dict,
multi_label=False
)
# Fresh trainer when no checkpoint path was supplied; otherwise resume.
if not checkpoint:
trainer = ModelTrainer(classifier, corpus)
else:
# If checkpoint file is defined, resume training
checkpoint = classifier.load_checkpoint(Path(checkpoint))
trainer = ModelTrainer.load_from_checkpoint(checkpoint, corpus)
# Begin training (enable checkpointing to continue training at a later time, if desired)
trainer.train(
file_path,
EvaluationMetric.MACRO_F1_SCORE,
max_epochs=n_epochs,
checkpoint=True
)
# Plot curves and store weights and losses
plotter = Plotter()
plotter.plot_training_curves(file_path / 'loss.tsv')
# NOTE(review): truncated fragment — this is the tail of a word-embeddings
# list literal whose opening bracket lies above this chunk; the original
# indentation has been lost.
FlairEmbeddings('news-backward'),
]
# 4. init document embedding by passing list of word embeddings
document_embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings(word_embeddings,
hidden_size=512,
reproject_words=True,
reproject_words_dimension=256,
)
# 5. create the text classifier
classifier = TextClassifier(document_embeddings, label_dictionary=label_dict, multi_label=False, attention=True)
# 6. initialize the text classifier trainer
trainer = ModelTrainer(classifier, corpus)
# 7. start the training
# Long run (up to 150 epochs) with LR annealing: halve the rate after
# 5 epochs without improvement.
trainer.train('resources/taggers/ag_news',
learning_rate=0.1,
mini_batch_size=32,
anneal_factor=0.5,
patience=5,
max_epochs=150)
# 8. plot training curves (optional)
from flair.visual.training_curves import Plotter
plotter = Plotter()
plotter.plot_training_curves('resources/taggers/ag_news/loss.tsv')
plotter.plot_weights('resources/taggers/ag_news/weights.txt')
def train(self, model_dir, tagger, corpus, max_epoch=150, gpu=True):
    """Train *tagger* on *corpus* and save the model under *model_dir*.

    Args:
        model_dir: Output directory for checkpoints and the final model.
        tagger: flair sequence tagger (or compatible model) to train.
        corpus: Corpus providing train/dev/test splits.
        max_epoch: Maximum number of training epochs.
        gpu: If True, keep computed embeddings in GPU memory between
            epochs; otherwise store them on the CPU.
    """
    # 6. initialize trainer
    from flair.trainers import ModelTrainer

    trainer: ModelTrainer = ModelTrainer(tagger, corpus)

    # Where flair keeps computed embeddings between epochs.
    embeddings_storage_mode = 'gpu' if gpu else 'cpu'

    # start training & save model to model_dir
    trainer.train(model_dir,
                  learning_rate=0.1,
                  mini_batch_size=32,
                  max_epochs=max_epoch,
                  embeddings_storage_mode=embeddings_storage_mode,
                  )