How to use the pycorrector.seq2seq_attention.config module in pycorrector

To help you get started, we've selected a few pycorrector examples based on popular ways the library is used in public projects.
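All of the snippets below read their settings from the module pycorrector/seq2seq_attention/config.py, which exposes plain module-level variables. The sketch below covers only the attribute names the snippets actually use; the concrete values and paths are illustrative placeholders, not the project's real defaults.

# config.py -- illustrative sketch; attribute names match the snippets below,
# but the values and paths are placeholders, not pycorrector's real defaults.
import os

pwd_path = os.path.abspath(os.path.dirname(__file__))

# corpus and artifact paths (placeholders)
raw_train_paths = [os.path.join(pwd_path, 'data/train.xml')]
train_path = os.path.join(pwd_path, 'output/train.txt')
test_path = os.path.join(pwd_path, 'output/test.txt')
save_vocab_path = os.path.join(pwd_path, 'output/vocab.txt')
attn_model_path = os.path.join(pwd_path, 'output/attn_model.h5')

# vocabulary limits
vocab_max_size = 50000
vocab_min_count = 5

# training hyperparameters
batch_size = 64
epochs = 40
maxlen = 400
rnn_hidden_dim = 128
dropout = 0.25
gpu_id = 0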


github shibing624 / pycorrector / pycorrector / seq2seq_attention / train.py
    # The original snippet is truncated here; the constructor call below is a
    # reconstruction and presumably builds this module's Seq2seqAttnModel.
    model = Seq2seqAttnModel(len(vocab2id),
                             attn_model_path=attn_model_path,
                             hidden_dim=hidden_dim,
                             dropout=dropout,
                             gpu_id=gpu_id
                             ).build_model()
    evaluator = Evaluate(model, attn_model_path, vocab2id, id2vocab, maxlen)
    # Stop training once val_loss has not improved for 3 consecutive epochs.
    earlystop = EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='auto')
    model.fit_generator(data_generator(source_texts, target_texts, vocab2id, batch_size, maxlen),
                        steps_per_epoch=(len(source_texts) + batch_size - 1) // batch_size,  # ceiling division
                        epochs=epochs,
                        validation_data=get_validation_data(test_input_texts, test_target_texts, vocab2id, maxlen),
                        callbacks=[evaluator, earlystop])


if __name__ == "__main__":
    train(train_path=config.train_path,
          test_path=config.test_path,
          save_vocab_path=config.save_vocab_path,
          attn_model_path=config.attn_model_path,
          batch_size=config.batch_size,
          epochs=config.epochs,
          maxlen=config.maxlen,
          hidden_dim=config.rnn_hidden_dim,
          dropout=config.dropout,
          vocab_max_size=config.vocab_max_size,
          vocab_min_count=config.vocab_min_count,
          gpu_id=config.gpu_id)
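A detail worth noting in the fit_generator call: steps_per_epoch is computed with the ceiling-division idiom (n + b - 1) // b, so a final partial batch still counts as a training step. A quick standalone check:

# Ceiling division: 1050 samples at batch size 64 -> 17 steps (16 full + 1 partial).
n, batch_size = 1050, 64
steps_per_epoch = (n + batch_size - 1) // batch_size
print(steps_per_epoch)  # 17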
github shibing624 / pycorrector / pycorrector / seq2seq_attention / preprocess_short_text.py
def _save_data(data_list, data_path):
    # Head reconstructed: the original snippet starts mid-function.
    with open(data_path, 'w', encoding='utf-8') as f:
        count = 0
        for src, dst in data_list:
            f.write('src: ' + ' '.join(src) + '\n')
            f.write('dst: ' + ' '.join(dst) + '\n')
            count += 1
        print("save line size:%d to %s" % (count, data_path))


def transform_corpus_data(data_list, train_data_path, test_data_path):
    train_lst, test_lst = train_test_split(data_list, test_size=0.1)
    _save_data(train_lst, train_data_path)
    _save_data(test_lst, test_data_path)


if __name__ == '__main__':
    # train data
    data_list = []
    for path in config.raw_train_paths:
        data_list.extend(parse_xml_file(path))
    transform_corpus_data(data_list, config.train_path, config.test_path)
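For reference, _save_data in this short-text variant emits two space-tokenized lines per sentence pair, in this form (tokens here are illustrative):

src: 少 先 队 员 因 该 为 老 人 让 坐
dst: 少 先 队 员 应 该 为 老 人 让 座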
github shibing624 / pycorrector / pycorrector / seq2seq_attention / preprocess.py
def _save_data(data_list, data_path):
    # Head reconstructed: the original snippet starts mid-function. Unlike the
    # short-text variant above, each pair is written tab-separated on one line.
    with open(data_path, 'w', encoding='utf-8') as f:
        count = 0
        for src, dst in data_list:
            f.write(' '.join(src) + '\t' + ' '.join(dst) + '\n')
            count += 1
        print("save line size:%d to %s" % (count, data_path))


def transform_corpus_data(data_list, train_data_path, test_data_path):
    train_lst, test_lst = train_test_split(data_list, test_size=0.1)
    _save_data(train_lst, train_data_path)
    _save_data(test_lst, test_data_path)


if __name__ == '__main__':
    # train data
    data_list = []
    for path in config.raw_train_paths:
        data_list.extend(parse_xml_file(path))
    transform_corpus_data(data_list, config.train_path, config.test_path)
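transform_corpus_data delegates the 90/10 split to scikit-learn's train_test_split, which also shuffles the pairs. A self-contained illustration with made-up data:

from sklearn.model_selection import train_test_split

# 20 dummy (src_tokens, dst_tokens) pairs
data_list = [([str(i)], [str(i)]) for i in range(20)]
train_lst, test_lst = train_test_split(data_list, test_size=0.1)
print(len(train_lst), len(test_lst))  # 18 2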
github shibing624 / pycorrector / pycorrector / seq2seq_attention / infer.py
    def infer(self, sentence):
        # A method of the Inference class (shown here out of class context).
        return gen_target(sentence, self.model, self.vocab2id, self.id2vocab, self.maxlen, topk=3)


if __name__ == "__main__":
    # Sample Chinese sentences containing errors for the model to correct.
    inputs = [
        '由我起开始做。',
        '没有解决这个问题,',
        '由我起开始做。',
        '由我起开始做',
        '不能人类实现更美好的将来。',
        '这几年前时间,',
        '歌曲使人的感到快乐,',
        '会能够大幅减少互相抱怨的情况。'
    ]
    inference = Inference(save_vocab_path=config.save_vocab_path,
                          attn_model_path=config.attn_model_path,
                          maxlen=400,
                          gpu_id=config.gpu_id)
    for i in inputs:
        target = inference.infer(i)
        print('input:' + i)
        print('output:' + target)
    # Interactive loop: correct sentences typed on stdin.
    while True:
        sent = input('input:')
        print("output:" + inference.infer(sent))