How to use the pycorrector.rnn_crf.config module in pycorrector

To help you get started, we’ve selected a few pycorrector examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github shibing624 / pycorrector / pycorrector / rnn_crf / infer.py View on Github external
def is_error_label_id(label_id, label_ids_dict):
    """Tell whether ``label_id`` is the id of one of the error tags.

    Only the ids stored under 'M', 'R', 'S' and 'W' in ``label_ids_dict``
    count as errors; this is an explicit allow-list, not "anything other
    than 'O'".

    :param label_id: label id to test.
    :param label_ids_dict: mapping from tag name to label id; it is indexed
        with 'M', 'R', 'S' and 'W' in that order, stopping at the first match.
    :return: result of the first matching comparison, or of the last
        comparison when none match.
    """
    matched = False
    for tag in ('M', 'R', 'S', 'W'):
        matched = label_id == label_ids_dict[tag]
        if matched:
            # Stop early, exactly like a short-circuiting ``or`` chain.
            break
    return matched


if __name__ == '__main__':
    # Script entry point: call infer() with the model path and the three test
    # file paths positionally, then the dictionary paths, prediction output
    # path and model settings by keyword. Every value is read from `config`.
    positional_paths = (
        config.save_model_path,
        config.test_id_path,
        config.test_word_path,
        config.test_label_path,
    )
    keyword_options = {
        'word_dict_path': config.word_dict_path,
        'label_dict_path': config.label_dict_path,
        'save_pred_path': config.save_pred_path,
        'batch_size': config.batch_size,
        'dropout': config.dropout,
        'embedding_dim': config.embedding_dim,
        'rnn_hidden_dim': config.rnn_hidden_dim,
        'maxlen': config.maxlen,
    }
    infer(*positional_paths, **keyword_options)
github shibing624 / pycorrector / pycorrector / rnn_crf / preprocess.py View on Github external
print("save line size:%d to %s" % (count, data_path))


if __name__ == '__main__':
    # Training split: parse every raw XML file listed in the config, pool the
    # word and label rows, then write each pool to its own file.
    train_words = []
    train_labels = []
    for xml_path in config.raw_train_paths:
        # The first element returned by parse_xml_file is not used here.
        _, words, labels = parse_xml_file(xml_path)
        train_words += words
        train_labels += labels
    save_data_list(train_words, config.train_word_path)
    save_data_list(train_labels, config.train_label_path)

    # Test split: config.test_paths is iterated as (input_path, truth_path)
    # pairs; ids, words and labels are pooled and saved separately.
    test_ids = []
    test_words = []
    test_labels = []
    for input_path, truth_path in config.test_paths.items():
        ids, words, labels = parse_txt_file(input_path, truth_path)
        test_ids += ids
        test_words += words
        test_labels += labels
    save_data_list(test_ids, config.test_id_path)
    save_data_list(test_words, config.test_word_path)
    save_data_list(test_labels, config.test_label_path)
github shibing624 / pycorrector / pycorrector / rnn_crf / infer.py View on Github external
ids_label_dict[current_error], sentence, gold_error))
                    start_pos = k + 1
                    current_error = label[k]
            if not has_error:
                f.write('%s\tcorrect\t%s\t%s\n' % (sid, sentence, gold_error))
        logger.info('save to %s done, data size: %d' % (out_path, len(X_test)))


def is_error_label_id(label_id, label_ids_dict):
    """Tell whether ``label_id`` is the id of one of the error tags.

    Only the ids stored under 'M', 'R', 'S' and 'W' in ``label_ids_dict``
    count as errors; this is an explicit allow-list, not "anything other
    than 'O'".

    :param label_id: label id to test.
    :param label_ids_dict: mapping from tag name to label id; it is indexed
        with 'M', 'R', 'S' and 'W' in that order, stopping at the first match.
    :return: result of the first matching comparison, or of the last
        comparison when none match.
    """
    matched = False
    for tag in ('M', 'R', 'S', 'W'):
        matched = label_id == label_ids_dict[tag]
        if matched:
            # Stop early, exactly like a short-circuiting ``or`` chain.
            break
    return matched


if __name__ == '__main__':
    # Script entry point: call infer() with the model path and the three test
    # file paths positionally, then the dictionary paths, prediction output
    # path and model settings by keyword. Every value is read from `config`.
    positional_paths = (
        config.save_model_path,
        config.test_id_path,
        config.test_word_path,
        config.test_label_path,
    )
    keyword_options = {
        'word_dict_path': config.word_dict_path,
        'label_dict_path': config.label_dict_path,
        'save_pred_path': config.save_pred_path,
        'batch_size': config.batch_size,
        'dropout': config.dropout,
        'embedding_dim': config.embedding_dim,
        'rnn_hidden_dim': config.rnn_hidden_dim,
        'maxlen': config.maxlen,
    }
    infer(*positional_paths, **keyword_options)
github shibing624 / pycorrector / pycorrector / rnn_crf / preprocess.py View on Github external
return id_lst, word_lst, label_lst


def save_data_list(data_list, data_path):
    """Write each item of ``data_list`` as one space-joined line of a file.

    The file at ``data_path`` is opened for writing as UTF-8 (replacing any
    existing content). After writing, the number of lines written and the
    path are printed.

    :param data_list: iterable whose items are iterables of strings.
    :param data_path: path of the output file.
    """
    written = 0
    with open(data_path, 'w', encoding='utf-8') as out:
        # enumerate(start=1) leaves `written` equal to the number of lines;
        # it stays 0 when data_list is empty.
        for written, tokens in enumerate(data_list, start=1):
            out.write(' '.join(tokens) + '\n')
    print("save line size:%d to %s" % (written, data_path))


if __name__ == '__main__':
    # Training split: parse every raw XML file listed in the config, pool the
    # word and label rows, then write each pool to its own file.
    train_words = []
    train_labels = []
    for xml_path in config.raw_train_paths:
        # The first element returned by parse_xml_file is not used here.
        _, words, labels = parse_xml_file(xml_path)
        train_words += words
        train_labels += labels
    save_data_list(train_words, config.train_word_path)
    save_data_list(train_labels, config.train_label_path)

    # Test split: config.test_paths is iterated as (input_path, truth_path)
    # pairs; ids, words and labels are pooled and saved separately.
    test_ids = []
    test_words = []
    test_labels = []
    for input_path, truth_path in config.test_paths.items():
        ids, words, labels = parse_txt_file(input_path, truth_path)
        test_ids += ids
        test_words += words
        test_labels += labels
    save_data_list(test_ids, config.test_id_path)
    save_data_list(test_words, config.test_word_path)
    save_data_list(test_labels, config.test_label_path)
github shibing624 / pycorrector / pycorrector / rnn_crf / infer.py View on Github external
'S'] or label_id == label_ids_dict['W']


if __name__ == '__main__':
    # Script entry point: call infer() with the model path and the three test
    # file paths positionally, then the dictionary paths, prediction output
    # path and model settings by keyword. Every value is read from `config`.
    positional_paths = (
        config.save_model_path,
        config.test_id_path,
        config.test_word_path,
        config.test_label_path,
    )
    keyword_options = {
        'word_dict_path': config.word_dict_path,
        'label_dict_path': config.label_dict_path,
        'save_pred_path': config.save_pred_path,
        'batch_size': config.batch_size,
        'dropout': config.dropout,
        'embedding_dim': config.embedding_dim,
        'rnn_hidden_dim': config.rnn_hidden_dim,
        'maxlen': config.maxlen,
    }
    infer(*positional_paths, **keyword_options)
github shibing624 / pycorrector / pycorrector / rnn_crf / infer.py View on Github external
f.write('%s\tcorrect\t%s\t%s\n' % (sid, sentence, gold_error))
        logger.info('save to %s done, data size: %d' % (out_path, len(X_test)))


def is_error_label_id(label_id, label_ids_dict):
    """Tell whether ``label_id`` is the id of one of the error tags.

    Only the ids stored under 'M', 'R', 'S' and 'W' in ``label_ids_dict``
    count as errors; this is an explicit allow-list, not "anything other
    than 'O'".

    :param label_id: label id to test.
    :param label_ids_dict: mapping from tag name to label id; it is indexed
        with 'M', 'R', 'S' and 'W' in that order, stopping at the first match.
    :return: result of the first matching comparison, or of the last
        comparison when none match.
    """
    matched = False
    for tag in ('M', 'R', 'S', 'W'):
        matched = label_id == label_ids_dict[tag]
        if matched:
            # Stop early, exactly like a short-circuiting ``or`` chain.
            break
    return matched


if __name__ == '__main__':
    # Script entry point: call infer() with the model path and the three test
    # file paths positionally, then the dictionary paths, prediction output
    # path and model settings by keyword. Every value is read from `config`.
    positional_paths = (
        config.save_model_path,
        config.test_id_path,
        config.test_word_path,
        config.test_label_path,
    )
    keyword_options = {
        'word_dict_path': config.word_dict_path,
        'label_dict_path': config.label_dict_path,
        'save_pred_path': config.save_pred_path,
        'batch_size': config.batch_size,
        'dropout': config.dropout,
        'embedding_dim': config.embedding_dim,
        'rnn_hidden_dim': config.rnn_hidden_dim,
        'maxlen': config.maxlen,
    }
    infer(*positional_paths, **keyword_options)
github shibing624 / pycorrector / pycorrector / rnn_crf / infer.py View on Github external
def is_error_label_id(label_id, label_ids_dict):
    """Tell whether ``label_id`` is the id of one of the error tags.

    Only the ids stored under 'M', 'R', 'S' and 'W' in ``label_ids_dict``
    count as errors; this is an explicit allow-list, not "anything other
    than 'O'".

    :param label_id: label id to test.
    :param label_ids_dict: mapping from tag name to label id; it is indexed
        with 'M', 'R', 'S' and 'W' in that order, stopping at the first match.
    :return: result of the first matching comparison, or of the last
        comparison when none match.
    """
    matched = False
    for tag in ('M', 'R', 'S', 'W'):
        matched = label_id == label_ids_dict[tag]
        if matched:
            # Stop early, exactly like a short-circuiting ``or`` chain.
            break
    return matched


if __name__ == '__main__':
    # Script entry point: call infer() with the model path and the three test
    # file paths positionally, then the dictionary paths, prediction output
    # path and model settings by keyword. Every value is read from `config`.
    positional_paths = (
        config.save_model_path,
        config.test_id_path,
        config.test_word_path,
        config.test_label_path,
    )
    keyword_options = {
        'word_dict_path': config.word_dict_path,
        'label_dict_path': config.label_dict_path,
        'save_pred_path': config.save_pred_path,
        'batch_size': config.batch_size,
        'dropout': config.dropout,
        'embedding_dim': config.embedding_dim,
        'rnn_hidden_dim': config.rnn_hidden_dim,
        'maxlen': config.maxlen,
    }
    infer(*positional_paths, **keyword_options)
github shibing624 / pycorrector / pycorrector / rnn_crf / infer.py View on Github external
logger.info('save to %s done, data size: %d' % (out_path, len(X_test)))


def is_error_label_id(label_id, label_ids_dict):
    """Tell whether ``label_id`` is the id of one of the error tags.

    Only the ids stored under 'M', 'R', 'S' and 'W' in ``label_ids_dict``
    count as errors; this is an explicit allow-list, not "anything other
    than 'O'".

    :param label_id: label id to test.
    :param label_ids_dict: mapping from tag name to label id; it is indexed
        with 'M', 'R', 'S' and 'W' in that order, stopping at the first match.
    :return: result of the first matching comparison, or of the last
        comparison when none match.
    """
    matched = False
    for tag in ('M', 'R', 'S', 'W'):
        matched = label_id == label_ids_dict[tag]
        if matched:
            # Stop early, exactly like a short-circuiting ``or`` chain.
            break
    return matched


if __name__ == '__main__':
    # Script entry point: call infer() with the model path and the three test
    # file paths positionally, then the dictionary paths, prediction output
    # path and model settings by keyword. Every value is read from `config`.
    positional_paths = (
        config.save_model_path,
        config.test_id_path,
        config.test_word_path,
        config.test_label_path,
    )
    keyword_options = {
        'word_dict_path': config.word_dict_path,
        'label_dict_path': config.label_dict_path,
        'save_pred_path': config.save_pred_path,
        'batch_size': config.batch_size,
        'dropout': config.dropout,
        'embedding_dim': config.embedding_dim,
        'rnn_hidden_dim': config.rnn_hidden_dim,
        'maxlen': config.maxlen,
    }
    infer(*positional_paths, **keyword_options)
github shibing624 / pycorrector / pycorrector / rnn_crf / infer.py View on Github external
start_pos = k + 1
                    current_error = label[k]
            if not has_error:
                f.write('%s\tcorrect\t%s\t%s\n' % (sid, sentence, gold_error))
        logger.info('save to %s done, data size: %d' % (out_path, len(X_test)))


def is_error_label_id(label_id, label_ids_dict):
    """Tell whether ``label_id`` is the id of one of the error tags.

    Only the ids stored under 'M', 'R', 'S' and 'W' in ``label_ids_dict``
    count as errors; this is an explicit allow-list, not "anything other
    than 'O'".

    :param label_id: label id to test.
    :param label_ids_dict: mapping from tag name to label id; it is indexed
        with 'M', 'R', 'S' and 'W' in that order, stopping at the first match.
    :return: result of the first matching comparison, or of the last
        comparison when none match.
    """
    matched = False
    for tag in ('M', 'R', 'S', 'W'):
        matched = label_id == label_ids_dict[tag]
        if matched:
            # Stop early, exactly like a short-circuiting ``or`` chain.
            break
    return matched


if __name__ == '__main__':
    # Script entry point: call infer() with the model path and the three test
    # file paths positionally, then the dictionary paths, prediction output
    # path and model settings by keyword. Every value is read from `config`.
    positional_paths = (
        config.save_model_path,
        config.test_id_path,
        config.test_word_path,
        config.test_label_path,
    )
    keyword_options = {
        'word_dict_path': config.word_dict_path,
        'label_dict_path': config.label_dict_path,
        'save_pred_path': config.save_pred_path,
        'batch_size': config.batch_size,
        'dropout': config.dropout,
        'embedding_dim': config.embedding_dim,
        'rnn_hidden_dim': config.rnn_hidden_dim,
        'maxlen': config.maxlen,
    }
    infer(*positional_paths, **keyword_options)
github shibing624 / pycorrector / pycorrector / rnn_crf / infer.py View on Github external
def is_error_label_id(label_id, label_ids_dict):
    """Tell whether ``label_id`` is the id of one of the error tags.

    Only the ids stored under 'M', 'R', 'S' and 'W' in ``label_ids_dict``
    count as errors; this is an explicit allow-list, not "anything other
    than 'O'".

    :param label_id: label id to test.
    :param label_ids_dict: mapping from tag name to label id; it is indexed
        with 'M', 'R', 'S' and 'W' in that order, stopping at the first match.
    :return: result of the first matching comparison, or of the last
        comparison when none match.
    """
    matched = False
    for tag in ('M', 'R', 'S', 'W'):
        matched = label_id == label_ids_dict[tag]
        if matched:
            # Stop early, exactly like a short-circuiting ``or`` chain.
            break
    return matched


if __name__ == '__main__':
    # Script entry point: call infer() with the model path and the three test
    # file paths positionally, then the dictionary paths, prediction output
    # path and model settings by keyword. Every value is read from `config`.
    positional_paths = (
        config.save_model_path,
        config.test_id_path,
        config.test_word_path,
        config.test_label_path,
    )
    keyword_options = {
        'word_dict_path': config.word_dict_path,
        'label_dict_path': config.label_dict_path,
        'save_pred_path': config.save_pred_path,
        'batch_size': config.batch_size,
        'dropout': config.dropout,
        'embedding_dim': config.embedding_dim,
        'rnn_hidden_dim': config.rnn_hidden_dim,
        'maxlen': config.maxlen,
    }
    infer(*positional_paths, **keyword_options)