How to use the pycorrector.utils.logger.logger.info function in pycorrector

To help you get started, we’ve selected a few pycorrector examples based on popular ways this function is used in public projects.
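
The pycorrector.utils.logger module exposes a module-level logger object, so calling it is usually just an import away. A minimal sketch (the messages are invented for illustration; the %-style arguments are substituted lazily, assuming the object behaves like a standard logging.Logger, which is how the predict_mask.py example below uses it):

from pycorrector.utils.logger import logger

logger.info("Data loaded.")                # plain message
logger.info("Num examples = %d", 128)      # %-style arguments, substituted by the logger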

github shibing624 / pycorrector / pycorrector / seq2seq / train_generator.py (View on GitHub)
        for t, char in enumerate(input_text):
            encoder_input_data[i, t, input_token_index[char]] = 1.0
        for t, char in enumerate(target_text):
            # decoder_target_data is ahead of decoder_input_data by one timestep
            decoder_input_data[i, t, target_token_index[char]] = 1.0
            if t > 0:
                decoder_target_data[i, t - 1, target_token_index[char]] = 1.0
    logger.info("Data loaded.")

    # split to train and val
    encoder_input_data_train, encoder_input_data_val, decoder_input_data_train, decoder_input_data_val, \
    decoder_target_data_train, decoder_target_data_val = train_test_split(
        encoder_input_data, decoder_input_data, decoder_target_data, test_size=0.1)

    # model
    logger.info("Training seq2seq model...")
    model, encoder_model, decoder_model = create_model(num_encoder_tokens, num_decoder_tokens, rnn_hidden_dim)
    # Run training
    callbacks_list = callback(save_model_path, logger)
    model.fit_generator(
        generator=data_generator(encoder_input_data_train, decoder_input_data_train, decoder_target_data_train,
                                 batch_size),
        steps_per_epoch=(len(encoder_input_data_train) + batch_size - 1) // batch_size,
        epochs=epochs,
        verbose=1,
        validation_data=([encoder_input_data_val, decoder_input_data_val], decoder_target_data_val),
        callbacks=callbacks_list)
    encoder_model.save(encoder_model_path)
    decoder_model.save(decoder_model_path)
    logger.info("Model save to " + save_model_path)
    logger.info("Training has finished.")

github shibing624 / pycorrector / pycorrector / seq2seq / train_generator.py (View on GitHub)
    save_word_dict(target_token_index, save_target_token_path)

    encoder_input_data = np.zeros((len(input_texts), max_input_texts_len, num_encoder_tokens), dtype='float32')
    decoder_input_data = np.zeros((len(input_texts), max_target_texts_len, num_decoder_tokens), dtype='float32')
    decoder_target_data = np.zeros((len(input_texts), max_target_texts_len, num_decoder_tokens), dtype='float32')

    # one hot representation
    for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
        for t, char in enumerate(input_text):
            encoder_input_data[i, t, input_token_index[char]] = 1.0
        for t, char in enumerate(target_text):
            # decoder_target_data is ahead of decoder_input_data by one timestep
            decoder_input_data[i, t, target_token_index[char]] = 1.0
            if t > 0:
                decoder_target_data[i, t - 1, target_token_index[char]] = 1.0
    logger.info("Data loaded.")

    # split to train and val
    encoder_input_data_train, encoder_input_data_val, decoder_input_data_train, decoder_input_data_val, \
    decoder_target_data_train, decoder_target_data_val = train_test_split(
        encoder_input_data, decoder_input_data, decoder_target_data, test_size=0.1)

    # model
    logger.info("Training seq2seq model...")
    model, encoder_model, decoder_model = create_model(num_encoder_tokens, num_decoder_tokens, rnn_hidden_dim)
    # Run training
    callbacks_list = callback(save_model_path, logger)
    model.fit_generator(
        generator=data_generator(encoder_input_data_train, decoder_input_data_train, decoder_target_data_train,
                                 batch_size),
        steps_per_epoch=(len(encoder_input_data_train) + batch_size - 1) // batch_size,
        epochs=epochs,

github shibing624 / pycorrector / pycorrector / seq2seq / train_generator.py (View on GitHub)
    # model
    logger.info("Training seq2seq model...")
    model, encoder_model, decoder_model = create_model(num_encoder_tokens, num_decoder_tokens, rnn_hidden_dim)
    # Run training
    callbacks_list = callback(save_model_path, logger)
    model.fit_generator(
        generator=data_generator(encoder_input_data_train, decoder_input_data_train, decoder_target_data_train,
                                 batch_size),
        steps_per_epoch=(len(encoder_input_data_train) + batch_size - 1) // batch_size,
        epochs=epochs,
        verbose=1,
        validation_data=([encoder_input_data_val, decoder_input_data_val], decoder_target_data_val),
        callbacks=callbacks_list)
    encoder_model.save(encoder_model_path)
    decoder_model.save(decoder_model_path)
    logger.info("Model save to " + save_model_path)
    logger.info("Training has finished.")

    evaluate(encoder_model, decoder_model, num_encoder_tokens,
             num_decoder_tokens, rnn_hidden_dim, target_token_index,
             max_target_texts_len, encoder_input_data_val, input_texts)

github shibing624 / pycorrector / pycorrector / bert / predict_mask.py (View on GitHub)
        for j in range(len(top_score_idx)):
            print('Mask predict is:', tokenizer.convert_ids_to_tokens([top_score_idx[j].item()])[0],
                  ' prob:', top_score_val[j].item())
        print()

    if args.predict_file:
        eval_examples = read_lm_examples(input_file=args.predict_file)
        eval_features = convert_examples_to_features(
            examples=eval_examples,
            tokenizer=tokenizer,
            max_seq_length=args.max_seq_length,
            mask_token=MASK_TOKEN,
            mask_id=MASK_ID)

        logger.info("***** Running predictions *****")
        logger.info("  Num orig examples = %d", len(eval_examples))
        logger.info("  Num split examples = %d", len(eval_features))
        logger.info("Start predict ...")
        for f in eval_features:
            input_ids = torch.tensor([f.input_ids])
            segment_ids = torch.tensor([f.segment_ids])
            predictions = model(input_ids, segment_ids)
            # confirm we were able to predict 'henson'
            mask_positions = f.mask_positions

            if mask_positions:
                for idx, i in enumerate(mask_positions):
                    if not i:
                        continue
                    scores = predictions[0, i]
                    # predicted_index = torch.argmax(scores).item()
                    top_scores = torch.sort(scores, 0, True)
                    top_score_val = top_scores[0][:5]
                    top_score_idx = top_scores[1][:5]

github shibing624 / pycorrector / pycorrector / rnn_crf / infer.py (View on GitHub)
                    has_error = True
                if continue_error and label[k] != current_error and not error_label_id:
                    end_pos = k
                    f.write('%s\t%d\t%d\t%s\t%s\t%s\n' % (sid, start_pos, end_pos,
                                                          ids_label_dict[current_error], sentence, gold_error))
                    continue_error = False
                    current_error = 0
                if continue_error and label[k] != current_error and error_label_id:
                    end_pos = k
                    f.write('%s\t%d\t%d\t%s\t%s\t%s\n' % (sid, start_pos, end_pos,
                                                          ids_label_dict[current_error], sentence, gold_error))
                    start_pos = k + 1
                    current_error = label[k]
            if not has_error:
                f.write('%s\tcorrect\t%s\t%s\n' % (sid, sentence, gold_error))
        logger.info('save to %s done, data size: %d' % (out_path, len(X_test)))
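
Note the difference in formatting style: infer.py builds the message eagerly with the % operator before calling logger.info, while predict_mask.py above passes the format arguments separately and lets the logger substitute them. Both produce the same output; the lazy form simply skips the string formatting when INFO logging is disabled (again assuming a standard logging.Logger):

logger.info('save to %s done, data size: %d' % (out_path, len(X_test)))   # eager: the string is built before the call
logger.info('save to %s done, data size: %d', out_path, len(X_test))      # lazy: built only if the record is actually emitted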