How to use the jsonlines.open function in jsonlines

To help you get started, we’ve selected a few jsonlines examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github chainer / chainerrl-visualizer / tests / worker_jobs / test_rollout_job.py View on Github external
step_count = 15
    obs_list = MagicMock()
    render_img_list = MagicMock()

    os.makedirs(os.path.join(tmpdir, 'images'))

    if test_case == 'Unsupported':
        with pytest.raises(Exception):
            rollout(agent, gymlike_env, rollout_dir, step_count, obs_list, render_img_list)
            agent.stop_episode.assert_called_once()
        return

    rollout(agent, gymlike_env, rollout_dir, step_count, obs_list, render_img_list)
    agent.stop_episode.assert_called_once()

    with jsonlines.open('{}/{}'.format(tmpdir, ROLLOUT_LOG_FILE_NAME)) as reader:
        lines_num = 0
        for log_line in reader.iter(type=dict):
            lines_num += 1
        assert lines_num == 15

        # Common log entries for each test case
        assert 'step' in log_line
        assert 'reward' in log_line
        assert 'image_path' in log_line
        assert 'action' in log_line

        if test_case == 'A3C':
            assert 'state_value' in log_line
            assert len(log_line['action_probs']) == 4
        elif test_case == 'PPO':
            assert 'state_value' in log_line
github DeFacto / DeFactoNLP / train_label_classifier.py View on Github external
def predict_test(predictions_test, entailment_predictions_test, new_predictions_file):
    clf = joblib.load('label_classifier.pkl')
    i = 1
    previous_predictions = jsonlines.open(predictions_test)
    with jsonlines.open(new_predictions_file, mode='w') as writer:
        for pred in previous_predictions:
            new_pred = {'id': pred['id'], 'predicted_evidence': []}
            entailment_results_file = entailment_predictions_test + "/claim_" + str(i) + ".json"
            entailment_results_file = codecs.open(entailment_results_file, "r", "utf-8").readlines()
            support_evidence = []
            refute_evidence = []
            nei_evidence = []
            support_count = 0
            refute_count = 0
            nei_count = 0
            support_confidence = 0
            refute_confidence = 0
            nei_confidence = 0
            support_scores = []
            refute_scores = []
            nei_scores = []
github DeFacto / DeFactoNLP / metrics.py View on Github external
train_predictions_file = "predictions/predictions_train.jsonl"
    else:  # type_file == 'dev':
        train_file = "data/dev.jsonl"
        train_relevant_file = "data/dev_relevant_docs.jsonl"
        train_concatenate_file = "data/dev_sentence_selection.jsonl"
        train_predictions_file = "predictions/new_dev_bert_test.jsonl"
else:
    print("Needs to have one argument. Choose:")
    print("train")
    print("dev")
    print("test")
    exit(0)

train_file = jsonlines.open(train_file)
train_relevant_file = jsonlines.open(train_relevant_file)
train_concatenate_file = jsonlines.open(train_concatenate_file)
train_predictions_file = jsonlines.open(train_predictions_file)

train_set = []
train_relevant = []
train_concatenate = []
train_prediction = []

for lines in train_file:
    lines['claim'] = lines['claim'].replace("-LRB-", " ( ")
    lines['claim'] = lines['claim'].replace("-RRB-", " ) ")
    train_set.append(lines)

for lines in train_relevant_file:
    lines['claim'] = lines['claim'].replace("-LRB-", " ( ")
    lines['claim'] = lines['claim'].replace("-RRB-", " ) ")
    train_relevant.append(lines)
github microsoft / nlp-recipes / examples / question_answering / distributed_question_answering_squad_transformers_DDP.py View on Github external
print("preprocessing finished")

    if local_rank in [-1, 0]:

        feature_cache_dir = "./cached_qa_features"
        CACHED_EXAMPLES_TEST_FILE = "cached_examples_test.jsonl"
        CACHED_FEATURES_TEST_FILE = "cached_features_test.jsonl"
        examples_file = os.path.join(feature_cache_dir, CACHED_EXAMPLES_TEST_FILE)
        features_file = os.path.join(feature_cache_dir, CACHED_FEATURES_TEST_FILE)

        if os.path.isdir(feature_cache_dir):
            shutil.rmtree(feature_cache_dir, ignore_errors=True)
        os.mkdir(feature_cache_dir)

        with jsonlines.open(examples_file, "w") as examples_writer, jsonlines.open(
            features_file, "w"
        ) as features_writer:

            examples_writer.write_all(qa_examples_json)
            features_writer.write_all(features_json)

        print("features cahed")

    with Timer() as t:
        qa_extractor.fit(
            train_dataset=train_features,
            num_epochs=NUM_EPOCHS,
            learning_rate=LEARNING_RATE,
            per_gpu_batch_size=PER_GPU_BATCH_SIZE,
            gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
            seed=RANDOM_SEED,
github DeFacto / DeFactoNLP / doc2vec.py View on Github external
text = remove_stopwords(doc)
    return text

# TODO:Remove all STOP-WORDS and Lemmatize every token!!!!!

# full text and processed in ['text'] tag
wiki_folder = "data/wiki-pages-split"
files = os.listdir(wiki_folder)
shuffle(files)

counter = 0

train_text = []
tokens = []
for file in files:
    file_content = jsonlines.open(wiki_folder + "/" + file)
    doc = file_content.read()['text']
    text = pre_process(doc)

    if counter > max_counter:
        # adding required docs by fever with the claim given
        file_content = jsonlines.open(wiki_folder + "/" + "Telemundo.json")
        doc = file_content.read()['text']
        text = pre_process(doc)
        tokens = gensim.utils.simple_preprocess(text)
        print(tokens)
        train_text.append(gensim.models.doc2vec.TaggedDocument(tokens, ["Telemundo.json"]))

        file_content = jsonlines.open(wiki_folder + "/" + "Hispanic_and_Latino_Americans.json")
        doc = file_content.read()['text']
        text = pre_process(doc)
        tokens = gensim.utils.simple_preprocess(text)
github tlatkowski / multihead-siamese-nets / data / anli.py View on Github external
def __init__(self, *args):
        super().__init__(*args)
        self.hypothesis = []
        self.reason = []
        self.label = []
        with jsonlines.open(os.path.join(self.data_dir, 'train.jsonl')) as jsonl_reader:
            for instance in jsonl_reader:
                if instance['label'] is 'n':
                    continue
                self.hypothesis.append(instance['hypothesis'])
                self.reason.append(instance['reason'])
                if instance['label'] is 'e':
                    self.label.append(0)
                else:
                    self.label.append(1)
        
        dataset = pd.DataFrame(
            list(
                zip(
                    self.hypothesis,
                    self.reason,
                    self.label,
github chainer / chainerrl-visualizer / appv0 / chainerrlui / model / experiment.py View on Github external
"log": [],
            "command": "",
            "args": {},
            "environ": {},
            "agents": [],
            "envs": [],
            "action_meanings": [],
        }

        log_file_name = os.path.join(self.path, "log.jsonl")
        command_file_name = os.path.join(self.path, "command.txt")
        args_file_name = os.path.join(self.path, "args.jsonl")
        environ_file_name = os.path.join(self.path, "environ.jsonl")

        if os.path.isfile(log_file_name):
            with jsonlines.open(log_file_name) as reader:
                for obj in reader:
                    experiment["log"].append(obj)

        if os.path.isfile(command_file_name):
            with open(command_file_name) as f:
                experiment["command"] = f.readline()

        if os.path.isfile(args_file_name):
            with jsonlines.open(args_file_name) as reader:
                for obj in reader:
                    experiment["args"] = obj

        if os.path.isfile(environ_file_name):
            with jsonlines.open(environ_file_name) as reader:
                for obj in reader:
                    experiment["environ"] = obj
github danmacnish / cartoonify / raspi_install / download_assets.py View on Github external
def download_drawing_dataset():
    """Download any drawing-dataset ``.bin`` files that are not yet on disk.

    Reads one record from the label-map jsonlines file and uses its values,
    together with the fixed categories ``face``, ``t-shirt`` and ``pants``,
    as the list of required drawing categories.  Every category whose
    ``<category>.bin`` file is absent from ``download_path / 'drawing_dataset'``
    is printed and then passed to ``download_recurse``.

    Returns nothing.  An ``IOError`` raised anywhere in the body is caught
    and reported as a missing label map.
    """
    try:
        path = download_path / 'drawing_dataset'
        with jsonlines.open(str(label_map_path), mode='r') as reader:
            # NOTE(review): presumably the whole mapping lives in a single
            # record (one JSON object); only one read() is performed.
            category_mapping = reader.read()
        print('checking whether drawing files already exist...')
        # dict.values() is a view on Python 3 and cannot be concatenated to
        # a list directly; materialise it first (also fine on Python 2).
        drawing_categories = ['face', 't-shirt', 'pants'] + list(category_mapping.values())
        missing_files = [
            name
            for name in drawing_categories
            if not Path(path / Path(name).with_suffix('.bin')).exists()
        ]
        if missing_files:
            print('{} drawing files missing, downloading the following files: '.format(len(missing_files)))
            for f in missing_files:
                print(f)
            download_recurse(quickdraw_dataset_url, path, missing_files)
    except IOError:
        # NOTE(review): this handler also covers the download step above, so
        # an IOError raised there would be reported with this same message.
        print('label_mapping.jsonl not found')
github DeFacto / DeFacto / python / proof_extraction_train.py View on Github external
def export_defacto_models():
    try:
        job_args = []
        print('searching .pkl files in: ', ROOT_PATH + DEFACTO_OUTPUT_FOLDER)
        i=0
        with jsonlines.open(TRAIN_FILE, mode='r') as reader:
            for obj in reader:
                if i > MAX_TRAINING_DATA:
                    break
                i+=1
                f = Path(ROOT_PATH + DEFACTO_OUTPUT_FOLDER + 'defacto_' + str(obj["id"]) + '.pkl')
                if not f.exists() and obj["label"] != 'NOT ENOUGH INFO':
                    job_args.append((obj["id"], obj["claim"], obj["label"], obj["evidence"][0]))

        print('export_defacto_models: job args created: ' + str(len(job_args)))

        if len(job_args) > 0:
            with Pool(processes=int(4)) as pool:
                err_asyncres = pool.starmap(save_defacto_model, job_args)
            print('done! tot errors:', np.count_nonzero(err_asyncres, 0))
            print('done! tot OK:', len(err_asyncres) - np.count_nonzero(err_asyncres, 0))
        else:
github alexwarstadt / data_generation / generation_projects / inductive_biases / sample_inocculating_data.py View on Github external
train_file = jsonlines.open(os.path.join(dir_path, "train.jsonl"))
    train_control_file = jsonlines.open(os.path.join(dir_path, "control_train.jsonl"))
    train_pairs = read_pairs(train_file)
    train_control_pairs = read_pairs(train_control_file)

    percents = [0.001, 0.003, 0.01]
    for p in percents:
        data = random.sample(train_pairs, int(5000.0 * (1 - p)))
        data.extend(random.sample(train_control_pairs, int(5000 * p)))
        output_file = open(os.path.join(dir_path, "train_%s.jsonl" % str(p)), "w")
        w = jsonlines.Writer(output_file)
        data = unzip_pairs(data)
        w.write_all(data)
        w.close()

    test_file = jsonlines.open(os.path.join(dir_path, "test.jsonl"))
    test_control_file = jsonlines.open(os.path.join(dir_path, "control_test.jsonl"))
    test_data = [x for x in test_file]
    test_data.extend([x for x in test_control_file])
    output_file = open(os.path.join(dir_path, "test_combined.jsonl"), "w")
    w = jsonlines.Writer(output_file)
    w.write_all(test_data)
    w.close()

jsonlines

Library with helpers for the jsonlines file format

BSD-2-Clause
Latest version published 11 months ago

Package Health Score

72 / 100
Full package analysis

Similar packages