How to use squad - 10 common examples

To help you get started, we've selected a few squad examples based on popular ways it's used in public projects. All of the snippets below come from allenai's document-qa repository.
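
Most of these examples share one entry point: construct a SquadCorpus and pull its train or dev split. Here is a minimal sketch of that pattern; the import path follows the current docqa package layout and is an assumption (older snapshots of the repo used different module paths), and the corpus expects the SQuAD data to have been downloaded and preprocessed first:

from docqa.squad.squad_data import SquadCorpus  # assumed import path

corpus = SquadCorpus()           # loads the preprocessed SQuAD data from disk
train_docs = corpus.get_train()  # training documents, as used by the training scripts below
dev_points = corpus.get_dev()    # dev questions, as used by the analysis scripts below

The training scripts then wrap the corpus in FixedParagraphQaTrainingData and hand it to trainer.start_training, while the analysis scripts iterate over the dev split directly.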


github allenai / document-qa / data_analysis / show_dropped_names.py
def show_answers():
    print("Loading...")
    squad = False
    if squad:
        corpus = SquadCorpus()
        docs = corpus.get_train()
        data = split_docs(docs)
    else:
        stop = NltkPlusStopWords()
        data = PreprocessedData(TriviaQaWebDataset(),
                                ExtractSingleParagraph(MergeParagraphs(400), TopTfIdf(stop, 1), intern=True),
                                InMemoryWebQuestionBuilder(None, None),
                                eval_on_verified=False
                                )
        data.load_preprocess("triviaqa-web-merge400-tfidf1.pkl.gz")
        data = data.get_train().data
    print("Get voc...")

    # Count corpus word frequencies and initialize the name detector from them
    detector = NameDetector()
    wc = QaCorpusLazyStats(data).get_word_counts()
    detector.init(wc)
github allenai / document-qa / train_squad / train_base5.py
FullyConnectedMerge(160)),
        match_encoder=SequenceMapperSeq(
            BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
            DropoutLayer(0.8),
            StaticAttentionSelf(DotProductProject(160, bias=True, scale=True, share_project=True),
                                FullyConnectedMerge(160)),
        ),
        predictor=BoundsPredictor(ChainBiMapper(
            first_layer=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
            second_layer=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
        ))
    )
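    # Save this script's own source as the run notes (passed to trainer.start_training below)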
    with open(__file__, "r") as f:
        notes = f.read()

    corpus = SquadCorpus()
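    # Batch size 45; "bucket_context_words_3" buckets training batches by context length, eval uses plain "context_words" batching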
    train_batching = Batcher(45, "bucket_context_words_3", True, False)
    eval_batching = Batcher(45, "context_words", False, False)
    data = FixedParagraphQaTrainingData(corpus, None, train_batching, eval_batching)

    eval = [LossEvaluator(), BoundedSpanEvaluator(bound=[17]), SentenceSpanEvaluator()]
    trainer.start_training(data, model, train_params, eval, trainer.ModelDir(out), notes, False)
github allenai / document-qa / train_squad / r_net.py
SelfAttention(RecurrentAttention(GruCellSpec(75), direction="bidirectional", gated=True)),
                                        DropoutLayer(0.8)),
        predictor=ChainConcatPredictor(
            start_layer=SequenceMapperSeq(
                BiRecurrentMapper(LstmCellSpec(100, keep_probs=0.8)),
                BiRecurrentMapper(LstmCellSpec(100, keep_probs=0.8))),
            end_layer=BiRecurrentMapper(LstmCellSpec(100, keep_probs=0.8))
        )
    )

    with open(__file__, "r") as f:
        notes = f.read()

    eval = [LossEvaluator(), SpanEvaluator(), SentenceSpanEvaluator()]

    corpus = SquadCorpus()
    params = BatchingParameters(60, 60, "bucket_context_words_3",
                                "context_words", True, False)
    data = FixedParagraphQaTrainingData(corpus, None, params, [])

    trainer.start_training(data, model, train_params, eval, trainer.ModelDir(out), notes, False)
github allenai / document-qa / docqa / data_analysis / visualize_full_doc_errors.py
def main():
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("answers")
    args = parser.parse_args()

    data = SquadCorpus()
    origin_mapping = data.get_original_text_mapping()

    stop = set(stopwords.words('english'))

    with open(args.answers, "r") as f:
        answers = [QuestionAnswer(**x) for x in json.load(f)]

    dev_data = {x.question_id: x for x in data.get_dev()}
    paragraph_map = {}
    for p in dev_data.values():
        paragraph_map[(p.article_id, p.paragraph_num)] = p.context

    np.random.shuffle(answers)
    # tmp = open("/tmp/tmp.csv", "w")

    for prediction in answers:
github allenai / document-qa / experimental / batch_paragraph_selection / show_paragraph_selection_fixes.py
def main():
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("answers")
    parser.add_argument("paragraph")
    args = parser.parse_args()

    with open(args.answers, "rb") as f:
        answers = pickle.load(f)
    answers = {x.question_id: x for x in answers}

    para_predictions = ParagraphRanks(args.paragraph).get_ranks()

    docs = SquadCorpus().get_dev_docs()

    # Accumulators indexed by paragraph position: per-position F1 and span scores
    max_para_len = max(len(doc.paragraphs) for doc in docs)
    top_n_f1_score = np.zeros(max_para_len)
    counts = np.zeros(max_para_len)
    top_n_span_score = np.zeros(max_para_len)

    n_questions = 0
    for doc in docs:
        for para in doc.paragraphs:
            n_questions += len(para.questions)
            for question in para.questions:
                answer = answers[question.question_id]

                best_val = -1
                text_f1 = -1
                span_f1 = 0
github allenai / document-qa / train_squad / train_base4.py
),
        embed_mapper=None,
        question_mapper=SequenceMapperSeq(BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8))),
        context_mapper=SequenceMapperSeq(BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8))),
        memory_builder=NullBiMapper(),
        attention=StaticAttention(DotProduct(True), FullyConnectedMerge(160)),
        match_encoder=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
        predictor=ChainPredictor(
            start_layer=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
            end_layer=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8))
        )
    )
    with open(__file__, "r") as f:
        notes = f.read()

    corpus = SquadCorpus()
    params = BatchingParameters(45, 45, "bucket_context_words_3",
                                "context_words", True, False)
    data = FixedParagraphQaTrainingData(corpus, None, params, [])

    eval = [LossEvaluator(), BoundedSpanEvaluator(bound=[17]), SentenceSpanEvaluator()]
    trainer.start_training(data, model, train_params, eval, trainer.ModelDir(out), notes, True)
github allenai / document-qa / train_squad / train_recurrent_atten.py
attention=RecurrentAttention(LstmCellSpec(80, keep_probs=0.8), BiLinear(80, bias=True)),
        match_encoder=SequenceMapperSeq(
            BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
            DropoutLayer(0.8),
            StaticAttentionSelf(DotProductProject(160, bias=True, scale=True, share_project=True),
                                FullyConnectedMerge(160)),
        ),
        predictor=ChainPredictor(
            start_layer=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
            end_layer=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8))
        )
    )
    with open(__file__, "r") as f:
        notes = f.read()

    corpus = SquadCorpus()
    train_batching = Batcher(45, "bucket_context_words_3", True, False)
    eval_batching = Batcher(45, "context_words", False, False)
    data = FixedParagraphQaTrainingData(corpus, None, train_batching, eval_batching)

    eval = [LossEvaluator(), BoundedSpanEvaluator(bound=[17]), SentenceSpanEvaluator()]
    trainer.start_training(data, model, train_params, eval, trainer.ModelDir(out), notes, False)
github allenai / document-qa / experimental / aligned_wiki_qa.py
def main():
    corp = WikiArticleQaCorpus(SquadCorpus(), SquadWikiArticles(), True, 0.15)
    corp.get_train_docs()
github allenai / document-qa / train_squad / train_base6.py
FullyConnected(160, "tanh"),
            BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
            DropoutLayer(0.8),
            StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
            FullyConnected(160, activation="tanh"),
            DropoutLayer(0.8),
        ),
        predictor=BoundsPredictor(ChainBiMapper(
            first_layer=BiRecurrentMapper(LstmCellSpec(80)),
            second_layer=BiRecurrentMapper(LstmCellSpec(80)),
        ))
    )
    with open(__file__, "r") as f:
        notes = f.read()

    corpus = SquadCorpus()
    train_batching = Batcher(45, "bucket_context_words_3", True, False)
    eval_batching = Batcher(45, "context_words", False, False)
    data = FixedParagraphQaTrainingData(corpus, None, train_batching, eval_batching)

    eval = [LossEvaluator(), BoundedSpanEvaluator(bound=[17]), SentenceSpanEvaluator()]
    trainer.start_training(data, model, train_params, eval, trainer.ModelDir(out), notes, False)
github allenai / document-qa / docqa / data_analysis / visualize_full_doc_errors.py
for p in dev_data.values():
        paragraph_map[(p.article_id, p.paragraph_num)] = p.context

    np.random.shuffle(answers)
    # tmp = open("/tmp/tmp.csv", "w")

    for prediction in answers:
        point = dev_data[prediction.question_id]
        start, end = prediction.doc_span

        context = paragraph_map[(point.article_id, prediction.paragraph_num)]
        text = origin_mapping.get_raw_text(point.article_id, prediction.paragraph_num, start, end)

        # Best token-level F1 of the predicted text against any gold answer
        text_f1 = 0
        for ans in point.answer:
            text_f1 = max(text_f1, text_f1_score(text, ans.text))

        # Walk per-sentence lengths to find the sentence containing the predicted span
        ans_sent = 0
        offset = 0
        while end >= offset + len(context[ans_sent]):
            offset += len(context[ans_sent])
            ans_sent += 1
        sent_start = start - offset
        sent_end = end - offset

        question_words = set(x.lower() for x in point.question if x.lower() not in stop)

        if prediction.paragraph_num != point.paragraph_num and text_f1 == 0:
            # tmp.write(" ".join(point.question))
            # tmp.write("\t" + point.article_title)
            # tmp.write("\t" + text)
            # tmp.write("\t" + str(list(set(x.text for x in point.answer))))