How to use the pyserini.trectools._base.TrecRun class in pyserini

To help you get started, we’ve selected a few pyserini examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github castorini / pyserini / pyserini / trectools / _base.py View on Github external
def from_dataframes(dfs, run=None):
        """Return a TrecRun by populating dataframe with the provided list of dataframes.

        Parameters
        ----------
        dfs: List[Dataframe]
            A list of Dataframes conforming to TrecRun.columns

        run: TrecRun
            Set to ``None`` by default. If None, then a new instance of TrecRun will be created.
            Else, the given TrecRun will be modified.

        Returns
        -------
        TrecRun
            ``run`` itself when one was given, otherwise the newly created instance. Its
            ``run_data`` is replaced, after ``reset_data()`` has been called, by the rows of
            ``dfs`` concatenated in order under a fresh 0..n-1 index.
        """

        res = TrecRun() if run is None else run
        res.reset_data()
        # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is the
        # supported replacement. The freshly reset frame stays first in the list so that an
        # empty ``dfs`` still yields that frame instead of pd.concat raising on no input.
        res.run_data = pd.concat([res.run_data, *dfs], ignore_index=True)

        return res
github castorini / pyserini / pyserini / trectools / _base.py View on Github external
The aggregation method to use.
        depth : int
            Maximum number of results from each input run to consider. Set to ``None`` by default, which indicates that
            the complete list of results is considered.
        k : int
            Length of final results list.  Set to ``None`` by default, which indicates that the union of all input documents
            are ranked.
        """

        if len(runs) < 2:
            raise Exception('Merge requires at least 2 runs.')

        rows = []

        if aggregation == AggregationMethod.SUM:
            topics = list(TrecRun.get_all_topics_from_runs(runs))

            def merge_topic(topic):
                """Sum each document's score over every run for ``topic`` and rank the totals.

                Reads ``runs``, ``depth`` and ``k`` from the enclosing scope. Returns a list of
                ``(topic, 'Q0', docid, rank, score, 'merge_sum')`` tuples with ranks starting
                at 1; at most ``k`` tuples are returned when ``k`` is not None.
                """
                totals = {}

                for run in runs:
                    topic_docs = run.get_docs_by_topic(topic, depth)
                    for docid, score in topic_docs[['docid', 'score']].values:
                        totals[docid] = totals.get(docid, 0.0) + score

                # Highest summed score first; equal scores are ordered by ascending docid.
                ranked = sorted(totals.items(), key=lambda pair: (-pair[1], pair[0]))
                if k is not None:
                    ranked = ranked[:k]

                return [
                    (topic, 'Q0', docid, rank, score, 'merge_sum')
                    for rank, (docid, score) in enumerate(ranked, start=1)
                ]
github castorini / pyserini / pyserini / trectools / _base.py View on Github external
def from_search_results(docid_score_pair: Tuple[str, float], topic=1):
        """Build a TrecRun from (docid, score) search results belonging to a single topic.

        Parameters
        ----------
        docid_score_pair:
            Iterable of (docid, score) pairs. Ranks 1, 2, ... are assigned in iteration order.
            NOTE(review): the annotation describes a single pair, but the body iterates over
            a sequence of pairs -- confirm the intended type with callers.

        topic:
            Topic id written to every row. Set to ``1`` by default.
        """
        ranked_rows = [
            (topic, 'Q0', docid, position, score, 'searcher')
            for position, (docid, score) in enumerate(docid_score_pair, start=1)
        ]

        return TrecRun.from_list(ranked_rows)
github castorini / pyserini / pyserini / trectools / _base.py View on Github external
"""Return a TrecRun by populating dataframe with the provided list of tuples.
        For performance reasons, df.to_numpy() is faster than df.iterrows().
        When manipulating dataframes, we first dump to np.ndarray and construct a list of tuples with new values.
        Then use this function to convert the list of tuples to a TrecRun object.

        Parameters
        ----------
        rows: List[tuples]
            List of tuples in the following format: (topic, 'Q0', docid, rank, score, tag)

        run: TrecRun
            Set to ``None`` by default. If None, then a new instance of TrecRun will be created.
            Else, the given TrecRun will be modified.
        """

        res = TrecRun() if run is None else run

        df = pd.DataFrame(rows)
        df.columns = TrecRun.columns
        res.run_data = df.copy()

        return res
github castorini / pyserini / pyserini / trectools / _base.py View on Github external
"""

        df_list = []
        for topic in self.topics():
            if topic not in qrels.topics():
                continue

            qrels_docids = qrels.get_docids(topic)
            topic_df = self.run_data[self.run_data['topic'] == topic]
            if keep is True:
                topic_df = topic_df[topic_df['docid'].isin(qrels_docids)]
            else:
                topic_df = topic_df[~topic_df['docid'].isin(qrels_docids)]
            df_list.append(topic_df)

        run = TrecRun() if clone is True else self
        return TrecRun.from_dataframes(df_list, run)
github castorini / pyserini / pyserini / trectools / _base.py View on Github external
df_list = []
        for topic in self.topics():
            if topic not in qrels.topics():
                continue

            qrels_docids = qrels.get_docids(topic)
            topic_df = self.run_data[self.run_data['topic'] == topic]
            if keep is True:
                topic_df = topic_df[topic_df['docid'].isin(qrels_docids)]
            else:
                topic_df = topic_df[~topic_df['docid'].isin(qrels_docids)]
            df_list.append(topic_df)

        run = TrecRun() if clone is True else self
        return TrecRun.from_dataframes(df_list, run)