How to use pyserini - 10 common examples

To help you get started, we’ve selected a few pyserini examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github castorini / pyserini / tests / test_load_topics.py View on Github external
def test_trec2019_bl(self):
        topics = search.get_topics('trec2019_bl')
        self.assertEqual(len(topics), 60)
        self.assertEqual('d7d906991e2883889f850de9ae06655e', topics[870]['title'])
        self.assertEqual('0d7f5e24cafc019265d3ee4b9745e7ea', topics[829]['title'])
        self.assertTrue(isinstance(next(iter(topics.keys())), int))
github castorini / pyserini / tests / test_load_topics.py View on Github external
def test_covid_round1(self):
        topics = search.get_topics('covid_round1_udel')
        self.assertEqual(len(topics), 30)
        self.assertEqual('coronavirus origin origin COVID-19', topics[1]['query'])
        self.assertEqual('coronavirus remdesivir remdesivir effective treatment COVID-19', topics[30]['query'])
        self.assertTrue(isinstance(next(iter(topics.keys())), int))
github castorini / pyserini / tests / test_load_topics.py View on Github external
def test_trec_topicreader(self):
        # Running from command-line, we're in root of repo, but running in IDE, we're in tests/
        path = 'tools/topics-and-qrels/topics.robust04.txt'
        if not os.path.exists(path):
            path = f'../{path}'

        self.assertTrue(os.path.exists(path))
        topics = search.get_topics_with_reader('io.anserini.search.topicreader.TrecTopicReader', path)
        self.assertEqual(len(topics), 250)
        self.assertTrue(isinstance(next(iter(topics.keys())), int))

        self.assertEqual(search.get_topics('robust04'), topics)
github castorini / pyserini / tests / test_load_topics.py View on Github external
def test_covid_round4_udel(self):
        topics = search.get_topics('covid_round4_udel')
        self.assertEqual(len(topics), 45)
        self.assertEqual('coronavirus origin origin COVID-19', topics[1]['query'])
        self.assertEqual('coronavirus mental health impact COVID-19 pandemic impacted mental health',
                         topics[45]['query'])
        self.assertTrue(isinstance(next(iter(topics.keys())), int))
github castorini / pyserini / tests / test_load_topics.py View on Github external
def test_car20(self):
        topics = search.get_topics('car17v2.0_benchmarkY1test')
        self.assertEqual(len(topics), 2254)
        self.assertFalse(isinstance(next(iter(topics.keys())), int))
github castorini / pyserini / tests / test_load_topics.py View on Github external
def test_msmarco_doc(self):
        topics = search.get_topics('msmarco_doc_dev')
        self.assertEqual(len(topics), 5193)
        self.assertTrue(isinstance(next(iter(topics.keys())), int))
github castorini / pyserini / tests / test_load_topics.py View on Github external
def test_core18(self):
        topics = search.get_topics('core18')
        self.assertEqual(len(topics), 50)
        self.assertTrue(isinstance(next(iter(topics.keys())), int))
github castorini / pyserini / tests / test_load_topics.py View on Github external
def test_covid_round3(self):
        topics = search.get_topics('covid_round3')
        self.assertEqual(len(topics), 40)
        self.assertEqual('coronavirus origin', topics[1]['query'])
        self.assertEqual('coronavirus mutations', topics[40]['query'])
        self.assertTrue(isinstance(next(iter(topics.keys())), int))
github castorini / pyserini / tests / test_load_topics.py View on Github external
def test_msmarco_passage(self):
        topics = search.get_topics('msmarco_passage_dev_subset')
        self.assertEqual(len(topics), 6980)
        self.assertTrue(isinstance(next(iter(topics.keys())), int))
github castorini / pyserini / tests / test_analysis.py View on Github external
self.assertEqual(tokens, ['citi', 'buse', 'run', 'time'])

        # Specify Porter stemmer explicitly
        analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(stemmer='porter'))
        self.assertTrue(isinstance(analyzer, Analyzer))
        tokens = analyzer.analyze('City buses are running on time.')
        self.assertEqual(tokens, ['citi', 'buse', 'run', 'time'])

        # Specify Krovetz stemmer explicitly
        analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(stemmer='krovetz'))
        self.assertTrue(isinstance(analyzer, Analyzer))
        tokens = analyzer.analyze('City buses are running on time.')
        self.assertEqual(tokens, ['city', 'bus', 'running', 'time'])

        # No stemming
        analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(stemming=False))
        self.assertTrue(isinstance(analyzer, Analyzer))
        tokens = analyzer.analyze('City buses are running on time.')
        self.assertEqual(tokens, ['city', 'buses', 'running', 'time'])

        # No stopword filter, no stemming
        analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(stemming=False, stopwords=False))
        self.assertTrue(isinstance(analyzer, Analyzer))
        tokens = analyzer.analyze('City buses are running on time.')
        self.assertEqual(tokens, ['city', 'buses', 'are', 'running', 'on', 'time'])

        # No stopword filter, with stemming
        analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(stemming=True, stopwords=False))
        self.assertTrue(isinstance(analyzer, Analyzer))
        tokens = analyzer.analyze('City buses are running on time.')
        self.assertEqual(tokens, ['citi', 'buse', 'ar', 'run', 'on', 'time'])