How to use the pyahocorasick.Trie function in pyahocorasick

To help you get started, we’ve selected a few pyahocorasick examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github WojciechMula / pyahocorasick / py / unittests.py View on Github external
def get_test_automaton():
	"""Build a Trie from a fixed word list and turn it into an automaton.

	Each word is stored with itself as the associated value. The trie is
	returned after make_automaton() has been called on it.
	"""
	automaton = Trie()
	for word in ("he", "her", "hers", "his", "she", "hi", "him", "man", "himan"):
		automaton.add_word(word, word)

	automaton.make_automaton()
	return automaton
github WojciechMula / pyahocorasick / py / unittests.py View on Github external
def testAddedWordShouldBeCountedAndAvailableForRetrieval(self):
	"""One added word is counted by len() and its value is returned by get()."""
	trie = Trie()
	key, stored = 'python', 'value'
	trie.add_word(key, stored)

	self.assertEqual(len(trie), 1)
	self.assertEqual(trie.get(key), stored)
github WojciechMula / pyahocorasick / py / unittests.py View on Github external
def testItemsShouldReturnAllItemsAlreadyAddedToTheTrie(self):
	"""items() must yield every (word, value) pair previously added."""
	t = Trie()

	expected = [
		('python', 1),
		('ada',    2),
		('perl',   3),
		('pascal', 4),
		('php',    5),
	]
	for word, value in expected:
		t.add_word(word, value)

	result = list(t.items())
	# assertEqual rather than the deprecated assertEquals alias, which was
	# removed in Python 3.12; this also matches the sibling tests.
	self.assertEqual(len(result), 5)
	# Membership checks only: the order in which items() yields is not asserted.
	for pair in expected:
		self.assertIn(pair, result)
github WojciechMula / pyahocorasick / py / unittests.py View on Github external
def testExistShouldDetectAddedWords(self):
	"""exists() reports True for each word that has been added."""
	trie = Trie()
	added = ('python', 'ada')
	for word in added:
		trie.add_word(word, 'value')

	for word in added:
		self.assertTrue(trie.exists(word))
github WojciechMula / pyahocorasick / py / unittests.py View on Github external
def testEmptyTrieShouldNotContainsAnyWords(self):
	"""A freshly constructed Trie has length zero."""
	empty = Trie()
	size = len(empty)
	self.assertEqual(size, 0)
github WojciechMula / pyahocorasick / py / unittests.py View on Github external
def testAddingExistingWordShouldReplaceAssociatedValue(self):
	"""Adding the same word twice keeps one entry; the later value wins."""
	trie = Trie()
	# After each add_word the trie must hold exactly one entry whose value
	# is the one most recently supplied.
	for stored in ('value', 'other'):
		trie.add_word('python', stored)
		self.assertEqual(len(trie), 1)
		self.assertEqual(trie.get('python'), stored)
github WojciechMula / pyahocorasick / py / unittests.py View on Github external
def testGetUnknowWordWithDefaultValueShouldReturnDefault(self):
	"""get() on a word that was never added returns the supplied default."""
	trie = Trie()
	fallback = 'default'
	found = trie.get('python', fallback)
	self.assertEqual(found, fallback)
github WojciechMula / pyahocorasick / py / issue_21.py View on Github external
def test(case):
    """Run a single iter_long() case and fail on any mismatch.

    Args:
        case: mapping read via the keys 'words' (words added to the trie,
            each stored with itself as value), 'input' (the text passed
            to iter_long) and 'expected' (the list the collected results
            are compared against).

    Raises:
        AssertionError: if the collected results differ from
            case['expected'].
    """
    tree = pyahocorasick.Trie()
    for word in case['words']:
        tree.add_word(word, word)

    tree.make_automaton()

    actual = list(tree.iter_long(case['input']))

    if actual != case['expected']:
        print("ERROR:")
        print(actual)
        print(case['expected'])
        # Raise explicitly: `assert` statements are compiled away under
        # `python -O`, which would let a failing case pass silently.
        raise AssertionError("iter_long() result differs from the expected value")
github WojciechMula / pyahocorasick / py / exportdot.py View on Github external
writeln("\tnode%d -> node%d [label=\"%s\"]" % (nodeid, destid, label))

	# fail links
	for node in nodes:
		nodeid = id(node)
		failid = id(node.fail)

		if failid != pyahocorasick.nil:
			writeln("\tnode%d -> node%d [color=blue]" % (nodeid, failid))

	writeln("}")


if __name__ == '__main__':
	# Build the sample trie; each word's value is its position in the tuple.
	A = pyahocorasick.Trie()
	for index, word in enumerate(("he", "her", "hers", "she", "cat", "shield")):
		A.add_word(word, index)

	# Export once before make_automaton() ...
	with open('trie.dot', 'wt') as dot_file:
		exportdot(A, dot_file)

	# ... and once more after it has been called.
	A.make_automaton()
	with open('ahocorasick.dot', 'wt') as dot_file:
		exportdot(A, dot_file)

pyahocorasick

pyahocorasick is a fast and memory efficient library for exact or approximate multi-pattern string search. With the ``ahocorasick.Automaton`` class, you can find multiple key string occurrences at once in some input text. You can use it as a plain dict-like Trie or convert a Trie to an automaton for efficient Aho-Corasick search. You can also pickle an automaton to disk for easy reuse of large automatons. Implemented in C and tested on Python 3.6+. Works on Linux, macOS and Windows. BSD-3-Clause license.

BSD-3-Clause
Latest version published 2 months ago

Package Health Score

83 / 100
Full package analysis

Popular pyahocorasick functions