Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def __init__(self, tag_dict=BROWN_TAGS, tokenizer=None):
    """
    Creates a Tagger object.

    :param tag_dict: Mapping used for the part-of-speech lookup. It is stored by reference (not copied).
                     Defaults to BROWN_TAGS.
    :param tokenizer: Object whose tokenize() method splits the input into tokens. When omitted (or None),
                      a new PunctTokenizer is created for this instance.
    """
    self.__tags_dict = tag_dict
    # The default tokenizer is built here rather than in the signature: a call placed in a default argument
    # runs once, when the function is defined, so every default-constructed tagger would share one instance.
    self.__p_tokenizer = PunctTokenizer() if tokenizer is None else tokenizer
### @PROJECT_NAME: SPLAT: Speech Processing and Linguistic Analysis Tool ###
### @VERSION_NUMBER: ###
### @PROJECT_SITE: github.com/meyersbs/SPLAT ###
### @AUTHOR_NAME: Benjamin S. Meyers ###
### @CONTACT_EMAIL: ben@splat-library.org ###
### @LICENSE_TYPE: MIT ###
########################################################################################################################
########################################################################################################################
class POSTagger:
"""
A POSTagger tokenizes the given input with punctuation as separate tokens, and then does a dictionary lookup to
determine the part-of-speech for each token.
"""
__tags_dict = {}
__p_tokenizer = PunctTokenizer()
def __init__(self, tag_dict=BROWN_TAGS, tokenizer=None):
    """
    Creates a Tagger object.

    :param tag_dict: Mapping consulted with the lowercased token to find its part-of-speech tag. It is stored
                     by reference (not copied). Defaults to BROWN_TAGS.
    :param tokenizer: Object whose tokenize() method splits the input into tokens. When omitted (or None),
                      a new PunctTokenizer is created for this instance.
    """
    self.__tags_dict = tag_dict
    # The default tokenizer is built here rather than in the signature: a call placed in a default argument
    # runs once, when the function is defined, so every default-constructed tagger would share one instance.
    self.__p_tokenizer = PunctTokenizer() if tokenizer is None else tokenizer
def __tag_list(self, text_list):
tagged = []
for word in self.__p_tokenizer.tokenize(text_list):
if word.lower() in self.__tags_dict.keys():
tag = self.__tags_dict[word.lower()]
elif word in [".", ",", ":", ";", "?", "!"]:
tag = u"PNCT"
else: