Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_1(self):
    """
    Measure how many tokens per second ``word_sent`` handles on ``self.text``
    and require that rate to exceed ``EXPECTED_SPEED``.

    The check is skipped when the measured wall-clock duration is exactly
    zero, because no rate can be computed in that case.
    """
    token_count = len(tokenize(self.text).split(" "))
    started = time.time()
    word_sent(self.text)
    finished = time.time()
    elapsed = finished - started  # in seconds
    if elapsed == 0:
        # Nothing measurable; leave without asserting.
        return
    speed = token_count / elapsed
    print("Speed: ", speed)
    self.assertGreater(speed, EXPECTED_SPEED)
def test_1(self):
    """
    Measure how many tokens per second ``word_sent`` handles over every
    entry of ``self.texts`` and require that rate to exceed
    ``EXPECTED_SPEED``.

    The token count is taken from ``tokenize`` (split on single spaces)
    before timing starts, so only the ``word_sent`` calls are timed.
    """
    n_tokens = sum(len(tokenize(text).split(" ")) for text in self.texts)
    start = time.time()
    for text in self.texts:
        word_sent(text)
    end = time.time()
    duration = end - start  # in seconds
    if duration == 0:
        # The clock did not advance between the two readings, so a rate
        # cannot be computed; skip the check rather than raise
        # ZeroDivisionError.
        return
    speed = n_tokens / duration
    print("Speed: ", speed)
    self.assertGreater(speed, EXPECTED_SPEED)
def word_sent(sentence, format=None):
    """
    Segment a raw sentence into words.

    The sentence is tokenized, the tokens are labelled by the CRF model, and
    every token labelled ``"IW"`` is appended (with a space) to the word
    before it. Any other label starts a new word.

    :param unicode|str sentence: raw sentence
    :param format: forwarded to the model's ``predict``; when it equals
        ``"text"`` the words are joined into one string, with spaces inside
        a word replaced by underscores
    :return: segmented sentence
    :rtype: list, or unicode|str when ``format == "text"``
    """
    tokens = tokenize(sentence).split()
    crf_model = CRFModel.Instance()
    # NOTE(review): predict() is assumed to return a sequence of items whose
    # first two elements are (token, tag) -- confirm against CRFModel.
    tagged = crf_model.predict(tokens, format)
    words = []
    for item in tagged:
        token, tag = item[0], item[1]
        if tag == "IW" and words:
            words[-1] = words[-1] + u" " + token
        else:
            # A leading "IW" has no previous word to extend, so it opens a
            # new word instead of raising IndexError on words[-1].
            words.append(token)
    if format == "text":
        return u" ".join([word.replace(" ", "_") for word in words])
    return words
def word_sent(sentence, format=None):
    """
    Segment a raw sentence into words.

    The sentence is tokenized, the tokens are labelled by the CRF model, and
    every token labelled ``"IW"`` is appended (with a space) to the word
    before it. Any other label starts a new word.

    :param unicode|str sentence: raw sentence
    :param format: forwarded to the model's ``predict``; when it equals
        ``"text"`` the words are joined into one string, with spaces inside
        a word replaced by underscores
    :return: segmented sentence
    :rtype: list, or unicode|str when ``format == "text"``
    """
    tokens = tokenize(sentence).split()
    crf_model = CRFModel.Instance()
    # NOTE(review): predict() is assumed to return a sequence of items whose
    # first two elements are (token, tag) -- confirm against CRFModel.
    tagged = crf_model.predict(tokens, format)
    words = []
    for item in tagged:
        token, tag = item[0], item[1]
        if tag == "IW" and words:
            words[-1] = words[-1] + u" " + token
        else:
            # A leading "IW" has no previous word to extend, so it opens a
            # new word instead of raising IndexError on words[-1].
            words.append(token)
    if format == "text":
        return u" ".join([word.replace(" ", "_") for word in words])
    return words
def word_sent(sentence, format=None):
    """
    Segment a raw sentence into words.

    The sentence is tokenized, the tokens are labelled by the CRF model, and
    every token labelled ``"IW"`` is appended (with a space) to the word
    before it. Any other label starts a new word.

    :param unicode|str sentence: raw sentence
    :param format: forwarded to the model's ``predict``; when it equals
        ``"text"`` the words are joined into one string, with spaces inside
        a word replaced by underscores
    :return: segmented sentence
    :rtype: list, or unicode|str when ``format == "text"``
    """
    tokens = tokenize(sentence).split()
    crf_model = CRFModel.Instance()
    # NOTE(review): predict() is assumed to return a sequence of items whose
    # first two elements are (token, tag) -- confirm against CRFModel.
    tagged = crf_model.predict(tokens, format)
    words = []
    for item in tagged:
        token, tag = item[0], item[1]
        if tag == "IW" and words:
            words[-1] = words[-1] + u" " + token
        else:
            # A leading "IW" has no previous word to extend, so it opens a
            # new word instead of raising IndexError on words[-1].
            words.append(token)
    if format == "text":
        return u" ".join([word.replace(" ", "_") for word in words])
    return words
def word_sent(sentence, format=None):
    """
    Segment a raw sentence into words.

    The sentence is tokenized, the tokens are labelled by the CRF model, and
    every token labelled ``"IW"`` is appended (with a space) to the word
    before it. Any other label starts a new word.

    :param unicode|str sentence: raw sentence
    :param format: forwarded to the model's ``predict``; when it equals
        ``"text"`` the words are joined into one string, with spaces inside
        a word replaced by underscores
    :return: segmented sentence
    :rtype: list, or unicode|str when ``format == "text"``
    """
    tokens = tokenize(sentence).split()
    crf_model = CRFModel.Instance()
    # NOTE(review): predict() is assumed to return a sequence of items whose
    # first two elements are (token, tag) -- confirm against CRFModel.
    tagged = crf_model.predict(tokens, format)
    words = []
    for item in tagged:
        token, tag = item[0], item[1]
        if tag == "IW" and words:
            words[-1] = words[-1] + u" " + token
        else:
            # A leading "IW" has no previous word to extend, so it opens a
            # new word instead of raising IndexError on words[-1].
            words.append(token)
    if format == "text":
        return u" ".join([word.replace(" ", "_") for word in words])
    return words
def word_sent(sentence, format=None):
    """
    Segment a raw sentence into words.

    The sentence is tokenized, the tokens are labelled by the CRF model, and
    every token labelled ``"IW"`` is appended (with a space) to the word
    before it. Any other label starts a new word.

    :param unicode|str sentence: raw sentence
    :param format: forwarded to the model's ``predict``; when it equals
        ``"text"`` the words are joined into one string, with spaces inside
        a word replaced by underscores
    :return: segmented sentence
    :rtype: list, or unicode|str when ``format == "text"``
    """
    tokens = tokenize(sentence).split()
    crf_model = CRFModel.Instance()
    # NOTE(review): predict() is assumed to return a sequence of items whose
    # first two elements are (token, tag) -- confirm against CRFModel.
    tagged = crf_model.predict(tokens, format)
    words = []
    for item in tagged:
        token, tag = item[0], item[1]
        if tag == "IW" and words:
            words[-1] = words[-1] + u" " + token
        else:
            # A leading "IW" has no previous word to extend, so it opens a
            # new word instead of raising IndexError on words[-1].
            words.append(token)
    if format == "text":
        return u" ".join([word.replace(" ", "_") for word in words])
    return words
def word_sent(sentence, format=None):
    """
    Tokenize a sentence and hand it to the CRF model for segmentation.

    :param unicode|str sentence: raw sentence
    :param format: forwarded unchanged to the model's ``predict``
    :return: segmented sentence, exactly as returned by ``predict``
    :rtype: unicode|str (per the original documentation; TODO confirm
        against ``CRFModel.predict``)
    """
    tokenized = tokenize(sentence)
    model = CRFModel.Instance()
    return model.predict(tokenized, format)