Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def create_empty_hypothesis(sentences, cost_dict,
ngram=3, transfrom=2, transto=1,
db="sqlite:///:memory:"):
phrases = available_phrases(sentences,
db=db)
hyp0 = HypothesisBase(sentences=sentences,
db=db,
totalnumber=_get_total_number(transto=transto,
db=db),
inputps_with_index=(),
outputps=[],
ngram=ngram,
ngram_words=["", "<s>"]*ngram,
transfrom=transfrom,
transto=transto,
covered=set(),
start=0,
end=0,
prev_start=0,
prev_end=0,
remained=set(enumerate(sentences, 1)),
remain_phrases=phrases,</s>
"covered": prev_hypo.covered.union(set(inputps_with_index)),
"remained": prev_hypo.remained.difference(
set(inputps_with_index)),
"start": start,
"end": end,
"prev_start": prev_start,
"prev_end": prev_end,
"remain_phrases": self._calc_remain_phrases(
inputps_with_index,
prev_hypo.remain_phrases),
"cost_dict": prev_hypo.cost_dict,
# set later
"prob": 0,
"prob_with_cost": 0,
}
HypothesisBase.__init__(self, **args)
# set ngram words
self._ngram_words = self._set_ngram_words()
# set the exact probability
self._prob = self._cal_prob(start - prev_end)
# set the exact probability with cost
self._prob_with_cost = self._cal_prob_with_cost(start - prev_end)
# set the output phrases
self._output_sentences = prev_hypo.output_sentences + outputps
("prob_with_cost", self._prob_with_cost),
#("cost_dict", self._cost_dict),
#("prev_hypo", ""),
]
return u"Hypothesis Object\n" +\
u"\n".join([u" " + k + u": " +
unicode(v) for (k, v) in d])
def __str__(self):
    """Return the unicode representation of this object encoded as UTF-8."""
    # Build the text form first, then encode it to bytes.
    as_text = unicode(self)
    return as_text.encode('utf-8')
def __hash__(self):
    """Return the hash of this object's unicode representation."""
    # Objects with the same unicode text therefore hash identically.
    as_text = unicode(self)
    return hash(as_text)
class Hypothesis(HypothesisBase):
"""
Realize like the following class
>>> args = {"sentences": sentences,
... "inputps_with_index": phrase,
... "outputps": outputps,
... "covered": hyp0.covered.union(set(phrase)),
... "remained": hyp0.remained.difference(set(phrase)),
... "start": phrase[0][0],
... "end": phrase[-1][0],
... "prev_start": hyp0.start,
... "prev_end": hyp0.end,
... "remain_phrases": remain_phrases(phrase,
... hyp0.remain_phrases),
... "prev_hypo": hyp0
... }