Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# compute input vars for fuzzy
# height ratio wrt main text height
hrii=hii/main_height
# lowest y0
y0ii=[ljj.y0 for ljj in gii]
y0ii=np.min(y0ii)/page_h
# number of words
nwordsii=len(tii.split(' '))
# similarity measure against a predefined list of non-title words
notitlefmii=[fuzz.token_set_ratio(tii,jj) for jj in NON_TITLE_LIST]
notitlefmii=np.mean(notitlefmii)
# similarity measure against the title obtained from the document metadata
if doctitle:
metatitlefmii=fuzz.ratio(tii, doctitle)
gr_lines.append((tii,hii,y0ii,hrii,nwordsii,notitlefmii,metatitlefmii))
else:
gr_lines.append((tii,hii,y0ii,hrii,nwordsii,notitlefmii))
#pprint(gr_lines)
#----------------Do fuzzy logic----------------
fuzz_scores=FCTitleGuess(gr_lines, doctitle)
title_idx=np.argmax(fuzz_scores)
title_guess=gr_lines[title_idx]
title_y0=title_guess[2]*page_h
title_x0=groups[title_idx][0].x0
#----------------Guess author list----------------
top_lines=line_dict.keys()
def compare_output(baseline, current):
    """Score ``current`` against ``baseline`` with the configured fuzz scorer.

    The module-level ``DEFAULT_ALGORITHM`` names the ``fuzz`` function to
    call.  The return value is whatever that function returns for
    ``(baseline, current)``.  If the name is not one of the supported
    scorers, a message is printed and ``sys.exit(-1)`` is called.
    """
    supported = (
        'ratio',
        'partial_ratio',
        'token_sort_ratio',
        'partial_token_sort_ratio',
        'token_set_ratio',
    )
    similarity = 50
    if DEFAULT_ALGORITHM in supported:
        # Every supported name is also the name of the fuzz function to use.
        similarity = getattr(fuzz, DEFAULT_ALGORITHM)(baseline, current)
    else:
        print("Unknown similarity measure " + DEFAULT_ALGORITHM + ". Aborting")
        sys.exit(-1)
    return similarity
def compare_output(baseline, current):
    """Score ``current`` against ``baseline`` with the configured fuzz scorer.

    The module-level ``DEFAULT_ALGORITHM`` names the ``fuzz`` function to
    call.  The return value is whatever that function returns for
    ``(baseline, current)``.  If the name is not one of the supported
    scorers, a message is printed and ``sys.exit(-1)`` is called.
    """
    supported = (
        'ratio',
        'partial_ratio',
        'token_sort_ratio',
        'partial_token_sort_ratio',
        'token_set_ratio',
    )
    similarity = 50
    if DEFAULT_ALGORITHM in supported:
        # Every supported name is also the name of the fuzz function to use.
        similarity = getattr(fuzz, DEFAULT_ALGORITHM)(baseline, current)
    else:
        print("Unknown similarity measure " + DEFAULT_ALGORITHM + ". Aborting")
        sys.exit(-1)
    return similarity
def compare_output(baseline, current):
    """Score ``current`` against ``baseline`` with the configured fuzz scorer.

    The module-level ``DEFAULT_ALGORITHM`` names the ``fuzz`` function to
    call.  The return value is whatever that function returns for
    ``(baseline, current)``.  If the name is not one of the supported
    scorers, a message is printed and ``sys.exit(-1)`` is called.
    """
    supported = (
        'ratio',
        'partial_ratio',
        'token_sort_ratio',
        'partial_token_sort_ratio',
        'token_set_ratio',
    )
    similarity = 50
    if DEFAULT_ALGORITHM in supported:
        # Every supported name is also the name of the fuzz function to use.
        similarity = getattr(fuzz, DEFAULT_ALGORITHM)(baseline, current)
    else:
        print("Unknown similarity measure " + DEFAULT_ALGORITHM + ". Aborting")
        sys.exit(-1)
    return similarity
def test_asciionly(self):
    """Run utils.asciionly over every fixture string without error."""
    for raw in self.mixed_strings:
        # asciionly only works on strings, so pass each fixture through
        # asciidammit first.
        cleaned = utils.asciidammit(raw)
        utils.asciionly(cleaned)
def testRatioUnicodeString(self):
    """Expect fuzz.ratio to score the two strings below as 0."""
    accented = "\u00C1"
    plain = "ABCD"
    self.assertEqual(0, fuzz.ratio(accented, plain))
def test_dict_like_extract(self):
    """process.extract should accept a dict-like object, not only a dict.

    The result must be non-empty, each entry must unpack as a
    (value, confidence, key) triple, and every returned value must be one
    of the values held by the choices mapping.
    """
    # Try the standalone UserDict module first and fall back to
    # collections when that import fails.
    try:
        from UserDict import UserDict
    except ImportError:
        from collections import UserDict

    query = 'aaa'
    choices = UserDict({'aa': 'bb', 'a1': None})
    matches = process.extract(query, choices)
    self.assertTrue(len(matches) > 0)
    for matched_value, _confidence, _key in matches:
        self.assertTrue(matched_value in choices.values())
def test_service_metadata(self):
    """Check the service metadata returned by the reconcile endpoint.

    Requests ``/api/1.0/refine/reconcile`` with a ``callback`` parameter and
    asserts a 200 status.  The body is compared to the expected JSONP
    payload with ``fuzz.token_sort_ratio``, which must score 100.
    """
    self.maxDiff = None
    response = self.client.get('/api/1.0/refine/reconcile', {'callback': 'jsonp123'})
    self.assertEqual(200, response.status_code)
    expected = 'jsonp123({"name": "Influence Explorer Reconciliation3", "identifierSpace": "http://staging.influenceexplorer.com/ns/entities", "schemaspace": "http://staging.influenceexplorer.com/ns/entity.object.id", "view": { "url": "http://staging.influenceexplorer.com/entity/{{id}}" }, "preview": { "url": "http://staging.influenceexplorer.com/entity/{{id}}", "width": 430, "height": 300 }, "defaultTypes": []})'
    # The original left the assertEqual( call unclosed, which is a syntax
    # error; the call is now balanced.
    self.assertEqual(100, fuzz.token_sort_ratio(expected, response.content))
def testTokenSetRatio(self):
    """Check fuzz.token_set_ratio scores for the fixture string pairs."""
    token_set = fuzz.token_set_ratio
    self.assertEqual(token_set(self.s4, self.s5), 100)
    self.assertEqual(token_set(self.s8, self.s8a, full_process=False), 100)
    # s9/s9a are expected to score 100 with and without full processing.
    self.assertEqual(token_set(self.s9, self.s9a, full_process=True), 100)
    self.assertEqual(token_set(self.s9, self.s9a, full_process=False), 100)
    self.assertEqual(token_set(self.s10, self.s10a, full_process=False), 50)
def test_fullProcess(self):
    """Run utils.full_process over every fixture string without error."""
    for sample in self.mixed_strings:
        utils.full_process(sample)