Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def extract_dep_feature(dep_parser, text, topic_ent, question_word):
    """Build a dependency-path feature joining the question word and the topic entity.

    The first parse produced by ``dep_parser.raw_parse(text)`` is converted to
    a list of dependency triples.  Every non-stop-word token of ``topic_ent``
    is fuzzy-matched (``fuzz.token_sort_ratio``, minimum score 85) against the
    whitespace-separated words of ``text``; for each matched word
    ``find_parent(word, tree, '->')`` is evaluated and the shortest non-empty
    result is kept.

    Args:
        dep_parser: Parser object exposing ``raw_parse(text)``, which must
            return an iterator of parses that provide ``triples()``.
        text: The sentence to analyse.
        topic_ent: Topic entity string; it is split with ``tokenize``.
        question_word: Word whose path is computed with ``find_parent``.

    Returns:
        ``find_parent(question_word, tree)`` followed by the reversed topic
        path with its last element dropped.  If no topic token matches, only
        the question-word path is returned.
        NOTE(review): ``find_parent`` presumably returns the path from a word
        up to the parse root -- confirm against its definition.
    """
    # Use the next() builtin instead of calling the iterator dunder directly.
    dep = next(dep_parser.raw_parse(text))
    tree = list(dep.triples())

    # Set iteration order is arbitrary, so ties between equally short paths
    # are resolved by whichever token happens to be visited first.
    topic_tokens = set(tokenize(topic_ent)) - stop_words
    words = text.split()

    topic_ent_to_root = []
    path_len = 1e5  # sentinel: larger than any realistic path length
    for token in topic_tokens:
        best = process.extractOne(token, words, scorer=fuzz.token_sort_ratio)
        # extractOne yields None when there is nothing to choose from (e.g.
        # whitespace-only text); indexing it would raise TypeError.
        if best is None or best[1] < 85:
            continue
        candidate = find_parent(best[0], tree, '->')
        if 0 < len(candidate) < path_len:
            topic_ent_to_root = candidate
            path_len = len(candidate)

    question_word_to_root = find_parent(question_word, tree)
    # Drop the last element of the topic path before reversing it, so it is
    # not repeated where the two paths are joined.
    return question_word_to_root + list(reversed(topic_ent_to_root[:-1]))
sj_search = get_url(decode_base64("aHR0cHM6Ly9zZXJpZW5qdW5raWVzLm9yZy9zZXJpZS9zZWFyY2g/cT0=") + sj_query,
configfile, dbfile, scraper)
try:
sj_results = BeautifulSoup(sj_search, 'lxml').findAll("a", href=re.compile("/serie"))
except:
sj_results = []
if special:
append = " (" + special + ")"
else:
append = ""
i = 0
results = {}
for result in sj_results:
r_title = result.text
r_rating = fuzz.ratio(title.lower(), r_title)
if r_rating > 40:
res = {"payload": encode_base64(result['href'] + "|" + r_title + "|" + str(special)),
"title": r_title + append}
results["result" + str(i + 1000)] = res
i += 1
sj_final = results
return bl_final, sj_final
def search(self, name, threshold=80):
    """Return manufacturers having a name variant that fuzzily matches ``name``.

    Each entry of ``self.manufacturers`` is iterated as a collection of name
    variants.  Every variant is compared case-insensitively to ``name`` with
    ``fuzz.ratio``; each variant scoring strictly above ``threshold`` adds one
    ``(manufacturer[0], score)`` tuple, so a manufacturer can appear more
    than once.

    Args:
        name: Query string.
        threshold: Exclusive lower bound on the similarity score.

    Returns:
        List of ``(manufacturer[0], score)`` tuples, highest score first.
    """
    # Lazily pair every variant's score with the manufacturer it belongs to.
    scored = (
        (manufacturer, fuzz.ratio(variant.lower(), name.lower()))
        for manufacturer in self.manufacturers
        for variant in manufacturer
    )
    hits = [(entry[0], score) for entry, score in scored if score > threshold]
    hits.sort(key=lambda hit: hit[1], reverse=True)
    return hits
def get_combined_fuzz_score(self, a, b, mode='geom_mean'):
    """Combine the weighted full and partial fuzzy ratios of two names.

    Both inputs are passed through ``clean_name`` first.  ``fuzz.ratio`` is
    scaled by ``self.weight['simple']`` and ``fuzz.partial_ratio`` by
    ``self.weight['partial']``; the two floats are then handed to
    ``self.combine_scores`` together with ``mode``.

    Args:
        a: First name.
        b: Second name.
        mode: Combination mode forwarded unchanged to ``combine_scores``.

    Returns:
        Whatever ``self.combine_scores`` returns for the two weighted scores.
    """
    cleaned_a = clean_name(a)
    cleaned_b = clean_name(b)

    weighted_simple = float(
        fuzz.ratio(cleaned_a, cleaned_b) * self.weight['simple']
    )
    weighted_partial = float(
        fuzz.partial_ratio(cleaned_a, cleaned_b) * self.weight['partial']
    )
    return self.combine_scores(weighted_simple, weighted_partial, mode=mode)
def put_title(self, text, level):
# Write `text` to self.ofile as a heading line made of `level` '#'
# characters, a space, the stripped text and a blank line -- but only when
# the text differs enough from the title last recorded for this level in
# g.last_title.
#   text:  heading text; surrounding whitespace is stripped first.
#   level: number of '#' characters; also the key into g.last_title.
text = text.strip()
# The scorer is called with score_cutoff=92 and its result is negated, so the
# branch is taken when the call returns a falsy value (0).
# NOTE(review): presumably the scorer returns 0 for similarities below the
# cutoff (RapidFuzz behaviour) -- confirm which `fuzz` module is imported.
if not fuzz.ratio(text, g.last_title.get(level, ''), score_cutoff=92):
self.ofile.write('#'*level + ' ' + text + '\n\n')
# NOTE(review): indentation was lost in this copy, so it cannot be told from
# here whether the assignment below runs only when the heading is written or
# on every call -- check against the upstream source.
g.last_title[level] = text
continue
try:
#if self.DEBUG:
#print("")
#print("___" + current_thing_title)
probable_thing_title_confidence = 100
if target_thing_title == None: # If no thing title provided, we go over every thing and let the property be leading in finding a match.
pass
elif target_thing_title == current_thing_title: # If the thing title is a perfect match
probable_thing_title = current_thing_title
if self.DEBUG:
print("FOUND THE CORRECT THING: " + str(current_thing_title))
elif fuzz.ratio(str(target_thing_title), current_thing_title) > 85: # If the title is a fuzzy match
if self.DEBUG:
print("This thing title is pretty similar, so it could be what we're looking for: " + str(current_thing_title))
probable_thing_title = current_thing_title
probable_thing_title_confidence = 85
elif target_space != None:
space_title = str(target_space) + " " + str(target_thing_title)
#if self.DEBUG:
# print("space title = " + str(target_space) + " + " + str(target_thing_title))
if fuzz.ratio(space_title, current_thing_title) > 85:
probable_thing_title = space_title
elif current_thing_title.startswith(target_thing_title):
if self.DEBUG:
print("partial match:" + str(len(current_thing_title) / len(target_thing_title)))
if len(current_thing_title) / len(target_thing_title) < 2:
# The strings mostly start the same, so this might be a match.
pass
elif target_thing_title == current_thing_title: # If the thing title is a perfect match
probable_thing_title = current_thing_title
if self.DEBUG:
print("FOUND THE CORRECT THING: " + str(current_thing_title))
elif fuzz.ratio(str(target_thing_title), current_thing_title) > 85: # If the title is a fuzzy match
if self.DEBUG:
print("This thing title is pretty similar, so it could be what we're looking for: " + str(current_thing_title))
probable_thing_title = current_thing_title
probable_thing_title_confidence = 85
elif target_space != None:
space_title = str(target_space) + " " + str(target_thing_title)
#if self.DEBUG:
# print("space title = " + str(target_space) + " + " + str(target_thing_title))
if fuzz.ratio(space_title, current_thing_title) > 85:
probable_thing_title = space_title
elif current_thing_title.startswith(target_thing_title):
if self.DEBUG:
print("partial match:" + str(len(current_thing_title) / len(target_thing_title)))
if len(current_thing_title) / len(target_thing_title) < 2:
# The strings mostly start the same, so this might be a match.
probable_thing_title = current_thing_title
probable_thing_title_confidence = 25
else:
# A title was provided, but we were not able to match it to the current things. Perhaps we can get a property-based match.
continue
except Exception as ex:
print("Error while trying to match title: " + str(ex))
result.append(match_dict.copy())
continue
# Looking for a value
elif target_property_title == 'value' and match_dict['thing'] != None:
result.append(match_dict.copy())
continue
# Looking for 'all' properties
elif target_property_title == 'all' and match_dict['thing'] != None:
#If all properties are desired, add all properties
result.append(match_dict.copy())
continue
# We found a good matching property title and already found a good matching thing title. # TODO: shouldn't this be higher up?
elif fuzz.ratio(current_property_title, target_property_title) > 85:
if self.DEBUG:
print("FOUND A PROPERTY WITH THE MATCHING FUZZY NAME")
if match_dict['thing'] == None:
match_dict['thing'] = current_thing_title
result.append(match_dict.copy())
else:
result = [] # Since this is a really good match, we remove any older properties we may have found.
result.append(match_dict.copy())
return result
# We're looking for a numbered property (e.g. moisture 5), and this property has that number in it. Here we favour sensors. # TODO: add ordinal support?
elif str(numerical_index) in current_property_title and target_thing_title != None:
result.append(match_dict.copy())
if thing['properties'][thing_property_key]['type'] == 'boolean' and probability_of_correct_property == 0:
def search_by_false_name(self, name, threshold=80):
    """Find items whose ``false_name`` fuzzily matches ``name``.

    Two scores are computed for every item in ``self.items``: a plain
    ``fuzz.ratio`` on the strings as given, and a ``fuzz.token_set_ratio`` on
    their lower-cased forms.  An item is kept when either score is strictly
    above ``threshold``.

    Args:
        name: Name to search by; ``name.lower()`` is called, so it must be a
            string whenever ``self.items`` is non-empty.
        threshold: Exclusive lower bound applied to both scores.

    Returns:
        List of ``(item, ratio, token_set_ratio)`` triplets ordered by
        ``ratio``, highest first.
    """
    def _score(item):
        # Plain ratio first, then the case-insensitive token-set ratio.
        plain = fuzz.ratio(item.false_name, name)
        token_set = fuzz.token_set_ratio(item.false_name.lower(), name.lower())
        return item, plain, token_set

    hits = [
        scored
        for scored in map(_score, self.items)
        if scored[1] > threshold or scored[2] > threshold
    ]
    hits.sort(key=lambda scored: scored[1], reverse=True)
    return hits
def __score_result(tweet, search_criteria):
    """Score a tweet against the search criteria.

    Returns the ``fuzz.token_sort_ratio`` between ``tweet.text`` and
    ``search_criteria.content``.
    """
    return fuzz.token_sort_ratio(tweet.text, search_criteria.content)