Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def syllabify_orth_with_pyphen(token,num_sylls=None):
    """Split ``token`` into orthographic syllables using Pyphen.

    The module-level ``Pyphen`` hyphenator is created on first use with the
    ``en_US`` dictionary and reused on later calls.

    ``num_sylls`` is accepted but not used: the result is returned as-is,
    whatever number of pieces the hyphenator produces.

    Returns the list of pieces obtained by splitting ``token`` at every
    hyphenation point.
    """
    global Pyphen
    if not Pyphen:
        Pyphen = pyphen.Pyphen(lang='en_US')
    # A multi-character marker is used as the hyphen so that the split
    # below cannot be confused with a literal "-" inside the token.
    separator = '||||'
    marked = Pyphen.inserted(token, hyphen=separator)
    return marked.split(separator)
# NOTE(review): fragment -- the enclosing `def` line is not visible here, and
# the snippet's indentation has been lost. The code reads `self` and `text`.
# Deprecation notice for the `lang` argument (any condition guarding this
# call is outside the visible lines).
warnings.warn(
"The 'lang' argument has been moved to "
"'textstat.set_lang()'. This argument will be removed "
"in the future.",
DeprecationWarning
)
# Byte input is decoded with the instance's `text_encoding` first.
if isinstance(text, bytes):
text = text.decode(self.text_encoding)
# Lower-case the text, then pass it through self.remove_punctuation().
text = text.lower()
text = self.remove_punctuation(text)
# Nothing left after cleaning: report zero.
if not text:
return 0
dic = Pyphen(lang=self.__lang)
count = 0
# Words are separated on single spaces only. Each word adds the number of
# "-" characters in its hyphenated form plus one; since that value is always
# at least 1, max(1, ...) never changes it.
for word in text.split(' '):
word_hyphenated = dic.inserted(word)
count += max(1, word_hyphenated.count("-") + 1)
return count
def main():
"Main function"
# NOTE(review): this snippet has lost its indentation and is cut off after
# the final `else:`; the comments below describe only the visible statements.
parser = get_parser()
# With no command-line arguments: print the usage text and exit with status 1.
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
args = parser.parse_args()
# Hyphenator built from the pattern file at args.pat_path, with left=1, right=1.
hyphenator = pyphen.Pyphen(filename=args.pat_path,left=1,right=1)
# Output goes to stdout unless args.outfile is something other than False,
# in which case that path is opened for writing as UTF-8.
outfd = sys.stdout
if args.outfile != False:
outfd = open(args.outfile, 'w', encoding='utf8')
file_list = get_file_list(args.path)
# Accumulate the value returned by check_file() for every file.
nb_errors = 0
for f in file_list:
nb_errors += check_file(f, hyphenator, outfd, args.verbose)
# A total is written only when several files were given and errors were found;
# 'No error!' is written only when there were none and verbose is off.
if len(file_list) > 1 and nb_errors > 0:
outfd.write('Total errors: '+str(nb_errors)+'\n')
elif nb_errors == 0 and not args.verbose:
outfd.write('No error!\n')
# Close the output only if it is not stdout.
if outfd is not sys.stdout:
outfd.close()
# Exit status 0 when no errors were counted; the other branch is not visible.
if nb_errors == 0:
sys.exit(0)
else:
@repoze.lru.lru_cache(maxsize=128)
def syllable_count(self, text, lang='en_US'):
    """
    Function to calculate the number of syllables in a text.

    The text is lower-cased, every character found in the module-level
    ``exclude`` collection is dropped (presumably punctuation -- confirm
    against its definition), and each remaining word is hyphenated with
    a Pyphen dictionary for ``lang``.

    I/P - a text, and optionally a Pyphen language code
    O/P - total number of syllables (0 when nothing is left after cleaning)
    """
    text = text.lower()
    text = "".join(x for x in text if x not in exclude)
    if not text:
        return 0
    dic = Pyphen(lang=lang)
    # split() without an argument splits on any run of whitespace and never
    # yields empty strings. The previous split(' ') produced an empty "word"
    # for every extra space, each of which was counted as one syllable, and
    # it did not separate words divided by tabs or newlines.
    # A word with k hyphenation points has k + 1 syllables (always >= 1).
    return sum(dic.inserted(word).count("-") + 1 for word in text.split())
from collections import defaultdict
from music21 import converter, instrument, note, chord
import pyphen
dic = pyphen.Pyphen(lang='en')
def tokenize(text,midiPath):
    """Split verses into "|"-separated vocal tokens, one verse per note count.

    For every value yielded by ``notesPerVerse(midiPath)`` the next verse of
    ``text`` is taken (wrapping around to the first verse when the end is
    reached), cleaned with ``cleanText`` and cut by ``vocals``.

    Returns the list of tokens. Because every verse chunk is terminated by
    "|", the list always ends with an empty string.
    """
    chunks = []
    verse_index = 0
    for note_count in notesPerVerse(midiPath):
        cleaned = cleanText(text[verse_index])
        chunks.append("|".join(vocals(cleaned, note_count)) + "|")
        # Cycle through the verses when there are more note groups than verses.
        verse_index = (verse_index + 1) % len(text)
    joined = "".join(chunks).strip()
    return joined.split("|")
def notesPerVerse(midiFile):
# NOTE(review): only the first statement of this function is visible here.
# It parses `midiFile` with music21's `converter.parse`.
mid = converter.parse(midiFile)
# NOTE(review): fragment of a pattern-loading loop with its indentation lost.
# `stream`, `parse_hex`, `parse` and `AlternativeParser` are defined outside
# the visible lines, and the snippet is cut off inside the last `while`.
charset = 'cp1251'
for pattern in stream:
# Each entry is decoded from bytes with `charset` and stripped of whitespace.
pattern = pattern.decode(charset).strip()
# Skip empty entries and entries starting with '%', '#' or one of the
# *HYPHENMIN keywords.
if not pattern or pattern.startswith((
'%', '#', 'LEFTHYPHENMIN', 'RIGHTHYPHENMIN',
'COMPOUNDLEFTHYPHENMIN', 'COMPOUNDRIGHTHYPHENMIN')):
continue
# replace ^^hh with the real character
pattern = parse_hex(
lambda match: chr(int(match.group(1), 16)), pattern)
# read nonstandard hyphen alternatives
if '/' in pattern:
# Only the first '/' separates the pattern from its alternative.
pattern, alternative = pattern.split('/', 1)
factory = AlternativeParser(pattern, alternative)
else:
factory = int
# parse() yields (i, string) pairs; a missing `i` is treated as '0' before
# being converted by `factory`. The pairs are unzipped into two tuples.
tags, values = zip(*[
(string, factory(i or '0'))
for i, string in parse(pattern)])
# if only zeros, skip this pattern
if max(values) == 0:
continue
# chop zeros from beginning and end, and store start offset
start, end = 0, len(values)
while not values[start]:
start += 1
while not values[end - 1]:
def setLocale(self,locale):
    """
    Configure the hyphenator for the given locale.

    ``locale`` may be either a path to an existing hyphenation dictionary
    file or a language code listed in ``pyphen.LANGUAGES``. In the second
    case the tokenizer language is set to the same code.

    Raises LookupError when ``locale`` is neither of those.
    """
    # An existing file path takes precedence over a language code.
    if os.path.exists(locale):
        self.hyphenator = pyphen.Pyphen(filename=locale)
        return

    is_known_language = len(locale) > 1 and locale in pyphen.LANGUAGES
    if not is_known_language:
        raise LookupError("provided locale not supported by pyphen")

    self.hyphenator = pyphen.Pyphen(lang=locale)
    self.setTokenizeLanguage(locale)
# NOTE(review): fragment of a larger line-breaking routine with its
# indentation lost; the branches opening this if/else chain and the rest of
# the final loop are outside the visible lines.
# resume_at is a length in UTF-8 bytes: the first line's text plus one space.
resume_at = len(
(first_line_text + u' ').encode('utf8'))
else:
# What was the first line's text becomes the word to break, and the first
# line itself starts out empty.
first_line_text, next_word = u'', first_line_text
# Positions of every `soft_hyphen` match in next_word, ordered last to first.
soft_hyphen_indexes = [
match.start() for match in
re.finditer(soft_hyphen, next_word)]
soft_hyphen_indexes.reverse()
# Candidate prefixes of next_word, each one ending at (and including) the
# character at a matched position.
dictionary_iterations = [
next_word[:i + 1] for i in soft_hyphen_indexes]
elif hyphens == 'auto' and lang:
# The next word does not fit, try hyphenation
# Hyphenation dictionaries are cached, keyed by (lang, left, right, total).
dictionary_key = (lang, left, right, total)
dictionary = PYPHEN_DICTIONARY_CACHE.get(dictionary_key)
if dictionary is None:
# NOTE(review): `total` is part of the cache key but is not passed to
# pyphen.Pyphen here -- confirm where it is applied.
dictionary = pyphen.Pyphen(
lang=lang, left=left, right=right)
PYPHEN_DICTIONARY_CACHE[dictionary_key] = dictionary
# Keep only the first element of each pair yielded by dictionary.iterate().
dictionary_iterations = [
start for start, end in dictionary.iterate(next_word)]
else:
dictionary_iterations = []
if dictionary_iterations:
# For each candidate, lay out the first-line text followed by the candidate
# and the style's hyphenate character.
for first_word_part in dictionary_iterations:
new_first_line_text = first_line_text + first_word_part
hyphenated_first_line_text = (
new_first_line_text + style.hyphenate_character)
new_layout = create_layout(
hyphenated_first_line_text, style, context, max_width,
justification_spacing)
new_lines = new_layout.iter_lines()
def __init__(self, language='en'):
    """Create the Pyphen hyphenation dictionary for ``language``.

    The dictionary is stored on the instance as ``self.dic``.
    """
    hyphenator = pyphen.Pyphen(lang=language)
    self.dic = hyphenator
def _pyphen(words):
    """Count syllables per word with Pyphen's English dictionary.

    Returns a list holding, for every word in ``words``, the number of
    hyphenation points Pyphen finds plus one.

    When Pyphen has no ``'en'`` dictionary, a message is printed and
    ``None`` is returned instead.
    """
    if 'en' not in pyphen.LANGUAGES:
        print('pyphen: No English dictionary!')
        return
    hyphenator = pyphen.Pyphen(lang='en')
    # k inserted hyphens split a word into k + 1 syllables.
    return [hyphenator.inserted(word).count('-') + 1 for word in words]