How to use pyphen - 10 common examples

To help you get started, we’ve selected a few pyphen examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github quadrismegistus / prosodic / prosodic / dicts / en / english.py View on Github external
def syllabify_orth_with_pyphen(token,num_sylls=None):
	"""Split ``token`` into orthographic chunks at pyphen's hyphenation points.

	The module-level ``Pyphen`` hyphenator is created for ``en_US`` the first
	time this runs with a falsy ``Pyphen`` and is reused afterwards.

	``num_sylls`` is accepted but not used by the current implementation.

	Returns the list of pieces of ``token`` between hyphenation points.
	"""
	global Pyphen
	# Multi-character marker used as the inserted "hyphen" and then as the
	# split delimiter.
	separator = '||||'
	if not Pyphen:
		Pyphen = pyphen.Pyphen(lang='en_US')
	marked = Pyphen.inserted(token, hyphen=separator)
	return marked.split(separator)
github shivam5992 / textstat / textstat / textstat.py View on Github external
warnings.warn(
                "The 'lang' argument has been moved to "
                "'textstat.set_lang()'. This argument will be removed "
                "in the future.",
                DeprecationWarning
            )
        if isinstance(text, bytes):
            text = text.decode(self.text_encoding)

        text = text.lower()
        text = self.remove_punctuation(text)

        if not text:
            return 0

        dic = Pyphen(lang=self.__lang)
        count = 0
        for word in text.split(' '):
            word_hyphenated = dic.inserted(word)
            count += max(1, word_hyphenated.count("-") + 1)
        return count
github gregorio-project / gregorio / contrib / checkSyllabation.py View on Github external
def main():
    "Main function"
    parser = get_parser()
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()
    hyphenator = pyphen.Pyphen(filename=args.pat_path,left=1,right=1)
    outfd = sys.stdout
    if args.outfile != False:
        outfd = open(args.outfile, 'w', encoding='utf8')
    file_list = get_file_list(args.path)
    nb_errors = 0
    for f in file_list:
        nb_errors += check_file(f, hyphenator, outfd, args.verbose)
    if len(file_list) > 1 and nb_errors > 0:
        outfd.write('Total errors: '+str(nb_errors)+'\n')
    elif nb_errors == 0 and not args.verbose:
        outfd.write('No error!\n')
    if outfd is not sys.stdout:
        outfd.close()
    if nb_errors == 0:
        sys.exit(0)
    else:
github shivam5992 / textstat / textstat / textstat.py View on Github external
    @repoze.lru.lru_cache(maxsize=128)
    def syllable_count(self, text, lang='en_US'):
        """
        Count the syllables in a text.

        The text is lower-cased, every character contained in ``exclude``
        is dropped, and each remaining whitespace-separated word is
        hyphenated with a Pyphen dictionary for ``lang``. A word contributes
        one syllable plus one for every "-" in its hyphenated form.

        I/P - a text, and optionally the language code passed to Pyphen
        O/P - number of syllables (0 if nothing is left after cleaning)
        """
        text = text.lower()
        text = "".join(x for x in text if x not in exclude)

        if not text:
            return 0

        dic = Pyphen(lang=lang)
        # split() without an argument discards the empty tokens that
        # split(' ') produced for repeated, leading or trailing spaces (each
        # of which used to be counted as a one-syllable word) and also
        # separates words on tabs and newlines.
        return sum(
            max(1, dic.inserted(word).count("-") + 1)
            for word in text.split())
github mathigatti / midi2voice / lyrics_tokenizer.py View on Github external
from collections import defaultdict
from music21 import converter, instrument, note, chord
import pyphen
dic = pyphen.Pyphen(lang='en')

def tokenize(text,midiPath):
	"""Build a flat, "|"-delimited token list for the verses of ``text``.

	For every value yielded by ``notesPerVerse(midiPath)`` the next entry of
	``text`` is cleaned with ``cleanText`` and passed to ``vocals`` together
	with that value; the pieces returned are joined with "|" and followed by
	one more "|". Entries of ``text`` are reused cyclically when there are
	more values than entries.

	Returns the concatenated string, stripped of surrounding whitespace and
	split on "|".
	"""
	segments = []
	verse_index = 0
	for note_count in notesPerVerse(midiPath):
		cleaned_verse = cleanText(text[verse_index])
		segments.append("|".join(vocals(cleaned_verse, note_count)) + "|")
		# Wrap around so the verses repeat once they run out.
		verse_index = (verse_index + 1) % len(text)

	joined = "".join(segments).strip()
	return joined.split("|")

def notesPerVerse(midiFile):

	mid = converter.parse(midiFile)
github Kozea / Pyphen / pyphen / __init__.py View on Github external
charset = 'cp1251'
            for pattern in stream:
                pattern = pattern.decode(charset).strip()
                if not pattern or pattern.startswith((
                        '%', '#', 'LEFTHYPHENMIN', 'RIGHTHYPHENMIN',
                        'COMPOUNDLEFTHYPHENMIN', 'COMPOUNDRIGHTHYPHENMIN')):
                    continue

                # replace ^^hh with the real character
                pattern = parse_hex(
                    lambda match: chr(int(match.group(1), 16)), pattern)

                # read nonstandard hyphen alternatives
                if '/' in pattern:
                    pattern, alternative = pattern.split('/', 1)
                    factory = AlternativeParser(pattern, alternative)
                else:
                    factory = int

                tags, values = zip(*[
                    (string, factory(i or '0'))
                    for i, string in parse(pattern)])

                # if only zeros, skip this pattern
                if max(values) == 0:
                    continue

                # chop zeros from beginning and end, and store start offset
                start, end = 0, len(values)
                while not values[start]:
                    start += 1
                while not values[end - 1]:
github wimmuskee / readability-score / readability_score / textanalyzer.py View on Github external
def setLocale(self,locale):
        """
        Configure the hyphenator from ``locale``.

        ``locale`` is first treated as a filesystem path: if it exists, a
        hyphenator is built from that file. Otherwise it must be longer than
        one character and listed in ``pyphen.LANGUAGES``; the hyphenator is
        then built for that language and the tokenizer language is set to
        the same value.

        Raises LookupError when neither case applies.
        """
        # A path to an existing file takes precedence over a language code.
        if os.path.exists(locale):
            self.hyphenator = pyphen.Pyphen(filename=locale)
            return

        if len(locale) <= 1 or locale not in pyphen.LANGUAGES:
            raise LookupError("provided locale not supported by pyphen")

        self.hyphenator = pyphen.Pyphen(lang=locale)
        self.setTokenizeLanguage(locale)
github Kozea / WeasyPrint / weasyprint / text.py View on Github external
resume_at = len(
                            (first_line_text + u' ').encode('utf8'))
                    else:
                        first_line_text, next_word = u'', first_line_text
                soft_hyphen_indexes = [
                    match.start() for match in
                    re.finditer(soft_hyphen, next_word)]
                soft_hyphen_indexes.reverse()
                dictionary_iterations = [
                    next_word[:i + 1] for i in soft_hyphen_indexes]
            elif hyphens == 'auto' and lang:
                # The next word does not fit, try hyphenation
                dictionary_key = (lang, left, right, total)
                dictionary = PYPHEN_DICTIONARY_CACHE.get(dictionary_key)
                if dictionary is None:
                    dictionary = pyphen.Pyphen(
                        lang=lang, left=left, right=right)
                    PYPHEN_DICTIONARY_CACHE[dictionary_key] = dictionary
                dictionary_iterations = [
                    start for start, end in dictionary.iterate(next_word)]
            else:
                dictionary_iterations = []

            if dictionary_iterations:
                for first_word_part in dictionary_iterations:
                    new_first_line_text = first_line_text + first_word_part
                    hyphenated_first_line_text = (
                        new_first_line_text + style.hyphenate_character)
                    new_layout = create_layout(
                        hyphenated_first_line_text, style, context, max_width,
                        justification_spacing)
                    new_lines = new_layout.iter_lines()
github SmokinCaterpillar / TrufflePig / trufflepig / filters / stylemeasures.py View on Github external
def __init__(self, language='en'):
        """Create the pyphen hyphenator for ``language`` and keep it on ``self.dic``."""
        hyphenator = pyphen.Pyphen(lang=language)
        self.dic = hyphenator
github SwagLyrics / autosynch / src / hyphenate.py View on Github external
def _pyphen(words):
    """Return a pyphen-based syllable count for each entry of ``words``.

    Each count is the number of "-" characters in the hyphenated word plus
    one. If 'en' is not listed in ``pyphen.LANGUAGES`` a message is printed
    and ``None`` is returned instead of a list.
    """
    if 'en' in pyphen.LANGUAGES:
        hyphenator = pyphen.Pyphen(lang='en')
        return [hyphenator.inserted(entry).count('-') + 1 for entry in words]

    print('pyphen: No English dictionary!')
    return None

pyphen

Pure Python module to hyphenate text

(GPL-2.0 OR LGPL-2.0 OR MPL-1.1)
Latest version published 23 days ago

Package Health Score

79 / 100
Full package analysis