How to use the pyglossary.text_utils.excMessage function in pyglossary

To help you get started, we’ve selected a few pyglossary examples, based on popular ways the excMessage function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ilius / pyglossary / pyglossary / plugins / babylon_bgl / bgl_reader.py View on Github external
returns u_word_main, as str instance (utf-8 encoding)
		"""
		b_word_main, strip_count = stripDollarIndexes(b_word)
		if strip_count > 1:
			log.debug(
				"processKey(%s):\n" % b_word +
				"number of dollar indexes = %s" % strip_count,
			)
		# convert to unicode
		if self.strictStringConvertion:
			try:
				u_word_main = b_word_main.decode(self.sourceEncoding)
			except UnicodeError:
				log.debug(
					"processKey(%s):\n" % b_word +
					"conversion error:\n%s" % excMessage()
				)
				u_word_main = b_word_main.decode(
					self.sourceEncoding,
					"ignore",
				)
		else:
			u_word_main = b_word_main.decode(self.sourceEncoding, "ignore")

		if self.processHtmlInKey:
			# u_word_main_orig = u_word_main
			u_word_main = stripHtmlTags(u_word_main)
			u_word_main = replaceHtmlEntriesInKeys(u_word_main)
#			if(re.match(".*[&<>].*", u_word_main_orig)):
#				log.debug("original text: " + u_word_main_orig + "\n" \
#						  + "new      text: " + u_word_main + "\n")
		u_word_main = removeControlChars(u_word_main)
github ilius / pyglossary / pyglossary / plugins / babylon_bgl / bgl_reader.py View on Github external
def processAlternativeKey(self, b_word, b_key):
		"""
			Decode an alternative key from bytes and clean it up.

			b_word is a bytes instance
			b_key is only used in the debug message that is logged when
			strict decoding fails
			returns u_word_main, as str instance

			Decoding uses self.sourceEncoding. When
			self.strictStringConvertion is set, a strict decode is tried
			first and a UnicodeError is logged at debug level before falling
			back to decoding with errors ignored.

			NOTE(review): this excerpt ends before any return statement; the
			return contract above is carried over from the previous
			docstring -- confirm against the full method.
		"""
		# strip_count is not used in the visible part of this method
		b_word_main, strip_count = stripDollarIndexes(b_word)
		# convert to unicode
		if self.strictStringConvertion:
			# strict decode first, so that undecodable input gets logged
			try:
				u_word_main = b_word_main.decode(self.sourceEncoding)
			except UnicodeError:
				log.debug(
					"processAlternativeKey(%s)\n" % b_word +
					"key = %s:\n" % b_key +
					"conversion error:\n%s" % excMessage()
				)
				# fall back to a lenient decode that drops undecodable bytes
				u_word_main = b_word_main.decode(self.sourceEncoding, "ignore")
		else:
			u_word_main = b_word_main.decode(self.sourceEncoding, "ignore")

		# strip "/" before words
		u_word_main = re.sub(
			self.stripSlashAltKeyPattern,
			r"\1\2",
			u_word_main,
		)

		# HTML clean-up of the key is optional, controlled by processHtmlInKey
		if self.processHtmlInKey:
			# u_word_main_orig = u_word_main
			u_word_main = stripHtmlTags(u_word_main)
			u_word_main = replaceHtmlEntriesInKeys(u_word_main)
github ilius / pyglossary / pyglossary / plugins / babylon_bgl.py View on Github external
continue
                        code = int(ref, 16)
                        utf8_text += chr(code)
                else:
                    self.char_references_statistics(text2, encoding)
                    if encoding == 'cp1252':
                        text2 = self.replace_ascii_char_refs(text2, encoding)
                    if self.strictStringConvertion:
                        try:
                            u_text = text2.decode(encoding)
                        except UnicodeError:
                            log.debug(
                                'decode_charset_tags({0})\n'
                                'fragment({1})\n'
                                'conversion error:\n{2}'
                                .format(text, text2, excMessage())
                            )
                            u_text = text2.decode(encoding, 'replace')
                    else:
                        u_text = text2.decode(encoding, 'replace')
                    utf8_text += u_text
                    if encoding != defaultEncoding:
                        defaultEncodingOnly = False
            elif i % 3 == 1: #  or 
                if parts[i].startswith('
                    if len(encodings) > 0:
                        del encodings[-1]
                    else:
                        log.debug(
                            'decode_charset_tags({0})\n'
                            'unbalanced  tag\n'
github ilius / pyglossary / pyglossary / plugins / babylon_bgl.py View on Github external
def processEntryKey(self, word):
        """
            Return entry key in utf-8 encoding

            word is decoded with self.sourceEncoding. When
            self.strictStringConvertion is set, a strict decode is tried
            first and a UnicodeError is logged at debug level before falling
            back to decoding with errors ignored. The decoded key is written
            through self.decoded_dump_file_write, then optionally stripped of
            HTML (self.processHtmlInKey), and finally passed through
            remove_control_chars and replace_new_lines.

            NOTE(review): utf8_main_word is assigned directly from the
            decoded value, so the "utf-8 encoding" wording above may be
            stale -- confirm.
            NOTE(review): this excerpt ends before any return statement;
            confirm the return value against the full method.
        """
        main_word, strip_cnt = self.stripDollarIndexes(word)
        # more than one dollar index is only reported, not treated as an error
        if strip_cnt > 1:
            log.debug('processEntryKey({0}):\nnumber of dollar indexes = {1}'\
                .format(word, strip_cnt))
        # convert to unicode
        if self.strictStringConvertion:
            # strict decode first, so that undecodable input gets logged
            try:
                u_main_word = main_word.decode(self.sourceEncoding)
            except UnicodeError:
                log.debug(
                    'processEntryKey({0}):\nconversion error:\n{1}'
                    .format(word, excMessage())
                )
                # fall back to a lenient decode that drops undecodable bytes
                u_main_word = main_word.decode(self.sourceEncoding, 'ignore')
        else:
            u_main_word = main_word.decode(self.sourceEncoding, 'ignore')

        # the dump receives the key as decoded, before any HTML clean-up
        self.decoded_dump_file_write('\n\nkey: ' + u_main_word)
        utf8_main_word = u_main_word
        if self.processHtmlInKey:
            #utf8_main_word_orig = utf8_main_word
            utf8_main_word = self.strip_html_tags(utf8_main_word)
            utf8_main_word = self.replace_html_entries_in_keys(utf8_main_word)
            #if(re.match('.*[&<>].*', utf8_main_word_orig)):
                #log.debug('original text: ' + utf8_main_word_orig + '\n' \
                        #+ 'new      text: ' + utf8_main_word + '\n')
        utf8_main_word = self.remove_control_chars(utf8_main_word)
        utf8_main_word = self.replace_new_lines(utf8_main_word)
github ilius / pyglossary / pyglossary / plugins / babylon_bgl.py View on Github external
def processEntryAlternativeKey(self, raw_word, raw_key):
        """
            Decode an alternative entry key and clean it up.

            raw_word is decoded with self.sourceEncoding. When
            self.strictStringConvertion is set, a strict decode is tried
            first and a UnicodeError is logged at debug level before falling
            back to decoding with errors ignored.
            raw_key is only used in that debug message.

            NOTE(review): this excerpt ends inside the processHtmlInKey
            branch, before any return statement; confirm the return value
            against the full method.
        """
        # strip_cnt is not used in the visible part of this method
        main_word, strip_cnt = self.stripDollarIndexes(raw_word)
        # convert to unicode
        if self.strictStringConvertion:
            # strict decode first, so that undecodable input gets logged
            try:
                u_main_word = main_word.decode(self.sourceEncoding)
            except UnicodeError:
                log.debug(
                    'processEntryAlternativeKey({0})\nkey = {1}:\nconversion error:\n{2}'
                    .format(raw_word, raw_key, excMessage())
                )
                # fall back to a lenient decode that drops undecodable bytes
                u_main_word = main_word.decode(self.sourceEncoding, 'ignore')
        else:
            u_main_word = main_word.decode(self.sourceEncoding, 'ignore')

        # strip '/' before words
        u_main_word = re.sub(self.strip_slash_alt_key_pat, r'\1\2', u_main_word)

        # the dump receives the key after slash stripping, before HTML clean-up
        self.decoded_dump_file_write('\nalt: ' + u_main_word)

        utf8_main_word = u_main_word
        if self.processHtmlInKey:
            #utf8_main_word_orig = utf8_main_word
            utf8_main_word = self.strip_html_tags(utf8_main_word)
            utf8_main_word = self.replace_html_entries_in_keys(utf8_main_word)
            #if(re.match('.*[&<>].*', utf8_main_word_orig)):
github ilius / pyglossary / pyglossary / plugins / babylon_bgl / bgl_reader.py View on Github external
)
							continue
						u_text += chr(int(b_ref, 16))
				else:
					self.charReferencesStat(b_text2, encoding)
					if encoding == "cp1252":
						b_text2 = replaceAsciiCharRefs(b_text2, encoding)
					if self.strictStringConvertion:
						try:
							u_text2 = b_text2.decode(encoding)
						except UnicodeError:
							log.debug(
								"decoding charset tags" +
								", b_text=%r" % b_text +
								"\nfragment: %r" % b_text2 +
								"\nconversion error:\n%s" % excMessage()
							)
							u_text2 = text2.decode(encoding, "replace")
					else:
						u_text2 = b_text2.decode(encoding, "replace")
					u_text += u_text2
					if encoding != defaultEncoding:
						defaultEncodingOnly = False
			elif i % 3 == 1:  #  or 
				if b_part.startswith(b"
					if encodings:
						encodings.pop()
					else:
						log.debug(
							"decoding charset tags, b_text=%r\n" % b_text +
							"unbalanced  tag\n"