How to use the pyglossary.plugins.babylon_bgl.bgl_text.removeControlChars function in pyglossary

To help you get started, we’ve selected a few pyglossary examples, based on popular ways this function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: ilius/pyglossary, file pyglossary/plugins/babylon_bgl/bgl_reader.py (view on GitHub)
)
				u_word_main = b_word_main.decode(
					self.sourceEncoding,
					"ignore",
				)
		else:
			u_word_main = b_word_main.decode(self.sourceEncoding, "ignore")

		if self.processHtmlInKey:
			# u_word_main_orig = u_word_main
			u_word_main = stripHtmlTags(u_word_main)
			u_word_main = replaceHtmlEntriesInKeys(u_word_main)
#			if(re.match(".*[&<>].*", u_word_main_orig)):
#				log.debug("original text: " + u_word_main_orig + "\n" \
#						  + "new      text: " + u_word_main + "\n")
		u_word_main = removeControlChars(u_word_main)
		u_word_main = removeNewlines(u_word_main)
		u_word_main = u_word_main.lstrip()
		if self.keyRStripChars:
			u_word_main = u_word_main.rstrip(self.keyRStripChars)
		return u_word_main
Source: ilius/pyglossary, file pyglossary/plugins/babylon_bgl/bgl_reader.py (view on GitHub)
if fields.b_title:
			fields.u_title, singleEncoding = self.decodeCharsetTags(
				fields.b_title,
				self.sourceEncoding,
			)
			fields.u_title = replaceHtmlEntries(fields.u_title)
			fields.u_title = removeControlChars(fields.u_title)

		if fields.b_title_trans:
			# sourceEncoding or targetEncoding ?
			fields.u_title_trans, singleEncoding = self.decodeCharsetTags(
				fields.b_title_trans,
				self.sourceEncoding,
			)
			fields.u_title_trans = replaceHtmlEntries(fields.u_title_trans)
			fields.u_title_trans = removeControlChars(fields.u_title_trans)

		if fields.b_transcription_50:
			if fields.code_transcription_50 == 0x10:
				# contains values like this (char codes):
				# 00 18 00 19 00 1A 00 1B 00 1C 00 1D 00 1E 00 40 00 07
				# this is not utf-16
				# what is this?
				pass
			elif fields.code_transcription_50 == 0x1b:
				fields.u_transcription_50, singleEncoding = \
					self.decodeCharsetTags(
						fields.b_transcription_50,
						self.sourceEncoding,
					)
				fields.u_transcription_50 = \
					replaceHtmlEntries(fields.u_transcription_50)
Source: ilius/pyglossary, file pyglossary/plugins/babylon_bgl/bgl_reader.py (view on GitHub)
if fields.code_transcription_50 == 0x10:
				# contains values like this (char codes):
				# 00 18 00 19 00 1A 00 1B 00 1C 00 1D 00 1E 00 40 00 07
				# this is not utf-16
				# what is this?
				pass
			elif fields.code_transcription_50 == 0x1b:
				fields.u_transcription_50, singleEncoding = \
					self.decodeCharsetTags(
						fields.b_transcription_50,
						self.sourceEncoding,
					)
				fields.u_transcription_50 = \
					replaceHtmlEntries(fields.u_transcription_50)
				fields.u_transcription_50 = \
					removeControlChars(fields.u_transcription_50)
			elif fields.code_transcription_50 == 0x18:
				# incomplete text like:
				# t c=T>02D0;g0259;-
				# This defi normally contains fields.b_transcription_60
				# in this case.
				pass
			else:
				log.debug(
					"processDefi(%s)\n" % b_defi +
					"b_key = %s:\n" % b_key +
					"defi field 50, " +
					"unknown code: %#.2x" % fields.code_transcription_50
				)

		if fields.b_transcription_60:
			if fields.code_transcription_60 == 0x1b:
Source: ilius/pyglossary, file pyglossary/plugins/babylon_bgl/bgl_reader.py (view on GitHub)
)
		if fields.singleEncoding:
			fields.encoding = self.targetEncoding
		fields.u_defi = fixImgLinks(fields.u_defi)
		fields.u_defi = replaceHtmlEntries(fields.u_defi)
		fields.u_defi = removeControlChars(fields.u_defi)
		fields.u_defi = normalizeNewlines(fields.u_defi)
		fields.u_defi = fields.u_defi.strip()

		if fields.b_title:
			fields.u_title, singleEncoding = self.decodeCharsetTags(
				fields.b_title,
				self.sourceEncoding,
			)
			fields.u_title = replaceHtmlEntries(fields.u_title)
			fields.u_title = removeControlChars(fields.u_title)

		if fields.b_title_trans:
			# sourceEncoding or targetEncoding ?
			fields.u_title_trans, singleEncoding = self.decodeCharsetTags(
				fields.b_title_trans,
				self.sourceEncoding,
			)
			fields.u_title_trans = replaceHtmlEntries(fields.u_title_trans)
			fields.u_title_trans = removeControlChars(fields.u_title_trans)

		if fields.b_transcription_50:
			if fields.code_transcription_50 == 0x10:
				# contains values like this (char codes):
				# 00 18 00 19 00 1A 00 1B 00 1C 00 1D 00 1E 00 40 00 07
				# this is not utf-16
				# what is this?
Source: ilius/pyglossary, file pyglossary/plugins/babylon_bgl/bgl_reader.py (view on GitHub)
"b_key = %s:\n" % b_key +
					"defi field 50, " +
					"unknown code: %#.2x" % fields.code_transcription_50
				)

		if fields.b_transcription_60:
			if fields.code_transcription_60 == 0x1b:
				fields.u_transcription_60, singleEncoding = \
					self.decodeCharsetTags(
						fields.b_transcription_60,
						self.sourceEncoding,
					)
				fields.u_transcription_60 = \
					replaceHtmlEntries(fields.u_transcription_60)
				fields.u_transcription_60 = \
					removeControlChars(fields.u_transcription_60)
			else:
				log.debug(
					"processDefi(%s)\n" % b_defi +
					"b_key = %s:\n" % b_key +
					"defi field 60" +
					"unknown code: %#.2x" % fields.code_transcription_60,
				)

		if fields.b_field_1a:
			fields.u_field_1a, singleEncoding = self.decodeCharsetTags(
				fields.b_field_1a,
				self.sourceEncoding,
			)

		self.processDefiStat(fields, b_defi, b_key)
Source: ilius/pyglossary, file pyglossary/plugins/babylon_bgl/bgl_reader.py (view on GitHub)
# strip "/" before words
		u_word_main = re.sub(
			self.stripSlashAltKeyPattern,
			r"\1\2",
			u_word_main,
		)

		if self.processHtmlInKey:
			# u_word_main_orig = u_word_main
			u_word_main = stripHtmlTags(u_word_main)
			u_word_main = replaceHtmlEntriesInKeys(u_word_main)
#			if(re.match(".*[&<>].*", u_word_main_orig)):
#				log.debug("original text: " + u_word_main_orig + "\n" \
#						+ "new      text: " + u_word_main + "\n")
		u_word_main = removeControlChars(u_word_main)
		u_word_main = removeNewlines(u_word_main)
		u_word_main = u_word_main.lstrip()
		u_word_main = u_word_main.rstrip(self.keyRStripChars)
		return u_word_main