Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _test():
# Act like gzip; with -d, act like gunzip.
# The input file is not deleted, however, nor are any other gzip
# options or features supported.
# Command-line arguments without the program name.
args = sys.argv[1:]
# A leading "-d" selects decompression; it is dropped from the file list.
decompress = args and args[0] == "-d"
if decompress:
args = args[1:]
# With no file arguments, process "-" (standard input to standard output).
if not args:
args = ["-"]
# For each argument, f is the object that is read from; g is presumably the
# destination (the code that writes to g is not visible in this excerpt).
for arg in args:
if decompress:
if arg == "-":
# Decompress the binary stdin stream onto binary stdout.
f = GzipFile(filename="", mode="rb", fileobj=sys.stdin.buffer)
g = sys.stdout.buffer
else:
# Only names ending in ".gz" are accepted; others are reported and skipped.
if arg[-3:] != ".gz":
print("filename doesn't end in .gz:", repr(arg))
continue
# NOTE(review): the unqualified open() is presumably this module's own
# gzip-aware open(), as opposed to builtins.open below - confirm.
f = open(arg, "rb")
# Output goes to the same name with the ".gz" suffix removed.
g = builtins.open(arg[:-3], "wb")
else:
if arg == "-":
# Compress the binary stdin stream onto binary stdout.
f = sys.stdin.buffer
g = GzipFile(filename="", mode="wb", fileobj=sys.stdout.buffer)
else:
# Read the plain file and target "<name>.gz".
f = builtins.open(arg, "rb")
g = open(arg + ".gz", "wb")
# Read the input in 1024-byte chunks.
# NOTE(review): the rest of this loop is not visible in this excerpt.
while True:
chunk = f.read(1024)
if args.inputFilename:
if args.outputFilename and ui_type != "none":
ui_type = "cmd" # silently? FIXME
else:
if ui_type == "cmd":
log.error("no input file given, try --help")
exit(1)
if ui_type == "none":
if args.reverse:
log.error("--reverse does not work with --ui=none")
sys.exit(1)
glos = Glossary()
glos.convert(
args.inputFilename,
inputFormat=args.inputFormat,
outputFilename=args.outputFilename,
outputFormat=args.outputFormat,
readOptions=readOptions,
writeOptions=writeOptions,
**convertOptions
)
sys.exit(0)
elif ui_type == "cmd":
from ui import ui_cmd
sys.exit(0 if ui_cmd.UI().run(
args.inputFilename,
outputFilename=args.outputFilename,
inputFormat=args.inputFormat,
core.StdLogHandler(noColor=args.noColor),
)
# With the logger set up, we can import other pyglossary modules so that
# they can log correctly.
core.checkCreateConfDir()
##############################
from pyglossary.glossary import Glossary
from ui.ui_cmd import COMMAND, help, parseFormatOptionsStr
if args.verbosity != defaultVerbosity:
Glossary.init()
##############################
def dashToCamelCase(text):
    """Convert a dash-separated name to lowerCamelCase.

    The text is split on "-". The first part is lower-cased, every later
    part is capitalized (first character upper-cased, the rest lower-cased),
    and the parts are joined without a separator.

    Example: "hello-PYTHON-user" becomes "helloPythonUser".
    """
    # str.split always yields at least one item, so the unpacking is safe
    # even for an empty string.
    first, *rest = text.split("-")
    return first.lower() + "".join(part.capitalize() for part in rest)
# Names of the available UI back ends.
# NOTE(review): presumably these are tried in this order when choosing a
# graphical interface - confirm against the code that consumes ui_list.
ui_list = (
"gtk",
"tk",
"qt",
)
self.progressEnd()
yield wordCount
@classmethod
def init(cls):
    """Reset the class-level format registries and load the plugins.

    Each of the six registry attributes (read/write formats, extensions and
    descriptions) is rebound to its own fresh empty list. Plugins are then
    loaded from the "plugins" directory next to this file, followed by
    userPluginsDir.
    """
    registryNames = (
        "readFormats",
        "writeFormats",
        "readExt",
        "writeExt",
        "readDesc",
        "writeDesc",
    )
    for registryName in registryNames:
        # A new list per attribute, so the registries never share storage.
        setattr(cls, registryName, [])

    bundledPluginsDir = join(dirname(__file__), "plugins")
    for pluginsDir in (bundledPluginsDir, userPluginsDir):
        cls.loadPlugins(pluginsDir)
# Initialise the Glossary class before its format tables are consulted below.
Glossary.init()
# Keep only the preference options that exist on args and are not None.
prefOptions = {}
for param in prefOptionsKeys:
value = getattr(args, param, None)
if value is not None:
prefOptions[param] = value
# Same filtering for the conversion options.
convertOptions = {}
for param in convertOptionsKeys:
value = getattr(args, param, None)
if value is not None:
convertOptions[param] = value
# Check the read options against the input format and replace each raw value
# with its evaluated form; any failure logs an error and exits with status 1.
if args.inputFilename and readOptions:
inputFormat = Glossary.detectInputFormat(args.inputFilename, format=args.inputFormat)
if not inputFormat:
log.error("Could not detect format for input file %s" % args.inputFilename)
sys.exit(1)
readOptionsProp = Glossary.formatsOptionsProp[inputFormat]
for optName, optValue in readOptions.items():
# Reject option names that are not read options of the detected format.
if optName not in Glossary.formatsReadOptions[inputFormat]:
log.error("Invalid option name %s for format %s" % (optName, inputFormat))
sys.exit(1)
prop = readOptionsProp[optName]
# evaluate() yields (value, ok); the value must also pass validate().
optValueNew, ok = prop.evaluate(optValue)
if not ok or not prop.validate(optValueNew):
log.error("Invalid option value %s=%r for format %s" % (optName, optValue, inputFormat))
sys.exit(1)
# Only existing keys are reassigned, so the dict size does not change
# while it is being iterated.
readOptions[optName] = optValueNew
# NOTE(review): the body of this branch is not visible in this excerpt.
if args.outputFilename and writeOptions:
def replaceAsciiCharRefs(b_text, encoding):
# Parses numeric character references in both forms, e.g.:
# &#0147;   (decimal)
# &#x010b;  (hexadecimal)
# NOTE(review): the end of this function is not visible in this excerpt.
if log.isDebug():
assert isinstance(b_text, bytes)
# Odd-indexed items are treated as the references (presumably because
# b_pat_ascii_char_ref has one capturing group - verify against the pattern).
b_parts = re.split(b_pat_ascii_char_ref, b_text)
for i_part, b_part in enumerate(b_parts):
if i_part % 2 != 1:
continue
# reference
try:
# NOTE(review): b_text is asserted to be bytes above, so b_part is bytes
# too; comparing it with the str literal "&#x" is always False on
# Python 3. Hexadecimal references therefore take the decimal branch,
# int() raises ValueError and code becomes -1. Confirm, and consider
# comparing with b"&#x".
if b_part[:3].lower() == "&#x":
code = int(b_part[3:-1], 16)
else:
code = int(b_part[2:-1])
# Zero and negative code points are rejected like unparsable ones.
if code <= 0:
raise ValueError()
except (ValueError, OverflowError):
# -1 marks a reference that could not be parsed.
code = -1
if code < 128 or code > 255:
def _loadedEntryGen(self) -> Iterator[BaseEntry]:
    """Yield an Entry built from each raw item stored in self._data.

    When both self.ui and self._progressbar are truthy, progress is
    reported: progressInit("Writing") before the first entry,
    progress(index, total) after each yielded entry, and progressEnd()
    once the data is exhausted.
    """
    total = len(self._data)
    showProgress = self.ui and self._progressbar

    if showProgress:
        self.progressInit("Writing")

    for position, raw in enumerate(self._data):
        entry = Entry.fromRaw(
            raw,
            defaultDefiFormat=self._defaultDefiFormat,
        )
        yield entry
        # Reported only after the consumer asks for the next entry.
        if showProgress:
            self.progress(position, total)

    if showProgress:
        self.progressEnd()
# defi = "\n".join([RLM+line for line in defi.split("\n")])
# for GoldenDict ^^ FIXME
return entry
def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
    """Apply language-specific filtering to the entry.

    The glossary's "sourceLang" and "targetLang" info values are joined and
    lower-cased. If the result mentions "persian" or "farsi", the entry is
    passed through run_fa and that result is returned; otherwise the entry
    is returned unchanged.
    """
    sourceLang = self.glos.getInfo("sourceLang")
    targetLang = self.glos.getInfo("targetLang")
    combined = (sourceLang + targetLang).lower()

    if any(keyword in combined for keyword in ("persian", "farsi")):
        return self.run_fa(entry)
    return entry
class CleanEntryFilter(EntryFilter): # FIXME
name = "clean"
desc = "Clean"
def cleanDefi(self, st: str) -> str:
st = st.replace("♦ ", "♦ ")
st = re.sub("[\r\n]+", "\n", st)
st = re.sub(" *\n *", "\n", st)
"""
This code may correct snippets like:
- First sentence .Second sentence. -> First sentence. Second sentence.
- First clause ,second clause. -> First clause, second clause.
But there are cases when this code have undesirable effects
( "<" represented as "<" in HTML markup):
- -> < Adj. >
- -> < fig. >
entry.editFuncDefi(fixStr)
return entry
class SkipDataEntryFilter(EntryFilter):
    """Entry filter that drops data entries and passes all others through."""

    name = "skip_resources"
    desc = "Skip Resources"

    def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
        """Return None when entry.isData() is truthy, else the entry itself."""
        return None if entry.isData() else entry
class LangEntryFilter(EntryFilter):
name = "lang"
desc = "Language-dependent Filters"
def run_fa(self, entry: BaseEntry) -> Optional[BaseEntry]:
"""
Apply faEditStr to the entry's word and then to its definition,
through editFuncWord and editFuncDefi, and return the same entry.
"""
# Imported here rather than at module level.
from pyglossary.persian_utils import faEditStr
entry.editFuncWord(faEditStr)
entry.editFuncDefi(faEditStr)
# Disabled idea: prefix every definition line with a right-to-left mark.
# RLM = "\xe2\x80\x8f"
# defi = "\n".join([RLM+line for line in defi.split("\n")])
# for GoldenDict ^^ FIXME
return entry
def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
langs = (
self.glos.getInfo("sourceLang") +
self.glos.getInfo("targetLang")
def openGzip(self):
    """Check the file header and open the embedded gzip stream.

    Reads the first 6 bytes of self._filename. The first 4 bytes must be
    one of the two accepted signatures; bytes 4-5 are decoded with
    binStrToInt into the offset of the gzip header, which is stored in
    self.gzipOffset and must be at least 6.

    On success self.file is set to a BGLGzipFile reading from that offset.

    Returns:
        True on success; False (after logging an error) when the header is
        too short, the signature is not accepted, or the offset is below 6.

    Raises:
        OSError: propagated from open() if the file cannot be opened.
    """
    # NOTE(review): presumably the two supported file signatures - confirm.
    validSignatures = (
        b"\x12\x34\x00\x01",
        b"\x12\x34\x00\x02",
    )

    # open() raises on failure and a file object is always truthy, so no
    # "empty file pointer" check is needed. The header handle is closed
    # immediately because FileOffS opens the file by name on its own.
    with open(self._filename, "rb") as bglFile:
        b_head = bglFile.read(6)

    if len(b_head) < 6 or b_head[:4] not in validSignatures:
        log.error("invalid header: %r", b_head[:6])
        return False

    self.gzipOffset = gzipOffset = binStrToInt(b_head[4:6])
    log.debug("Position of gz header: %s", gzipOffset)

    # The gzip data cannot start inside the 6-byte header itself.
    if gzipOffset < 6:
        log.error("invalid gzip header position: %s", gzipOffset)
        return False

    self.file = BGLGzipFile(
        fileobj=FileOffS(self._filename, gzipOffset),
        closeFileobj=True,
    )
    return True