Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Load a GiNZA/spaCy model and apply command-line configuration to it, then
# choose the output stream.  (Indentation restored: this fragment arrived
# whitespace-flattened and was not valid Python as scraped.)
nlp = load_model(model_path)
if disable_pipes:
    print("disabling pipes: {}".format(disable_pipes), file=sys.stderr)
    nlp.disable_pipes(disable_pipes)
    print("using : {}".format(nlp.pipe_names), file=sys.stderr)
else:
    # to ensure reflect local changes of corrector
    # NOTE(review): nesting reconstructed from the flattened source — the
    # combined membership test suggests remove/recreate all sit inside it.
    if recreate_corrector and 'JapaneseCorrector' in nlp.pipe_names:
        nlp.remove_pipe('JapaneseCorrector')
        corrector = JapaneseCorrector(nlp)
        nlp.add_pipe(corrector, last=True)
# Map the CLI mode letter onto the tokenizer's Sudachi split mode.
if mode == 'A':
    nlp.tokenizer.mode = OriginalTokenizer.SplitMode.A
elif mode == 'B':
    nlp.tokenizer.mode = OriginalTokenizer.SplitMode.B
elif mode == 'C':
    nlp.tokenizer.mode = OriginalTokenizer.SplitMode.C
else:
    raise Exception('mode should be A, B or C')
print("mode is {}".format(mode), file=sys.stderr)
if not use_sentence_separator:
    print("disabling sentence separator", file=sys.stderr)
    nlp.tokenizer.use_sentence_separator = False
# NOTE(review): the file handle opened here is never closed within this
# fragment — confirm the surrounding code closes it (or use try/finally).
if output_path:
    output = open(str(output_path), 'w')
else:
    output = sys.stdout
line = ''
# NOTE(review): the lines below are spliced fragments of what appears to be
# SudachiTokenizer.__init__ — the `def` line and the beginning of an
# `except ImportError:` handler are missing, and extraction flattened all
# indentation.  Bytes left untouched; comments only.
try:
# Apparently the tail of an "importing sudachipy failed" message built in
# the missing ImportError handler — `msg` is undefined at this point as
# shown.
msg += "\n 2. make sure dictionary is successfully installed."
raise ImportError(msg)
# Register this tokenizer with the base class under "sudachi (<mode>)".
super(SudachiTokenizer, self).__init__(
name="sudachi ({})".format(mode), with_postag=with_postag,
)
try:
# Create a SudachiPy tokenizer instance from the installed dictionary.
self._tokenizer = dictionary.Dictionary().create()
except KeyError:
msg = "Loading a dictionary fails."
msg += " ( see https://github.com/WorksApplications/SudachiPy#install-dict-packages )" # NOQA
raise KeyError(msg)
# Normalise the requested mode letter and map it to a SplitMode constant;
# anything other than A/B/C is rejected.
_mode = mode.capitalize()
if _mode == "A":
self._mode = tokenizer.Tokenizer.SplitMode.A
elif _mode == "B":
self._mode = tokenizer.Tokenizer.SplitMode.B
elif _mode == "C":
self._mode = tokenizer.Tokenizer.SplitMode.C
else:
raise ValueError("Invalid mode is specified. Mode should be A, B, or C.") # NOQA
# Load a GiNZA model — an explicit model path wins, otherwise the installed
# 'ja_ginza' package — then apply CLI options to the pipeline and choose the
# output stream.  (Indentation restored: this fragment arrived
# whitespace-flattened and was not valid Python as scraped.)
if model_path:
    nlp = spacy.load(model_path)
else:
    nlp = spacy.load('ja_ginza')
if disable_pipes:
    print("disabling pipes: {}".format(disable_pipes), file=sys.stderr)
    nlp.disable_pipes(disable_pipes)
    print("using : {}".format(nlp.pipe_names), file=sys.stderr)
if recreate_corrector:
    # Drop any existing corrector first so a fresh instance (reflecting
    # local code changes) is appended at the end of the pipeline.
    # NOTE(review): nesting reconstructed from the flattened source.
    if 'JapaneseCorrector' in nlp.pipe_names:
        nlp.remove_pipe('JapaneseCorrector')
    corrector = JapaneseCorrector(nlp)
    nlp.add_pipe(corrector, last=True)
# Map the CLI mode letter onto the tokenizer's Sudachi split mode.
if mode == 'A':
    nlp.tokenizer.mode = OriginalTokenizer.SplitMode.A
elif mode == 'B':
    nlp.tokenizer.mode = OriginalTokenizer.SplitMode.B
elif mode == 'C':
    nlp.tokenizer.mode = OriginalTokenizer.SplitMode.C
else:
    raise Exception('mode should be A, B or C')
print("mode is {}".format(mode), file=sys.stderr)
if not use_sentence_separator:
    print("disabling sentence separator", file=sys.stderr)
    nlp.tokenizer.use_sentence_separator = False
# NOTE(review): the file handle opened here is never closed within this
# fragment — confirm the surrounding code closes it.
if output_path:
    output = open(str(output_path), 'w')
else:
    output = sys.stdout
def _command_tokenize(args, print_usage):
    """Entry point for the ``tokenize`` subcommand.

    Prints the version and returns early when ``--version`` was given;
    otherwise validates the input files, resolves the Sudachi split mode,
    and wires tokenizer output either to stdout or to ``args.fpath_out``
    through a DEBUG-level logger with propagation disabled.

    (Indentation restored: this definition arrived whitespace-flattened.
    The body also appears truncated after the ``print_all`` assignment.)
    """
    if args.version:
        print_version()
        return

    _input_files_checker(args, print_usage)

    # Resolve the split mode; anything other than "A"/"B" falls through to
    # C.  NOTE(review): invalid values are not rejected here — presumably
    # argparse restricts the choices upstream; confirm.
    if args.mode == "A":
        mode = tokenizer.Tokenizer.SplitMode.A
    elif args.mode == "B":
        mode = tokenizer.Tokenizer.SplitMode.B
    else:
        mode = tokenizer.Tokenizer.SplitMode.C

    # Route all output through a dedicated logger so the same code path
    # serves stdout and --fpath-out.  NOTE(review): the file handle opened
    # here is not closed in this (truncated) view — confirm cleanup
    # happens downstream.
    stdout_logger = logging.getLogger(__name__)
    output = sys.stdout
    if args.fpath_out:
        output = open(args.fpath_out, "w", encoding="utf-8")
    handler = logging.StreamHandler(output)
    handler.setLevel(logging.DEBUG)
    stdout_logger.addHandler(handler)
    stdout_logger.setLevel(logging.DEBUG)
    stdout_logger.propagate = False

    print_all = args.a
# NOTE(review): this fragment begins mid-construct — the `if` header that
# owns the next three lines (and that the `else:` below pairs with) was
# lost in extraction, and all indentation is flattened.  Bytes left
# untouched; comments only.
print("disabling pipes: {}".format(disable_pipes), file=sys.stderr)
nlp.disable_pipes(disable_pipes)
print("using : {}".format(nlp.pipe_names), file=sys.stderr)
else:
# to ensure reflect local changes of corrector
if recreate_corrector and 'JapaneseCorrector' in nlp.pipe_names:
nlp.remove_pipe('JapaneseCorrector')
corrector = JapaneseCorrector(nlp)
nlp.add_pipe(corrector, last=True)
# Map the CLI mode letter onto the tokenizer's Sudachi split mode;
# anything other than A/B/C raises.
if mode == 'A':
nlp.tokenizer.mode = OriginalTokenizer.SplitMode.A
elif mode == 'B':
nlp.tokenizer.mode = OriginalTokenizer.SplitMode.B
elif mode == 'C':
nlp.tokenizer.mode = OriginalTokenizer.SplitMode.C
else:
raise Exception('mode should be A, B or C')
print("mode is {}".format(mode), file=sys.stderr)
if not use_sentence_separator:
print("disabling sentence separator", file=sys.stderr)
nlp.tokenizer.use_sentence_separator = False
# Resolve the browser used to open visualisations: an explicit command
# selects a specific webbrowser controller; None means "do not open one".
# (Indentation restored: the original if/else arrived whitespace-flattened.)
browser = webbrowser.get(browser_command) if browser_command else None
# NOTE(review): two unrelated, truncated fragments fused by extraction — a
# corpus-type branch whose body is cut off after the 'bccwj_ud' case, and a
# GPU-enabling pair from a different location.  Indentation flattened;
# bytes left untouched, comments only.
if corpus_type:
if corpus_type == 'bccwj_ud':
# Convert the input lines to a spaCy Doc via convert_files/to_doc and
# pass it through correct_dep (presumably a dependency fix-up — confirm).
doc = correct_dep(convert_files(lines)[0].to_doc(nlp.vocab, True), False)
print('Displaying first sentence with result and raw_result:', doc.text, file=sys.stderr)
spacy.require_gpu()
print("GPU enabled", file=sys.stderr)
# Load a GiNZA/spaCy model, apply command-line configuration to the
# pipeline, and choose the output stream.  (Indentation restored: this
# fragment arrived whitespace-flattened and was not valid Python as
# scraped.)
nlp = load_model(model_path)
if disable_pipes:
    print("disabling pipes: {}".format(disable_pipes), file=sys.stderr)
    nlp.disable_pipes(disable_pipes)
    print("using : {}".format(nlp.pipe_names), file=sys.stderr)
else:
    # to ensure reflect local changes of corrector
    # NOTE(review): nesting reconstructed from the flattened source — the
    # combined membership test suggests remove/recreate all sit inside it.
    if recreate_corrector and 'JapaneseCorrector' in nlp.pipe_names:
        nlp.remove_pipe('JapaneseCorrector')
        corrector = JapaneseCorrector(nlp)
        nlp.add_pipe(corrector, last=True)
# Map the CLI mode letter onto the tokenizer's Sudachi split mode.
if mode == 'A':
    nlp.tokenizer.mode = OriginalTokenizer.SplitMode.A
elif mode == 'B':
    nlp.tokenizer.mode = OriginalTokenizer.SplitMode.B
elif mode == 'C':
    nlp.tokenizer.mode = OriginalTokenizer.SplitMode.C
else:
    raise Exception('mode should be A, B or C')
print("mode is {}".format(mode), file=sys.stderr)
if not use_sentence_separator:
    print("disabling sentence separator", file=sys.stderr)
    nlp.tokenizer.use_sentence_separator = False
# NOTE(review): the file handle opened here is never closed within this
# fragment — confirm the surrounding code closes it.
if output_path:
    output = open(str(output_path), 'w')
else:
    output = sys.stdout