Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): fragment of a larger tokenizer-loading method whose start is
# not visible in this chunk; the original indentation has been stripped, so
# block structure must be inferred from the syntax below.
elif self.tokenizer_opt['type'] == 'pyonmttok':
# 'params' holds the keyword options forwarded to pyonmttok.Tokenizer and
# is mandatory for this tokenizer type.
if "params" not in self.tokenizer_opt:
raise ValueError(
"Missing mandatory tokenizer option 'params'")
import pyonmttok
if self.tokenizer_opt["mode"] is not None:
mode = self.tokenizer_opt["mode"]
else:
# Redundant branch: mode is already None when this path is taken.
mode = None
# load can be called multiple times: modify copy
tokenizer_params = dict(self.tokenizer_opt["params"])
# Resolve every option whose name ends in 'path' relative to the model
# root directory, so relative paths in the configuration keep working.
for key, value in self.tokenizer_opt["params"].items():
if key.endswith("path"):
tokenizer_params[key] = os.path.join(
self.model_root, value)
tokenizer = pyonmttok.Tokenizer(mode,
**tokenizer_params)
self.tokenizer = tokenizer
else:
# Any tokenizer type not handled by a preceding branch is rejected.
raise ValueError("Invalid value for tokenizer type")
# Record the load duration and (re)arm the idle-unload timer; loading_lock
# presumably is a threading.Event signalling waiters that loading finished
# — TODO confirm against the enclosing class.
self.load_time = timer.tick()
self.reset_unload_timer()
self.loading_lock.set()
# NOTE(review): duplicate fragment of the tokenizer-loading method (same code
# as the copy above, plus a postprocessor-loading section); indentation has
# been stripped, so structure is inferred from syntax.
elif self.tokenizer_opt['type'] == 'pyonmttok':
# The 'params' dict is forwarded verbatim to pyonmttok.Tokenizer.
if "params" not in self.tokenizer_opt:
raise ValueError(
"Missing mandatory tokenizer option 'params'")
import pyonmttok
if self.tokenizer_opt["mode"] is not None:
mode = self.tokenizer_opt["mode"]
else:
# Redundant branch: mode is already None here.
mode = None
# load can be called multiple times: modify copy
tokenizer_params = dict(self.tokenizer_opt["params"])
# Rewrite every '*path' option to be relative to the model root so
# relative paths from the configuration resolve correctly.
for key, value in self.tokenizer_opt["params"].items():
if key.endswith("path"):
tokenizer_params[key] = os.path.join(
self.model_root, value)
tokenizer = pyonmttok.Tokenizer(mode,
**tokenizer_params)
self.tokenizer = tokenizer
else:
# Unknown tokenizer types are rejected outright.
raise ValueError("Invalid value for tokenizer type")
# Optionally build the postprocessing pipeline: each entry of
# postprocess_opt is resolved to a callable via get_function_by_path
# (defined elsewhere in the project) and collected in order.
if self.postprocess_opt is not None:
self.logger.info("Loading postprocessor")
self.postprocessor = []
for function_path in self.postprocess_opt:
function = get_function_by_path(function_path)
self.postprocessor.append(function)
# Record load duration, (re)arm the idle-unload timer, and signal
# loading_lock — presumably a threading.Event — TODO confirm.
self.load_time = timer.tick()
self.reset_unload_timer()
self.loading_lock.set()
def __init__(self, **kwargs):
    """Wrap a pyonmttok.Tokenizer built from keyword options.

    Args:
      **kwargs: options forwarded to ``pyonmttok.Tokenizer``. An optional
        ``mode`` entry selects the tokenization mode (default
        ``"conservative"``) and is passed positionally, not as a keyword.
    """
    # Keep a pristine copy of the full option set (including 'mode')
    # before it is mutated below.
    self._config = copy.deepcopy(kwargs)
    # 'mode' is a positional argument of pyonmttok.Tokenizer, so pop it
    # out of the remaining keyword options; this replaces the original
    # `if "mode" in kwargs: ... del kwargs["mode"]` dance.
    mode = kwargs.pop("mode", "conservative")
    self._tokenizer = pyonmttok.Tokenizer(mode, **kwargs)
# NOTE(review): third duplicate fragment of the tokenizer-loading method; the
# enclosing definition starts outside this chunk and indentation has been
# stripped, so structure is inferred from syntax.
elif self.tokenizer_opt['type'] == 'pyonmttok':
# 'params' supplies the keyword options for pyonmttok.Tokenizer.
if "params" not in self.tokenizer_opt:
raise ValueError(
"Missing mandatory tokenizer option 'params'")
import pyonmttok
if self.tokenizer_opt["mode"] is not None:
mode = self.tokenizer_opt["mode"]
else:
# Redundant branch: mode is already None when this executes.
mode = None
# load can be called multiple times: modify copy
tokenizer_params = dict(self.tokenizer_opt["params"])
# Options whose name ends in 'path' are made relative to the model root.
for key, value in self.tokenizer_opt["params"].items():
if key.endswith("path"):
tokenizer_params[key] = os.path.join(
self.model_root, value)
tokenizer = pyonmttok.Tokenizer(mode,
**tokenizer_params)
self.tokenizer = tokenizer
else:
# Reject any tokenizer type not handled above.
raise ValueError("Invalid value for tokenizer type")
# Record load duration, (re)arm the idle-unload timer, and release any
# threads waiting on loading_lock (presumably a threading.Event — TODO
# confirm from the enclosing class).
self.load_time = timer.tick()
self.reset_unload_timer()
self.loading_lock.set()
def __init__(self, export_dir):
    """Load a TensorFlow SavedModel translator and its SentencePiece tokenizer.

    Args:
      export_dir: directory containing the SavedModel export; the
        SentencePiece model is expected under ``assets.extra/wmtende.model``.
    """
    saved_model = tf.saved_model.load(export_dir)
    self._translate_fn = saved_model.signatures["serving_default"]
    # Tokenization itself is delegated to SentencePiece, hence mode "none".
    self._tokenizer = pyonmttok.Tokenizer(
        "none",
        sp_model_path=os.path.join(export_dir, "assets.extra", "wmtende.model"),
    )
def build_tokenizer(args):
    """Builds a tokenizer based on user arguments.

    Args:
      args: dict of tokenization options; must contain a ``'mode'`` entry.
        A ``'vocabulary'`` entry, if present, is consumed here and not
        forwarded to the tokenizer.

    Returns:
      A configured ``pyonmttok.Tokenizer`` instance.

    Raises:
      KeyError: if ``'mode'`` is missing from ``args``.
    """
    import pyonmttok
    # Encode string values to UTF-8 bytes for Python 2 compatibility
    # (six.string_types covers both str and unicode).
    local_args = {
        key: value.encode('utf-8') if isinstance(value, six.string_types)
        else value
        for key, value in six.iteritems(args)
    }
    # 'mode' is passed positionally to pyonmttok.Tokenizer.
    mode = local_args.pop('mode')
    # 'vocabulary' is not a pyonmttok option; drop it if present. Using
    # pop() with a default tolerates its absence, whereas the original
    # unconditional `del` raised KeyError when the key was missing.
    local_args.pop('vocabulary', None)
    return pyonmttok.Tokenizer(mode, **local_args)