# Shared imports for the snippets below (project-internal helpers such as
# get_resource, replace_ext, load_pickle, save_pickle, path_join, download,
# uncompress, stdout_redirected, PAD, global_cache and logger come from the
# surrounding codebase):
import glob
import os
import sys
from pathlib import Path
from typing import Dict, Generator, Tuple, Union

import fasttext
import numpy as np
import tensorflow as tf

def load_word2vec(path, delimiter=' ', cache=True) -> Tuple[Dict[str, np.ndarray], int]:
    realpath = get_resource(path)
    binpath = replace_ext(realpath, '.pkl')
    if cache:
        try:
            word2vec, dim = load_pickle(binpath)
            logger.debug(f'Loaded {binpath}')
            return word2vec, dim
        except IOError:
            pass
    dim = None
    word2vec = dict()
    with open(realpath, encoding='utf-8', errors='ignore') as f:
        for idx, line in enumerate(f):
            line = line.rstrip().split(delimiter)
            if len(line) > 2:
                if dim is None:
                    dim = len(line)
                elif len(line) != dim:
                    logger.warning(f'{path}#{idx + 1} length mismatches with {dim}')
                    continue
                # reconstructed tail of the truncated snippet: the first column
                # is the word, the remaining columns are its vector
                word2vec[line[0]] = np.asarray(line[1:], dtype=np.float32)
    dim -= 1  # the count above included the word column itself
    if cache:
        save_pickle((word2vec, dim), binpath)
        logger.debug(f'Cached {binpath}')
    return word2vec, dim
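A minimal usage sketch, assuming a GloVe-style text file in which each row is a word followed by its vector components (the path below is made up):

word2vec, dim = load_word2vec('data/glove.6B.100d.txt')
print(dim)                   # vector dimensionality, e.g. 100
print(word2vec['king'][:5])  # first five components of one vector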
def x_to_idx(self, x) -> Union[tf.Tensor, Tuple]:
    # guard: this base implementation should never run; transforms that
    # enable map_x are expected to override this hook
    logger.fatal('map_x should always be set to True')
    exit(1)
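Because the base implementation aborts, any transform that maps its inputs must override this hook. A minimal sketch of such an override, where the subclass name and the vocab_table attribute are assumptions, not from the source:

class MyTransform(Transform):  # hypothetical subclass
    def x_to_idx(self, x) -> tf.Tensor:
        # map string tokens to integer ids with a prebuilt lookup table
        return self.vocab_table.lookup(x)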
def samples_to_dataset(self, samples: Generator, map_x=None, map_y=None, batch_size=32, shuffle=None, repeat=None,
                       drop_remainder=False,
                       prefetch=1, cache=True) -> tf.data.Dataset:
    output_types, output_shapes, padding_values = self.output_types, self.output_shapes, self.padding_values
    if not all(v for v in [output_types, output_shapes, padding_values]):
        # print('Did you forget to call build_config() on your transform?')
        self.build_config()
        output_types, output_shapes, padding_values = self.output_types, self.output_shapes, self.padding_values
    assert all(v for v in [output_types, output_shapes, padding_values]), \
        'Your create_types_shapes_values returns None, which is not allowed'
    # if not callable(samples):
    #     samples = Transform.generator_to_callable(samples)
    dataset = tf.data.Dataset.from_generator(samples, output_types=output_types, output_shapes=output_shapes)
    if cache:
        logger.debug('Dataset cache enabled')
        dataset = dataset.cache(cache if isinstance(cache, str) else '')
    if shuffle:
        if isinstance(shuffle, bool):
            shuffle = 1024  # default buffer size when shuffle=True
        dataset = dataset.shuffle(shuffle)
    if repeat:
        dataset = dataset.repeat(repeat)
    if batch_size:
        dataset = dataset.padded_batch(batch_size, output_shapes, padding_values, drop_remainder)
    if prefetch:
        dataset = dataset.prefetch(prefetch)
    if map_x is None:
        map_x = self.map_x
    if map_y is None:
        map_y = self.map_y
    if map_x or map_y:
        # reconstructed tail of the truncated snippet: map raw samples to ids
        # (y_to_idx is assumed to be the counterpart of x_to_idx above)
        def mapper(X, Y):
            if map_x:
                X = self.x_to_idx(X)
            if map_y:
                Y = self.y_to_idx(Y)
            return X, Y

        dataset = dataset.map(mapper)
    return dataset
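A usage sketch, assuming the hypothetical MyTransform above and that samples is a zero-argument callable returning a generator, which is what tf.data.Dataset.from_generator expects:

transform = MyTransform()
transform.build_config()
pairs = [(['hello', 'world'], ['O', 'O'])]  # made-up (x, y) samples
dataset = transform.samples_to_dataset(lambda: iter(pairs), batch_size=2, shuffle=True)
for X, Y in dataset.take(1):
    print(X.shape, Y.shape)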
# Fragment of the path-resolution helper (the get_resource() used above):
# resolve an anchor inside an extracted archive, glob for files already on
# disk, then fall back to downloading and uncompressing.
        realpath = str(Path(realpath).parent.joinpath(parts[0]))
        anchor = '/'.join(parts[1:])
        child = path_join(realpath, anchor)
        if os.path.exists(child):
            return child
    elif os.path.isdir(realpath) or (os.path.isfile(realpath) and (compressed and extract)):
        return realpath
    else:
        pattern = realpath + '*'
        files = glob.glob(pattern)
        zip_path = realpath + compressed
        if extract and zip_path in files:
            files.remove(zip_path)
        if files:
            if len(files) > 1:
                logger.debug(f'Found multiple files with {pattern}, will use the first one.')
            return files[0]
        # realpath is the path after extraction
        if compressed:
            realpath += compressed
    if not os.path.isfile(realpath):
        path = download(url=path, save_path=realpath)
    else:
        path = realpath
    if extract and compressed:
        path = uncompress(path)
        if anchor:
            path = path_join(path, anchor)
    return path
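A usage sketch of the resolver (the URL is made up): a remote archive is downloaded, extracted, and cached under the local save directory, while an existing local path is returned unchanged:

local = get_resource('https://example.com/data/embeddings.zip')
print(local)  # local directory or file after download and extraction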
def serve(self, export_dir=None, grpc_port=8500, rest_api_port=0, overwrite=False, dry_run=False):
    export_dir = self.export_model_for_serving(export_dir, show_hint=False, overwrite=overwrite)
    if not dry_run:
        del self.model  # free memory
        logger.info('The inputs of the exported model are shown below.')
        os.system(f'saved_model_cli show --all --dir {export_dir}/1')
    cmd = f'nohup tensorflow_model_server --model_name={os.path.splitext(os.path.basename(self.meta["load_path"]))[0]} ' \
          f'--model_base_path={export_dir} --port={grpc_port} --rest_api_port={rest_api_port} ' \
          f'>serve.log 2>&1 &'
    logger.info(f'Running ...\n{cmd}')
    if not dry_run:
        os.system(cmd)
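A usage sketch, assuming a trained component exposing this method (tagger is a made-up name) and a tensorflow_model_server binary on the PATH; with dry_run=True the launch command is only printed, not executed:

tagger.serve(export_dir='/tmp/tagger_export', grpc_port=8500, dry_run=True)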
def __init__(self, filepath: str, padding=PAD, name=None, **kwargs):
    self.padding = padding.encode('utf-8')
    self.filepath = filepath
    filepath = get_resource(filepath)
    assert os.path.isfile(filepath), f'Resolved path {filepath} is not a file'
    existed = global_cache.get(filepath, None)
    if existed:
        logger.debug('Using cached fasttext model [{}].'.format(filepath))
        self.model = existed
    else:
        logger.debug('Loading fasttext model from [{}].'.format(filepath))
        # fasttext prints a blank line here, so silence its stdout
        with stdout_redirected(to=os.devnull, stdout=sys.stderr):
            self.model = fasttext.load_model(filepath)
        global_cache[filepath] = self.model
    kwargs.pop('input_dim', None)
    kwargs.pop('output_dim', None)
    kwargs.pop('mask_zero', None)
    if not name:
        name = os.path.splitext(os.path.basename(filepath))[0]
    # probe an arbitrary token to obtain the embedding dimension: fasttext
    # composes a vector from subwords for any input string
    super().__init__(input_dim=len(self.model.words), output_dim=self.model['king'].size,
                     mask_zero=padding is not None, trainable=False, dtype=tf.string, name=name, **kwargs)
    embed_fn = np.frompyfunc(self.embed, 1, 1)
    # vf = np.vectorize(self.embed, otypes=[np.ndarray])
    self._embed_np = embed_fn
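A usage sketch of the finished layer, assuming the enclosing class is a Keras Embedding subclass named FastTextEmbedding and a 300-dimensional .bin model (both the class name and the path are assumptions):

embed = FastTextEmbedding('data/cc.en.300.bin')
vectors = embed(tf.constant([['hello', 'world']]))
print(vectors.shape)  # expected (1, 2, 300) for a 300-d model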