How to use the hanlp.utils.log_util.logger object in hanlp

To help you get started, we’ve selected a few hanlp examples, based on popular ways hanlp.utils.log_util.logger is used in public projects.

github hankcs / HanLP / hanlp / utils / io_util.py
def load_word2vec(path, delimiter=' ', cache=True) -> Tuple[Dict[str, np.ndarray], int]:
    realpath = get_resource(path)
    binpath = replace_ext(realpath, '.pkl')
    if cache:
        try:
            word2vec, dim = load_pickle(binpath)
            logger.debug(f'Loaded {binpath}')
            return word2vec, dim
        except IOError:
            pass

    dim = None
    word2vec = dict()
    with open(realpath, encoding='utf-8', errors='ignore') as f:
        for idx, line in enumerate(f):
            line = line.rstrip().split(delimiter)
            if len(line) > 2:
                if dim is None:
                    dim = len(line)
                else:
                    if len(line) != dim:
                        logger.warning('{}#{} length mismatches with {}'.format(path, idx + 1, dim))
                        continue
                word, vec = line[0], line[1:]
                word2vec[word] = np.array(vec, dtype=np.float32)
    dim -= 1  # the first column is the word itself, not a vector component
    if cache:
        save_pickle((word2vec, dim), binpath)
        logger.debug(f'Cached {binpath}')
    return word2vec, dim
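A minimal usage sketch, assuming the embeddings file is a plain word2vec text file (one token followed by its vector per line); the path and the lookup word here are hypothetical:

from hanlp.utils.io_util import load_word2vec

word2vec, dim = load_word2vec('data/embedding/glove.6B.100d.txt')  # hypothetical path
print(dim)                  # vector dimensionality, e.g. 100
print(word2vec['the'][:5])  # first five components of one vector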
github hankcs / HanLP / hanlp / components / classifiers / transformer_classifier.py
def x_to_idx(self, x) -> Union[tf.Tensor, Tuple]:
        logger.fatal('map_x should always be set to True')
        exit(1)
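Note that logger.fatal is an alias for logger.critical: it records the message at the highest severity but does not terminate the process, which is why the snippet calls exit(1) explicitly. A component that preferred an exception over a hard exit could guard the same way (a sketch, not HanLP's actual code):

def x_to_idx(self, x):
    raise NotImplementedError('map_x should always be set to True for this component')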
github hankcs / HanLP / hanlp / common / transform.py
def samples_to_dataset(self, samples: Generator, map_x=None, map_y=None, batch_size=32, shuffle=None, repeat=None,
                           drop_remainder=False,
                           prefetch=1, cache=True) -> tf.data.Dataset:
        output_types, output_shapes, padding_values = self.output_types, self.output_shapes, self.padding_values
        if not all(v for v in [output_types, output_shapes,
                               padding_values]):
            # print('Did you forget to call build_config() on your transform?')
            self.build_config()
            output_types, output_shapes, padding_values = self.output_types, self.output_shapes, self.padding_values
        assert all(v for v in [output_types, output_shapes,
                               padding_values]), 'Your create_types_shapes_values returns None, which is not allowed'
        # if not callable(samples):
        #     samples = Transform.generator_to_callable(samples)
        dataset = tf.data.Dataset.from_generator(samples, output_types=output_types, output_shapes=output_shapes)
        if cache:
            logger.debug('Dataset cache enabled')
            dataset = dataset.cache(cache if isinstance(cache, str) else '')
        if shuffle:
            if isinstance(shuffle, bool):
                shuffle = 1024
            dataset = dataset.shuffle(shuffle)
        if repeat:
            dataset = dataset.repeat(repeat)
        if batch_size:
            dataset = dataset.padded_batch(batch_size, output_shapes, padding_values, drop_remainder)
        if prefetch:
            dataset = dataset.prefetch(prefetch)
        if map_x is None:
            map_x = self.map_x
        if map_y is None:
            map_y = self.map_y
        if map_x or map_y:
            def mapper(X, Y):
                if map_x:
                    X = self.x_to_idx(X)
                if map_y:
                    Y = self.y_to_idx(Y)
                return X, Y

            dataset = dataset.map(mapper, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        return dataset
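A usage sketch, assuming transform is an instance of a concrete Transform subclass and samples is a zero-argument generator function yielding (x, y) pairs (both names are assumptions, not part of the snippet above):

dataset = transform.samples_to_dataset(samples, batch_size=32, shuffle=True)
for batch in dataset.take(1):
    print(batch)  # one padded, prefetched batch of (X, Y)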
github hankcs / HanLP / hanlp / utils / io_util.py
                realpath = str(Path(realpath).parent.joinpath(parts[0]))
                anchor = '/'.join(parts[1:])
            child = path_join(realpath, anchor)
            if os.path.exists(child):
                return child
        elif os.path.isdir(realpath) or (os.path.isfile(realpath) and (compressed and extract)):
            return realpath
        else:
            pattern = realpath + '*'
            files = glob.glob(pattern)
            zip_path = realpath + compressed
            if extract and zip_path in files:
                files.remove(zip_path)
            if files:
                if len(files) > 1:
                    logger.debug(f'Found multiple files with {pattern}, will use the first one.')
                return files[0]
        # realpath is the path after extraction
        if compressed:
            realpath += compressed
        if not os.path.isfile(realpath):
            path = download(url=path, save_path=realpath)
        else:
            path = realpath
    if extract and compressed:
        path = uncompress(path)
        if anchor:
            path = path_join(path, anchor)

    return path
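get_resource resolves a local path or URL to a file on disk, downloading and extracting archives as needed and logging a debug message when a glob pattern matches more than one file. A usage sketch (the URL is hypothetical):

from hanlp.utils.io_util import get_resource

path = get_resource('https://example.com/data/embeddings.zip')  # hypothetical URL
print(path)  # local path of the downloaded and extracted resource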
github hankcs / HanLP / hanlp / common / component.py
def serve(self, export_dir=None, grpc_port=8500, rest_api_port=0, overwrite=False, dry_run=False):
        export_dir = self.export_model_for_serving(export_dir, show_hint=False, overwrite=overwrite)
        if not dry_run:
            del self.model  # free memory
        logger.info('The inputs of the exported model are shown below.')
        os.system(f'saved_model_cli show --all --dir {export_dir}/1')
        cmd = f'nohup tensorflow_model_server --model_name={os.path.splitext(os.path.basename(self.meta["load_path"]))[0]} ' \
              f'--model_base_path={export_dir} --port={grpc_port} --rest_api_port={rest_api_port} ' \
              f'>serve.log 2>&1 &'
        logger.info(f'Running ...\n{cmd}')
        if not dry_run:
            os.system(cmd)
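A usage sketch, assuming component is a trained component whose model has already been saved; with dry_run=True the tensorflow_model_server command is only logged, not executed:

component.serve(grpc_port=8500, rest_api_port=8501, dry_run=True)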
github hankcs / HanLP / hanlp / layers / embeddings / fast_text.py
def __init__(self, filepath: str, padding=PAD, name=None, **kwargs):
        self.padding = padding.encode('utf-8')
        self.filepath = filepath
        filepath = get_resource(filepath)
        assert os.path.isfile(filepath), f'Resolved path {filepath} is not a file'
        existed = global_cache.get(filepath, None)
        if existed:
            logger.debug('Use cached fasttext model [{}].'.format(filepath))
            self.model = existed
        else:
            logger.debug('Loading fasttext model from [{}].'.format(filepath))
            # fasttext prints a blank line while loading; redirect stdout to suppress it
            with stdout_redirected(to=os.devnull, stdout=sys.stderr):
                self.model = fasttext.load_model(filepath)
            global_cache[filepath] = self.model
        kwargs.pop('input_dim', None)
        kwargs.pop('output_dim', None)
        kwargs.pop('mask_zero', None)
        if not name:
            name = os.path.splitext(os.path.basename(filepath))[0]
        # fastText returns a subword vector for any string, so looking up 'king' reveals the dimension
        super().__init__(input_dim=len(self.model.words), output_dim=self.model['king'].size,
                         mask_zero=padding is not None, trainable=False, dtype=tf.string, name=name, **kwargs)
        embed_fn = np.frompyfunc(self.embed, 1, 1)
        # vf = np.vectorize(self.embed, otypes=[np.ndarray])
        self._embed_np = embed_fn
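A usage sketch, assuming the class is FastTextEmbedding and the import path below is correct; the model path is hypothetical, and any fastText .bin model works because the layer infers its vocabulary and dimension from the loaded model:

from hanlp.layers.embeddings.fast_text import FastTextEmbedding  # assumed import path

embed = FastTextEmbedding('data/fasttext/wiki.zh.bin')  # hypothetical local model path
print(embed.output_dim)  # dimensionality inferred from the loaded model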