How to use the ludwig.utils.misc.get_from_registry function in ludwig

To help you get started, we’ve selected a few Ludwig examples that show popular ways the get_from_registry function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github uber / ludwig / ludwig / features / timeseries_feature.py View on Github external
def get_feature_meta(column, preprocessing_parameters):
        """Compute the capped maximum tokenized length over a timeseries column.

        The tokenizer is looked up in ``tokenizer_registry`` under
        ``preprocessing_parameters['tokenizer']`` and instantiated with no
        arguments. Every entry of ``column`` is tokenized and the longest
        result is kept (0 when ``column`` is empty), then capped at
        ``preprocessing_parameters['timeseries_length_limit']``.

        :param column: iterable of timeseries entries accepted by the tokenizer
        :param preprocessing_parameters: mapping providing the ``'tokenizer'``
            and ``'timeseries_length_limit'`` entries
        :return: dict with the single key ``'max_timeseries_length'``
        """
        tokenize = get_from_registry(
            preprocessing_parameters['tokenizer'],
            tokenizer_registry
        )()
        # Lengths are never negative, so a default of 0 matches starting the
        # running maximum at 0.
        longest = max(
            (len(tokenize(timeseries)) for timeseries in column),
            default=0
        )
        capped = min(
            preprocessing_parameters['timeseries_length_limit'],
            longest
        )
        return {'max_timeseries_length': capped}
github uber / ludwig / ludwig / features / image_feature.py View on Github external
def get_image_encoder(self, encoder_parameters):
        """Build the image encoder selected by ``self.encoder``.

        Looks up ``self.encoder`` in ``image_encoder_registry`` and calls the
        registered entry with ``encoder_parameters`` expanded as keyword
        arguments.

        :param encoder_parameters: mapping of keyword arguments for the encoder
        :return: whatever the registered entry returns when called
        """
        encoder_factory = get_from_registry(
            self.encoder, image_encoder_registry
        )
        return encoder_factory(**encoder_parameters)
github uber / ludwig / ludwig / features / date_feature.py View on Github external
def get_date_encoder(self, encoder_parameters):
        """Build the date encoder selected by ``self.encoder``.

        Looks up ``self.encoder`` in ``date_encoder_registry`` and calls the
        registered entry with ``encoder_parameters`` expanded as keyword
        arguments.

        :param encoder_parameters: mapping of keyword arguments for the encoder
        :return: whatever the registered entry returns when called
        """
        encoder_factory = get_from_registry(
            self.encoder, date_encoder_registry
        )
        return encoder_factory(**encoder_parameters)
github uber / ludwig / ludwig / utils / strings_utils.py View on Github external
def build_sequence_matrix(
        sequences,
        inverse_vocabulary,
        tokenizer_type,
        length_limit,
        padding_symbol,
        padding='right',
        unknown_symbol=UNKNOWN_SYMBOL,
        lowercase=True,
        tokenizer_vocab_file=None,
):
    """Turn each sequence into an index vector using a registered tokenizer.

    NOTE(review): this excerpt appears to stop before the function ends --
    ``length_limit``, ``padding_symbol`` and ``padding`` are not used in the
    visible part, ``max_length`` is never updated, and nothing is returned.
    Only the visible steps are described here.

    :param sequences: iterable of sequences, each passed to
        ``_get_sequence_vector``
    :param inverse_vocabulary: sized container passed to
        ``_get_sequence_vector``; presumably maps symbols to indices --
        TODO confirm
    :param tokenizer_type: key looked up in ``tokenizer_registry``
    :param length_limit: not used in the visible part
    :param padding_symbol: not used in the visible part
    :param padding: not used in the visible part
    :param unknown_symbol: forwarded to ``_get_sequence_vector``
    :param lowercase: forwarded to ``_get_sequence_vector``
    :param tokenizer_vocab_file: passed to the tokenizer factory as
        ``vocab_file``
    """
    # Look up the tokenizer factory by name and instantiate it with the
    # (optional) vocabulary file.
    tokenizer = get_from_registry(tokenizer_type, tokenizer_registry)(
        vocab_file=tokenizer_vocab_file
    )
    # The dtype is derived from the largest index, len(inverse_vocabulary) - 1;
    # presumably int_type picks an integer type wide enough for it -- confirm.
    format_dtype = int_type(len(inverse_vocabulary) - 1)

    max_length = 0
    unit_vectors = []
    # Collect one index vector per input sequence.
    for sequence in sequences:
        unit_indices_vector = _get_sequence_vector(
            sequence,
            tokenizer,
            format_dtype,
            inverse_vocabulary,
            lowercase=lowercase,
            unknown_symbol=unknown_symbol
        )
        unit_vectors.append(unit_indices_vector)
github uber / ludwig / ludwig / data / dataset_synthesyzer.py View on Github external
def build_feature_parameters(features):
    """Map each feature's name to the parameters built for its type.

    For every feature, the builder registered in
    ``parameters_builders_registry`` under ``feature['type']`` is called with
    the feature, and its result is stored under ``feature['name']``.

    :param features: iterable of mappings with ``'type'`` and ``'name'`` keys
    :return: dict from feature name to the builder's result
    """
    parameters_by_name = {}
    for feature in features:
        build_parameters = get_from_registry(
            feature['type'], parameters_builders_registry
        )
        built = build_parameters(feature)
        parameters_by_name[feature['name']] = built
    return parameters_by_name
github uber / ludwig / ludwig / features / sequence_feature.py View on Github external
def get_sequence_decoder(self, decoder_parameters):
        """Build the sequence decoder selected by ``self.decoder``.

        Looks up ``self.decoder`` in ``sequence_decoder_registry`` and calls
        the registered entry with ``decoder_parameters`` expanded as keyword
        arguments.

        :param decoder_parameters: mapping of keyword arguments for the decoder
        :return: whatever the registered entry returns when called
        """
        decoder_factory = get_from_registry(
            self.decoder, sequence_decoder_registry
        )
        return decoder_factory(**decoder_parameters)
github uber / ludwig / ludwig / features / timeseries_feature.py View on Github external
def build_matrix(
            timeseries,
            tokenizer_name,
            length_limit,
            padding_value,
            padding='right'
    ):
        tokenizer = get_from_registry(
            tokenizer_name,
            tokenizer_registry
        )()
        max_length = 0
        ts_vectors = []
        for ts in timeseries:
            ts_vector = np.array(tokenizer(ts)).astype(np.float32)
            ts_vectors.append(ts_vector)
            if len(ts_vector) > max_length:
                max_length = len(ts_vector)

        if max_length < length_limit:
            logger.debug(
                'max length of {0}: {1} < limit: {2}'.format(
                    tokenizer_name,
                    max_length,
github uber / ludwig / ludwig / utils / strings_utils.py View on Github external
def get_sequence_vector(sequence, tokenizer_type, unit_to_id, lowercase=True):
    """Convert a single sequence into a vector via ``_get_sequence_vector``.

    The tokenizer registered under ``tokenizer_type`` is instantiated with no
    arguments, and the dtype is obtained from ``int_type`` applied to
    ``len(unit_to_id) - 1``.

    :param sequence: the sequence to convert
    :param tokenizer_type: key looked up in ``tokenizer_registry``
    :param unit_to_id: sized container forwarded to ``_get_sequence_vector``
    :param lowercase: forwarded to ``_get_sequence_vector``
    :return: the result of ``_get_sequence_vector``
    """
    tokenizer_factory = get_from_registry(tokenizer_type, tokenizer_registry)
    tokenizer = tokenizer_factory()
    largest_index = len(unit_to_id) - 1
    return _get_sequence_vector(
        sequence,
        tokenizer,
        int_type(largest_index),
        unit_to_id,
        lowercase=lowercase
    )
github uber / ludwig / ludwig / predict.py View on Github external
def calculate_overall_stats(test_stats, output_features, dataset,
                            train_set_metadata):
    """Ask each output feature's registered type to compute overall stats.

    For every entry in ``output_features``, the object registered in
    ``output_type_registry`` under the entry's ``'type'`` has its
    ``calculate_overall_stats`` called with the same four arguments
    (``test_stats``, the entry itself, ``dataset``, ``train_set_metadata``).
    Nothing is returned.

    :param test_stats: forwarded unchanged to each feature type
    :param output_features: iterable of mappings with a ``'type'`` key
    :param dataset: forwarded unchanged to each feature type
    :param train_set_metadata: forwarded unchanged to each feature type
    """
    for output_feature in output_features:
        feature_type = output_feature['type']
        handler = get_from_registry(feature_type, output_type_registry)
        handler.calculate_overall_stats(
            test_stats,
            output_feature,
            dataset,
            train_set_metadata
        )
github uber / ludwig / ludwig / data / dataset_synthesyzer.py View on Github external
def generate_datapoint(features):
    """Produce one value per feature and return them as a list.

    A feature uses the function registered in ``cyclers_registry`` for its
    type only when it has a ``'cycle'`` entry that is exactly ``True`` and its
    type is present in ``cyclers_registry``. Otherwise the function registered
    in ``generators_registry`` for its type is used.

    :param features: iterable of mappings with a ``'type'`` key and an
        optional ``'cycle'`` key
    :return: list of values, in the same order as ``features``
    """
    values = []
    for feature in features:
        use_cycler = (
            'cycle' in feature
            and feature['cycle'] is True
            and feature['type'] in cyclers_registry
        )
        if use_cycler:
            produce_value = cyclers_registry[feature['type']]
        else:
            produce_value = get_from_registry(
                feature['type'],
                generators_registry
            )
        values.append(produce_value(feature))
    return values