How to use the ludwig.features.base_feature.BaseFeature class in ludwig

To help you get started, we’ve selected a few ludwig examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github uber / ludwig / ludwig / features / category_feature.py View on Github external
from ludwig.models.modules.loss_modules import sampled_softmax_cross_entropy
from ludwig.models.modules.loss_modules import weighted_softmax_cross_entropy
from ludwig.models.modules.measure_modules import accuracy as get_accuracy
from ludwig.models.modules.measure_modules import hits_at_k as get_hits_at_k
from ludwig.utils.math_utils import int_type
from ludwig.utils.math_utils import softmax
from ludwig.utils.metrics_utils import ConfusionMatrix
from ludwig.utils.misc import set_default_value
from ludwig.utils.strings_utils import UNKNOWN_SYMBOL
from ludwig.utils.strings_utils import create_vocabulary


logger = logging.getLogger(__name__)


class CategoryBaseFeature(BaseFeature):
    """Base class for category features.

    Tags each instance with the ``CATEGORY`` type constant and declares the
    class-level preprocessing defaults.
    """

    # Defined on the class, so the same dict object is shared by all
    # instances and subclasses that do not override it.
    preprocessing_defaults = {
        "most_common": 10000,
        "lowercase": False,
        "missing_value_strategy": FILL_WITH_CONST,
        "fill_value": UNKNOWN_SYMBOL,
    }

    def __init__(self, feature):
        """Run the parent initializer with ``feature``, then set the type."""
        super().__init__(feature)
        # Assigned after the parent call so this is the last write to `type`.
        self.type = CATEGORY

    @staticmethod
    def get_feature_meta(column, preprocessing_parameters):
        idx2str, str2idx, str2freq, _ = create_vocabulary(
            column, 'stripped',
            num_most_frequent=preprocessing_parameters['most_common'],
github uber / ludwig / ludwig / features / date_feature.py View on Github external
import numpy as np
import tensorflow as tf
from dateutil.parser import parse

from ludwig.constants import *
from ludwig.features.base_feature import BaseFeature
from ludwig.features.base_feature import InputFeature
from ludwig.models.modules.date_encoders import DateEmbed, DateWave
from ludwig.utils.misc import set_default_value, get_from_registry

logger = logging.getLogger(__name__)

DATE_VECTOR_LENGTH = 9


class DateBaseFeature(BaseFeature):
    """Base class for date features.

    Tags each instance with the ``DATE`` type constant, declares the
    class-level preprocessing defaults, and builds the feature metadata.
    """

    # Defined on the class, so the same dict object is shared by all
    # instances and subclasses that do not override it.
    preprocessing_defaults = {
        "missing_value_strategy": FILL_WITH_CONST,
        "fill_value": "",
        "datetime_format": None,
    }

    def __init__(self, feature):
        """Run the parent initializer with ``feature``, then set the type."""
        super().__init__(feature)
        # Assigned after the parent call so this is the last write to `type`.
        self.type = DATE

    @staticmethod
    def get_feature_meta(column, preprocessing_parameters):
        """Return metadata holding only the given preprocessing parameters.

        ``column`` is accepted for signature compatibility but is not read.
        The returned dict references ``preprocessing_parameters`` directly
        (no copy is made).
        """
        feature_meta = {"preprocessing": preprocessing_parameters}
        return feature_meta
github uber / ludwig / ludwig / features / image_feature.py View on Github external
from ludwig.constants import *
from ludwig.features.base_feature import BaseFeature
from ludwig.features.base_feature import InputFeature
from ludwig.models.modules.image_encoders import ResNetEncoder
from ludwig.models.modules.image_encoders import Stacked2DCNN
from ludwig.utils.data_utils import get_abs_path
from ludwig.utils.image_utils import greyscale
from ludwig.utils.image_utils import num_channels_in_image
from ludwig.utils.image_utils import resize_image
from ludwig.utils.misc import get_from_registry
from ludwig.utils.misc import set_default_value

logger = logging.getLogger(__name__)


class ImageBaseFeature(BaseFeature):
    """Base class for image features.

    Tags each instance with the ``IMAGE`` type constant and declares the
    class-level preprocessing defaults.
    """

    # Defined on the class, so the same dict object is shared by all
    # instances and subclasses that do not override it.
    preprocessing_defaults = {
        "missing_value_strategy": BACKFILL,
        "in_memory": True,
        "resize_method": "interpolate",
        "scaling": "pixel_normalization",
        "num_processes": 1,
    }

    def __init__(self, feature):
        """Run the parent initializer with ``feature``, then set the type."""
        super().__init__(feature)
        # Assigned after the parent call so this is the last write to `type`.
        self.type = IMAGE

    @staticmethod
    def get_feature_meta(column, preprocessing_parameters):
        return {
            'preprocessing': preprocessing_parameters
github uber / ludwig / ludwig / features / timeseries_feature.py View on Github external
from ludwig.features.sequence_feature import SequenceOutputFeature
from ludwig.models.modules.measure_modules import absolute_error
from ludwig.models.modules.measure_modules import error
from ludwig.models.modules.measure_modules import r2
from ludwig.models.modules.measure_modules import squared_error
from ludwig.utils.misc import get_from_registry
from ludwig.utils.misc import set_default_value
from ludwig.utils.strings_utils import tokenizer_registry

# Module-level logger named after this module. It is created once: a second
# identical getLogger(__name__) call would only rebind the same logger object.
logger = logging.getLogger(__name__)


class TimeseriesBaseFeature(BaseFeature):
    """Base class for timeseries features.

    Tags each instance with the ``TIMESERIES`` type constant and declares
    the class-level preprocessing defaults.
    """

    # Defined on the class, so the same dict object is shared by all
    # instances and subclasses that do not override it.
    preprocessing_defaults = {
        "timeseries_length_limit": 256,
        "padding_value": 0,
        "padding": "right",
        "tokenizer": "space",
        "missing_value_strategy": FILL_WITH_CONST,
        "fill_value": "",
    }

    def __init__(self, feature):
        """Run the parent initializer with ``feature``, then set the type."""
        super().__init__(feature)
        # Assigned after the parent call so this is the last write to `type`.
        self.type = TIMESERIES

    @staticmethod
    def get_feature_meta(column, preprocessing_parameters):
        tokenizer = get_from_registry(
github uber / ludwig / ludwig / features / numerical_feature.py View on Github external
from ludwig.features.base_feature import OutputFeature
from ludwig.models.modules.fully_connected_modules import fc_layer
from ludwig.models.modules.initializer_modules import get_initializer
from ludwig.models.modules.measure_modules import \
    absolute_error as get_absolute_error
from ludwig.models.modules.measure_modules import error as get_error
from ludwig.models.modules.measure_modules import r2 as get_r2
from ludwig.models.modules.measure_modules import \
    squared_error as get_squared_error
from ludwig.utils.misc import set_default_value
from ludwig.utils.misc import set_default_values

logger = logging.getLogger(__name__)


class NumericalBaseFeature(BaseFeature):
    """Base class for numerical features.

    Tags each instance with the ``NUMERICAL`` type constant and declares
    the class-level preprocessing defaults.
    """

    # Defined on the class, so the same dict object is shared by all
    # instances and subclasses that do not override it.
    preprocessing_defaults = {
        "missing_value_strategy": FILL_WITH_CONST,
        "fill_value": 0,
        "normalization": None,
    }

    def __init__(self, feature):
        """Run the parent initializer with ``feature``, then set the type."""
        super().__init__(feature)
        # Assigned after the parent call so this is the last write to `type`.
        self.type = NUMERICAL

    @staticmethod
    def get_feature_meta(column, preprocessing_parameters):
        if preprocessing_parameters['normalization'] is not None:
            if preprocessing_parameters['normalization'] == 'zscore':
                return {
                    'mean': column.astype(np.float32).mean(),
github uber / ludwig / ludwig / features / vector_feature.py View on Github external
from ludwig.features.base_feature import OutputFeature
from ludwig.models.modules.dense_encoders import Dense
from ludwig.models.modules.loss_modules import weighted_softmax_cross_entropy
from ludwig.models.modules.measure_modules import \
    absolute_error as get_absolute_error
from ludwig.models.modules.measure_modules import error as get_error
from ludwig.models.modules.measure_modules import r2 as get_r2
from ludwig.models.modules.measure_modules import \
    squared_error as get_squared_error
from ludwig.utils.misc import get_from_registry
from ludwig.utils.misc import set_default_value

logger = logging.getLogger(__name__)


class VectorBaseFeature(BaseFeature):
    """Base class for vector features.

    Tags each instance with the ``VECTOR`` type constant, declares the
    class-level preprocessing defaults, and builds the feature metadata.
    """

    # Defined on the class, so the same dict object is shared by all
    # instances and subclasses that do not override it.
    preprocessing_defaults = {
        "missing_value_strategy": FILL_WITH_CONST,
        "fill_value": "",
    }

    def __init__(self, feature):
        """Run the parent initializer with ``feature``, then set the type."""
        super().__init__(feature)
        # Assigned after the parent call so this is the last write to `type`.
        self.type = VECTOR

    @staticmethod
    def get_feature_meta(column, preprocessing_parameters):
        """Return metadata holding only the given preprocessing parameters.

        ``column`` is accepted for signature compatibility but is not read.
        The returned dict references ``preprocessing_parameters`` directly
        (no copy is made).
        """
        feature_meta = {"preprocessing": preprocessing_parameters}
        return feature_meta

    @staticmethod
github uber / ludwig / ludwig / features / sequence_feature.py View on Github external
from ludwig.models.modules.sequence_encoders import RNN
from ludwig.models.modules.sequence_encoders import StackedCNN
from ludwig.models.modules.sequence_encoders import StackedParallelCNN
from ludwig.utils.math_utils import softmax
from ludwig.utils.metrics_utils import ConfusionMatrix
from ludwig.utils.misc import get_from_registry
from ludwig.utils.misc import set_default_value
from ludwig.utils.strings_utils import PADDING_SYMBOL
from ludwig.utils.strings_utils import UNKNOWN_SYMBOL
from ludwig.utils.strings_utils import build_sequence_matrix
from ludwig.utils.strings_utils import create_vocabulary

logger = logging.getLogger(__name__)


class SequenceBaseFeature(BaseFeature):
    """Base class for sequence features.

    Tags each instance with the ``SEQUENCE`` type constant and declares the
    class-level preprocessing defaults.
    """

    # Defined on the class, so the same dict object is shared by all
    # instances and subclasses that do not override it.
    preprocessing_defaults = {
        "sequence_length_limit": 256,
        "most_common": 20000,
        "padding_symbol": PADDING_SYMBOL,
        "unknown_symbol": UNKNOWN_SYMBOL,
        "padding": "right",
        "tokenizer": "space",
        "lowercase": False,
        "vocab_file": None,
        "missing_value_strategy": FILL_WITH_CONST,
        "fill_value": "",
    }

    def __init__(self, feature):
        """Run the parent initializer with ``feature``, then set the type."""
        super().__init__(feature)
        # Assigned after the parent call so this is the last write to `type`.
        self.type = SEQUENCE
github uber / ludwig / ludwig / features / text_feature.py View on Github external
from ludwig.features.base_feature import BaseFeature
from ludwig.features.sequence_feature import SequenceInputFeature
from ludwig.features.sequence_feature import SequenceOutputFeature
from ludwig.utils.math_utils import softmax
from ludwig.utils.metrics_utils import ConfusionMatrix
from ludwig.utils.misc import set_default_value
from ludwig.utils.misc import set_default_values
from ludwig.utils.strings_utils import PADDING_SYMBOL
from ludwig.utils.strings_utils import UNKNOWN_SYMBOL
from ludwig.utils.strings_utils import build_sequence_matrix
from ludwig.utils.strings_utils import create_vocabulary

logger = logging.getLogger(__name__)


class TextBaseFeature(BaseFeature):
    """Base class for text features; tags instances with the ``TEXT`` type."""

    def __init__(self, feature):
        """Run the parent initializer with ``feature``, then set the type."""
        super().__init__(feature)
        # Assigned after the parent call so this is the last write to `type`.
        self.type = TEXT

    preprocessing_defaults = {
        'char_tokenizer': 'characters',
        'char_vocab_file': None,
        'char_sequence_length_limit': 1024,
        'char_most_common': 70,
        'word_tokenizer': 'space_punct',
        'word_vocab_file': None,
        'word_sequence_length_limit': 256,
        'word_most_common': 20000,
        'padding_symbol': PADDING_SYMBOL,
        'unknown_symbol': UNKNOWN_SYMBOL,
        'padding': 'right',
github uber / ludwig / ludwig / features / bag_feature.py View on Github external
import numpy as np
import tensorflow as tf

from ludwig.constants import *
from ludwig.features.base_feature import BaseFeature
from ludwig.features.base_feature import InputFeature
from ludwig.features.feature_utils import set_str_to_idx
from ludwig.models.modules.embedding_modules import EmbedWeighted
from ludwig.utils.misc import set_default_value
from ludwig.utils.strings_utils import create_vocabulary

logger = logging.getLogger(__name__)


class BagBaseFeature(BaseFeature):
    """Base class for bag features.

    Tags each instance with the ``BAG`` type constant and declares the
    class-level preprocessing defaults.
    """

    # Defined on the class, so the same dict object is shared by all
    # instances and subclasses that do not override it.
    preprocessing_defaults = {
        "tokenizer": "space",
        "most_common": 10000,
        "lowercase": False,
        "missing_value_strategy": FILL_WITH_CONST,
        "fill_value": "",
    }

    def __init__(self, feature):
        """Run the parent initializer with ``feature``, then set the type."""
        super().__init__(feature)
        # Assigned after the parent call so this is the last write to `type`.
        self.type = BAG

    @staticmethod
    def get_feature_meta(column, preprocessing_parameters):
        idx2str, str2idx, str2freq, max_size = create_vocabulary(
            column,
github uber / ludwig / ludwig / features / binary_feature.py View on Github external
from ludwig.features.base_feature import OutputFeature
from ludwig.models.modules.initializer_modules import get_initializer
from ludwig.models.modules.loss_modules import mean_confidence_penalty
from ludwig.models.modules.measure_modules import accuracy as get_accuracy
from ludwig.utils.metrics_utils import ConfusionMatrix
from ludwig.utils.metrics_utils import average_precision_score
from ludwig.utils.metrics_utils import precision_recall_curve
from ludwig.utils.metrics_utils import roc_auc_score
from ludwig.utils.metrics_utils import roc_curve
from ludwig.utils.misc import set_default_value
from ludwig.utils.misc import set_default_values

logger = logging.getLogger(__name__)


class BinaryBaseFeature(BaseFeature):
    """Base class for binary features.

    Tags each instance with the ``BINARY`` type constant, declares the
    class-level preprocessing defaults, and builds the feature metadata.
    """

    # Defined on the class, so the same dict object is shared by all
    # instances and subclasses that do not override it.
    preprocessing_defaults = {
        "missing_value_strategy": FILL_WITH_CONST,
        "fill_value": 0,
    }

    def __init__(self, feature):
        """Run the parent initializer with ``feature``, then set the type."""
        super().__init__(feature)
        # Assigned after the parent call so this is the last write to `type`.
        self.type = BINARY

    @staticmethod
    def get_feature_meta(column, preprocessing_parameters):
        """Return a new, empty metadata dict.

        Neither ``column`` nor ``preprocessing_parameters`` is read.
        """
        empty_meta = {}
        return empty_meta

    @staticmethod
    def add_feature_data(
            feature,