How to use the featuretools.primitives.base.TransformPrimitive class in featuretools

To help you get started, we’ve selected a few featuretools examples based on popular ways it is used in public projects.

github FeatureLabs / featuretools / featuretools / feature_base / feature_base.py
def __new__(self, base, entity=None, groupby=None, parent_entity=None,
                primitive=None, use_previous=None, where=None):
        # either direct or identity
        if primitive is None and entity is None:
            return IdentityFeature(base)
        elif primitive is None and entity is not None:
            return DirectFeature(base, entity)
        elif primitive is not None and parent_entity is not None:
            assert isinstance(primitive, AggregationPrimitive) or issubclass(primitive, AggregationPrimitive)
            return AggregationFeature(base, parent_entity=parent_entity,
                                      use_previous=use_previous, where=where,
                                      primitive=primitive)
        elif primitive is not None:
            assert (isinstance(primitive, TransformPrimitive) or
                    issubclass(primitive, TransformPrimitive))
            if groupby is not None:
                return GroupByTransformFeature(base,
                                               primitive=primitive,
                                               groupby=groupby)
            return TransformFeature(base, primitive=primitive)

        raise Exception("Unrecognized feature initialization")
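
A quick sketch of how this dispatch plays out from user code, assuming the featuretools 0.x API in which ft.Feature aliases this class; the tiny EntitySet below is hypothetical, built only to exercise each branch of __new__:

import pandas as pd
import featuretools as ft
from featuretools.primitives import Absolute, Mean

# a throwaway EntitySet with a "transactions" entity and a "customers" parent
df = pd.DataFrame({"id": [1, 2, 3],
                   "customer_id": [1, 1, 2],
                   "amount": [10.5, -3.0, 7.25]})
es = ft.EntitySet("demo")
es = es.entity_from_dataframe(entity_id="transactions", dataframe=df, index="id")
es = es.normalize_entity(base_entity_id="transactions",
                         new_entity_id="customers", index="customer_id")

amount = ft.Feature(es["transactions"]["amount"])     # IdentityFeature
abs_amount = ft.Feature(amount, primitive=Absolute)   # TransformFeature
mean_amount = ft.Feature(amount, parent_entity=es["customers"],
                         primitive=Mean)              # AggregationFeature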
github FeatureLabs / nlp_primitives / nlp_primitives / polarity_score.py
import nltk
import numpy as np
import pandas as pd
from featuretools.primitives.base import TransformPrimitive
from featuretools.variable_types import Numeric, Text
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize.treebank import TreebankWordDetokenizer

from .utilities import clean_tokens


class PolarityScore(TransformPrimitive):
    """Calculates the polarity of a text on a scale from -1 (negative) to 1 (positive)

    Description:
        Given a list of strings, assign a polarity score from -1 (negative text),
        to 0 (neutral text), to 1 (positive text). The function returns a score
        for every given piece of text. If a string is missing, return `NaN`.

    Examples:
        >>> x = ['He loves dogs', 'She hates cats', 'There is a dog', '']
        >>> polarity_score = PolarityScore()
        >>> polarity_score(x).tolist()
        [0.677, -0.649, 0.0, 0.0]
    """
    name = "polarity_score"
    input_types = [Text]
    return_type = Numeric
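
The snippet stops at the class attributes; below is a minimal sketch of what a get_function implementation could look like, assuming the VADER lexicon has been fetched with nltk.download('vader_lexicon'). The shipped nlp_primitives version also round-trips tokens through clean_tokens and TreebankWordDetokenizer, which is omitted here:

    def get_function(self):
        def polarity_score(array):
            analyzer = SentimentIntensityAnalyzer()

            def score(text):
                if not isinstance(text, str):
                    return np.nan
                # VADER's compound score is already normalized to [-1, 1]
                return analyzer.polarity_scores(text)['compound']

            return pd.Series(array).apply(score)

        return polarity_score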
github FeatureLabs / nlp_primitives / nlp_primitives / part_of_speech_count.py
import nltk
import numpy as np
import pandas as pd
from featuretools.primitives.base import TransformPrimitive
from featuretools.variable_types import Numeric, Text

from .utilities import clean_tokens


class PartOfSpeechCount(TransformPrimitive):
    """Calculates the occurences of each different part of speech.

    Description:
        Given a list of strings, categorize each word in the string as
        a different part of speech, and return the total count for each
        of 15 different categories of speech.

        If a string is missing, return `NaN`.

    Examples:
        >>> x = ['He was eating cheese', '']
        >>> part_of_speech_count = PartOfSpeechCount()
        >>> part_of_speech_count(x).tolist()
        [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [1.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [1.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [1.0, 0.0], [0.0, 0.0]]
    """
    name = "part_of_speech_count"
github FeatureLabs / nlp_primitives / nlp_primitives / upper_case_count.py
# -*- coding: utf-8 -*-
import numpy as np
from featuretools.primitives.base import TransformPrimitive
from featuretools.variable_types import Numeric, Text


class UpperCaseCount(TransformPrimitive):
    """Calculates the number of upper case letters in text.

    Description:
        Given a list of strings, determine the number of characters in each string
        that are capitalized. Counts every letter individually, not just every
        word that contains capitalized letters.

        If a string is missing, return `NaN`.

    Examples:
        >>> x = ['This IS a string.', 'This is a string', 'aaa']
        >>> upper_case_count = UpperCaseCount()
        >>> upper_case_count(x).tolist()
        [3.0, 1.0, 0.0]
    """
    name = "upper_case_count"
github FeatureLabs / featuretools / featuretools / primitives / standard / cum_transform_feature.py
        >>> cum_mean([1, 2, 3, 4, None, 5]).tolist()
        [1.0, 1.5, 2.0, 2.5, nan, 2.5]
    """
    name = "cum_mean"
    input_types = [Numeric]
    return_type = Numeric
    uses_full_entity = True

    def get_function(self):
        def cum_mean(values):
            return values.cumsum() / np.arange(1, len(values) + 1)

        return cum_mean


class CumMin(TransformPrimitive):
    """Calculates the cumulative minimum.

    Description:
        Given a list of values, return the cumulative min
        (or running min). There is no set window, so the min
        at each point is calculated over all prior values.
        `NaN` values will return `NaN`, but in the window of a
        cumulative calculation, they're ignored.

    Examples:
        >>> cum_min = CumMin()
        >>> cum_min([1, 2, -3, 4, None, 5]).tolist()
        [1.0, 1.0, -3.0, -3.0, nan, -3.0]
    """
    name = "cum_min"
    input_types = [Numeric]
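
The snippet cuts off after `input_types`; by analogy with CumMean above, the remainder presumably looks like this (pandas' cummin skips NaN by default, matching the docstring):

    return_type = Numeric
    uses_full_entity = True

    def get_function(self):
        def cum_min(values):
            return values.cummin()

        return cum_min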
github FeatureLabs / nlp_primitives / nlp_primitives / lsa.py
import nltk
import numpy as np
import pandas as pd
from featuretools.primitives.base import TransformPrimitive
from featuretools.variable_types import Numeric, Text
from nltk.tokenize.treebank import TreebankWordDetokenizer
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline

from .utilities import clean_tokens


class LSA(TransformPrimitive):
    """Calculates the Latent Semantic Analysis Values of Text Input

    Description:
        Given a list of strings, transforms those strings using tf-idf and
        singular value decomposition to go from a sparse matrix to a compact
        matrix with two values for each string. These values represent the
        Latent Semantic Analysis of each string, describing its context with
        respect to [nltk's Brown sentence corpus](https://www.nltk.org/book/ch02.html#brown-corpus).

        If a string is missing, return `NaN`.

    Examples:
        >>> lsa = LSA()
        >>> x = ["he helped her walk,", "me me me eat food", "the sentence doth long"]
        >>> res = lsa(x).tolist()
        >>> for i in range(len(res)): res[i] = [abs(round(x, 2)) for x in res[i]]
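
The doctest is truncated here. A rough sketch of how the class could continue, fitting a tf-idf plus truncated-SVD pipeline on nltk's Brown corpus (assumes nltk.download('brown') has been run; the attribute values are assumptions):

    """
    name = "lsa"
    input_types = [Text]
    return_type = Numeric
    number_output_features = 2

    def get_function(self):
        trainer = make_pipeline(TfidfVectorizer(), TruncatedSVD(n_components=2))
        trainer.fit([" ".join(sent) for sent in nltk.corpus.brown.sents()])

        def lsa(array):
            docs = [text if isinstance(text, str) else "" for text in array]
            components = trainer.transform(docs)
            # transpose so each of the two outputs spans all input strings
            return pd.Series(np.transpose(components).tolist())

        return lsa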
github FeatureLabs / nlp_primitives / nlp_primitives / diversity_score.py
import numpy as np
import pandas as pd
from featuretools.primitives.base import TransformPrimitive
from featuretools.variable_types import Numeric, Text

from .utilities import clean_tokens


class DiversityScore(TransformPrimitive):
    """Calculates the overall complexity of the text based on the total
       number of words used in the text

    Description:
        Given a list of strings, calculates the total number of unique words
        divided by the total number of words in order to give the text a score
        from 0-1 that indicates how unique the words used in it are. This
        primitive only evaluates the 'clean' versions of strings, ignoring
        case, punctuation, and stopwords.

        If a string is missing, return `NaN`.

    Examples:
        >>> diversity_score = DiversityScore()
        >>> diversity_score(["hi hi hi", "hello its me", "hey what hey what", "a dog ate a basket"]).tolist()
        [0.3333333333333333, 1.0, 0.5, 1.0]
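
The class body is cut off after the doctest; a rough sketch of the remainder, assuming clean_tokens (imported above) returns a lowercased, punctuation- and stopword-free token list:

    """
    name = "diversity_score"
    input_types = [Text]
    return_type = Numeric

    def get_function(self):
        def diversity_score(array):
            def score(text):
                if not isinstance(text, str):
                    return np.nan
                tokens = clean_tokens(text)
                if len(tokens) == 0:
                    return 0.0
                # unique words divided by total words
                return len(set(tokens)) / len(tokens)

            return pd.Series(array).apply(score)

        return diversity_score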
github FeatureLabs / featuretools / featuretools / primitives / standard / cum_transform_feature.py
        >>> cum_sum([1, 2, 3, 4, None, 5]).tolist()
        [1.0, 3.0, 6.0, 10.0, nan, 15.0]
    """
    name = "cum_sum"
    input_types = [Numeric]
    return_type = Numeric
    uses_full_entity = True

    def get_function(self):
        def cum_sum(values):
            return values.cumsum()

        return cum_sum


class CumCount(TransformPrimitive):
    """Calculates the cumulative count.

    Description:
        Given a list of values, return the cumulative count
        (or running count). There is no set window, so the
        count at each point is calculated over all prior
        values. `NaN` values are counted.

    Examples:
        >>> cum_count = CumCount()
        >>> cum_count([1, 2, 3, 4, None, 5]).tolist()
        [1, 2, 3, 4, 5, 6]
    """
    name = "cum_count"
    input_types = [[Id], [Discrete]]
    return_type = Numeric
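
As with the other cumulative primitives, the tail of the class is missing; a minimal sketch, counting every row (including NaN) as the docstring specifies:

    uses_full_entity = True

    def get_function(self):
        def cum_count(values):
            # simple running count; NaN rows are counted too
            return np.arange(1, len(values) + 1)

        return cum_count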
github FeatureLabs / featuretools / featuretools / primitives / install.py
# for python 2.7
        module = imp.load_source(module, filepath)
    else:
        # TODO: what is the first argument"?
        # for python >3.5
        spec = importlib.util.spec_from_file_location(module, filepath)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

    primitives = []
    for primitive_name in vars(module):
        primitive_class = getattr(module, primitive_name)
        if (isclass(primitive_class) and
                issubclass(primitive_class, PrimitiveBase) and
                primitive_class not in (AggregationPrimitive,
                                        TransformPrimitive)):
            primitives.append((primitive_name, primitive_class))

    if len(primitives) == 0:
        raise RuntimeError("No primitive defined in file %s" % filepath)
    elif len(primitives) > 1:
        raise RuntimeError("More than one primitive defined in file %s" % filepath)

    return primitives[0]
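
The snippet starts mid-function, so its signature is not shown; assuming the enclosing helper is called load_primitive_from_file and takes a file path, usage would look roughly like:

# hypothetical: my_primitives.py defines exactly one custom primitive class
name, primitive_class = load_primitive_from_file('my_primitives.py')
print(name, primitive_class)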
github FeatureLabs / nlp_primitives / nlp_primitives / title_word_count.py
# -*- coding: utf-8 -*-
import numpy as np
from featuretools.primitives.base import TransformPrimitive
from featuretools.variable_types import Numeric, Text


class TitleWordCount(TransformPrimitive):
    """Determines the number of title words in a string.

    Description:
        Given a list of strings, determine the number of title words
        in each string. A title word is defined as any word starting
        with a capital letter. Words at the start of a sentence will
        be counted.

        If a string is missing, return `NaN`.

    Examples:
        >>> x = ['My favorite movie is Jaws.', 'this is a string', 'AAA']
        >>> title_word_count = TitleWordCount()
        >>> title_word_count(x).tolist()
        [2.0, 0.0, 1.0]
    """