How to use the mindmeld.models.helpers.requires function in mindmeld

To help you get started, we’ve selected a few examples of the mindmeld.models.helpers.requires function, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github cisco / mindmeld / mindmeld / models / query_features.py View on Github external
@requires(GAZETTEER_RSC)
def extract_in_gaz_ngram_features(**args):
    """Returns a feature extractor for surrounding ngrams in gazetteers
    """
    del args

    def _extractor(query, resources):
        def get_ngram_gaz_features(query, gazes, entity_type):
            tokens = query.normalized_tokens
            feat_seq = [{} for _ in tokens]

            for i, _ in enumerate(feat_seq):
                feat_prefix = "in_gaz|type:{}|ngram".format(entity_type)

                # entity PMI and conditional prob
                p_total = (
                    math.log(sum([g["total_entities"] for g in gazes.values()]) + 1) / 2
github cisco / mindmeld / mindmeld / models / query_features.py View on Github external
@requires(WORD_FREQ_RSC)
def extract_gaz_freq(**args):
    """
    Extract frequency bin features for each gazetteer

    Returns:
        (function): A feature extraction function that returns the log of the \
            count of query tokens within each gazetteer's frequency bins.
    """
    del args

    def _extractor(query, resources):
        tokens = query.normalized_tokens
        freq_features = defaultdict(int)

        for tok in tokens:
            query_freq = "OOV" if resources[WORD_FREQ_RSC].get(tok) is None else "IV"
github cisco / mindmeld / mindmeld / models / query_features.py View on Github external
@requires(WORD_NGRAM_FREQ_RSC)
def extract_bag_of_words_features(
    ngram_lengths_to_start_positions, thresholds=(0,), **args
):
    """Returns a bag-of-words feature extractor.

    Args:
        ngram_lengths_to_start_positions (dict)
        thresholds (int): Cut off value to include word in n-gram vocab

    Returns:
        (function) The feature extractor.
    """
    threshold_list = list(thresholds)
    word_thresholds = threshold_list + [0] * (
        len(ngram_lengths_to_start_positions.keys()) - len(threshold_list)
    )
github cisco / mindmeld / mindmeld / models / query_features.py View on Github external
@requires(GAZETTEER_RSC)
def extract_in_gaz_feature(scaling=1, **args):
    """Returns a feature extractor that generates a set of features indicating the presence
    of query n-grams in different entity gazetteers. Used by the domain and intent classifiers
    when the 'in-gaz' feature is specified in the config.

    Args:
        scaling (int): A multiplicative scale factor to the ``ratio_pop`` and ``ratio`` features of
        the in-gaz feature set.

    Returns:
        function: Returns an extractor function
    """
    del args

    def _extractor(query, resources):
        in_gaz_features = defaultdict(float)
github cisco / mindmeld / mindmeld / models / query_features.py View on Github external
@requires(QUERY_FREQ_RSC)
def extract_query_string(scaling=1000, **args):
    """
    Extract whole query string as a feature.

    Returns:
        (function) A feature extraction function that takes a query and \
            returns the whole query string for exact matching

    """

    def _extractor(query, resources):
        query_key = "<{}>".format(query.normalized_text)
        if query_key in resources[QUERY_FREQ_RSC]:
            return {"exact|query:{}".format(query_key): scaling}

        if args.get(ENABLE_STEMMING, False):
github cisco / mindmeld / mindmeld / models / query_features.py View on Github external
@requires(ENABLE_STEMMING)
def enabled_stemming(**args):
    """Build a placeholder feature extractor associated with stemming.

    The returned extractor computes nothing itself. The function is
    decorated with ``requires(ENABLE_STEMMING)``; presumably that decorator
    is what flags stemming as needed -- confirm against
    ``mindmeld.models.helpers.requires``.

    Args:
        **args: Accepted for signature compatibility and discarded.

    Returns:
        function: An extractor taking ``(query, resources)`` that ignores
            both arguments and returns ``None``.
    """
    # Configuration keywords are intentionally unused.
    del args

    def _extractor(query, resources):
        # Deliberate no-op: drop both inputs and fall through (returns None).
        del query, resources

    return _extractor
github cisco / mindmeld / mindmeld / models / query_features.py View on Github external
@requires(WORD_FREQ_RSC)
def extract_freq(bins=5, **args):
    """
    Extract frequency bin features.

    Args:
        bins (int): The number of frequency bins (besides OOV)

    Returns:
        (function): A feature extraction function that returns the log of the \
            count of query tokens within each frequency bin.

    """

    def _extractor(query, resources):
        tokens = query.normalized_tokens
        stemmed_tokens = query.stemmed_tokens
github cisco / mindmeld / mindmeld / models / query_features.py View on Github external
@requires(SYS_TYPES_RSC)
def extract_sys_candidate_features(start_positions=(0,), **args):
    """Return an extractor for features based on a heuristic guess of numeric
    candidates at/near the current token.

    Args:
        start_positions (tuple): positions relative to current token (=0)

    Returns:
        (function) The feature extractor.
    """
    del args

    def _extractor(query, resources):
        feat_seq = [{} for _ in query.normalized_tokens]
        system_entities = query.get_system_entity_candidates(resources[SYS_TYPES_RSC])
        for entity in system_entities:
github cisco / mindmeld / mindmeld / models / query_features.py View on Github external
@requires(WORD_FREQ_RSC)
def extract_edge_ngrams(lengths=(1,), **args):
    """
    Extract ngrams of some specified lengths.

    Args:
        lengths (list of int): The ngram length.

    Returns:
        (function) An feature extraction function that takes a query and \
            returns ngrams of the specified lengths at start and end of query.
    """
    del args

    def _extractor(query, resources):
        tokens = query.normalized_tokens
        feats = {}
github cisco / mindmeld / mindmeld / models / entity_features.py View on Github external
@requires(GAZETTEER_RSC)
def extract_in_gaz_features(**args):
    """Returns a feature extractor that finds any gazetteer matches against the input query"""
    del args

    def _extractor(example, resources):
        _, entities, entity_index = example
        features = {}
        current_entity = entities[entity_index]

        domain_gazes = resources[GAZETTEER_RSC]

        for gaz_name, gaz in domain_gazes.items():
            if current_entity.normalized_text in gaz["pop_dict"]:
                feat_name = "in_gaz|type:{}".format(gaz_name)
                features[feat_name] = 1