How to use the deepctr.layers.utils.Hash layer in deepctr

To help you get started, we've selected a few deepctr examples that show how the Hash layer is used in public projects. The snippets are excerpts from the DeepCTR repository and rely on their module's top-level imports (for example Hash, collections.defaultdict, and itertools.chain), which the excerpts omit.

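Before the project examples, here is a minimal sketch of calling the layer directly. It assumes a TensorFlow 2.x Keras setup and an installed deepctr release that exposes this import path; the feature name and bucket count are illustrative.

import tensorflow as tf
from deepctr.layers.utils import Hash

# String ids arrive as a (batch, 1) tensor; Hash maps them to integer
# bucket indices that an Embedding layer can consume.
user_id = tf.keras.Input(shape=(1,), dtype=tf.string, name='user_id')

# With mask_zero=True the layer reserves index 0 for the padding value,
# so real ids land in [1, num_buckets).
hashed_id = Hash(num_buckets=100, mask_zero=True)(user_id)
embedded = tf.keras.layers.Embedding(100, 8)(hashed_id)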

From shenweichen/DeepCTR, deepctr/inputs.py (view on GitHub):
def varlen_embedding_lookup(embedding_dict, sequence_input_dict, varlen_sparse_feature_columns):
    varlen_embedding_vec_dict = {}
    for fc in varlen_sparse_feature_columns:
        feature_name = fc.name
        embedding_name = fc.embedding_name
        if fc.use_hash:
            # Hash raw ids into [0, vocabulary_size); mask_zero=True keeps
            # index 0 reserved for sequence padding.
            lookup_idx = Hash(fc.vocabulary_size, mask_zero=True)(sequence_input_dict[feature_name])
        else:
            lookup_idx = sequence_input_dict[feature_name]
        varlen_embedding_vec_dict[feature_name] = embedding_dict[embedding_name](lookup_idx)
    return varlen_embedding_vec_dict
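
A hedged sketch of how this helper might be wired up; the dicts mirror what DeepCTR builds internally, and the namedtuple below is a hypothetical stand-in for VarLenSparseFeat that stubs only the attributes the function reads:

import tensorflow as tf
from collections import namedtuple

# Hypothetical stub; real code would use deepctr's VarLenSparseFeat.
FC = namedtuple('FC', ['name', 'embedding_name', 'use_hash', 'vocabulary_size'])
fc = FC('hist_item_id', 'hist_item_id', True, 100)

sequence_input_dict = {'hist_item_id': tf.keras.Input(shape=(5,), dtype=tf.string)}
embedding_dict = {'hist_item_id': tf.keras.layers.Embedding(100, 8, mask_zero=True)}

vec_dict = varlen_embedding_lookup(embedding_dict, sequence_input_dict, [fc])
# vec_dict['hist_item_id'] is a (batch, 5, 8) embedded sequence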
From shenweichen/DeepCTR, deepctr/inputs.py (view on GitHub):
def embedding_lookup(sparse_embedding_dict, sparse_input_dict, sparse_feature_columns, return_feat_list=(),
                     mask_feat_list=(), to_list=False):
    group_embedding_dict = defaultdict(list)
    for fc in sparse_feature_columns:
        feature_name = fc.name
        embedding_name = fc.embedding_name
        if (len(return_feat_list) == 0 or feature_name in return_feat_list):
            if fc.use_hash:
                lookup_idx = Hash(fc.vocabulary_size, mask_zero=(feature_name in mask_feat_list))(
                    sparse_input_dict[feature_name])
            else:
                lookup_idx = sparse_input_dict[feature_name]

            group_embedding_dict[fc.group_name].append(sparse_embedding_dict[embedding_name](lookup_idx))
    if to_list:
        return list(chain.from_iterable(group_embedding_dict.values()))
    return group_embedding_dict
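
Relative to the variable-length helper above, embedding_lookup also filters by return_feat_list, masks only the features named in mask_feat_list, and groups its outputs by each column's group_name. A hedged call, reusing the stub pattern from the previous sketch with a group_name attribute added:

FC2 = namedtuple('FC2', ['name', 'embedding_name', 'use_hash',
                         'vocabulary_size', 'group_name'])
item = FC2('item_id', 'item_id', True, 100, 'default_group')

sparse_input_dict = {'item_id': tf.keras.Input(shape=(1,), dtype=tf.string)}
sparse_embedding_dict = {'item_id': tf.keras.layers.Embedding(100, 8)}

# Look up only 'item_id', mask its padding zeros, and flatten the grouped
# result into a plain list of tensors.
embs = embedding_lookup(sparse_embedding_dict, sparse_input_dict, [item],
                        return_feat_list=('item_id',),
                        mask_feat_list=('item_id',), to_list=True)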
From shenweichen/DeepCTR, deepctr/layers/utils.py (view on GitHub):
class Hash(Layer):  # Layer is tf.keras's base Layer class
    def __init__(self, num_buckets, mask_zero=False, **kwargs):
        self.num_buckets = num_buckets
        self.mask_zero = mask_zero
        super(Hash, self).__init__(**kwargs)
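
The constructor just stores its two arguments; the hashing itself happens in the layer's call method. Paraphrased (this is a sketch, not the exact source), its behavior is roughly:

import tensorflow as tf

def hash_call_sketch(x, num_buckets, mask_zero=False):
    # Non-string inputs are stringified before hashing.
    if x.dtype != tf.string:
        x = tf.as_string(x)
    # With mask_zero=True one bucket is set aside for padding, so real
    # values are hashed into num_buckets - 1 buckets...
    effective_buckets = num_buckets - 1 if mask_zero else num_buckets
    hashed = tf.strings.to_hash_bucket_fast(x, effective_buckets)
    if mask_zero:
        # ...then shifted up by one, pinning the padding token '0' to index 0.
        not_pad = tf.cast(tf.not_equal(x, '0'), tf.int64)
        hashed = (hashed + 1) * not_pad
    return hashed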
From shenweichen/DeepCTR, deepctr/models/nffm.py (view on GitHub):
    # NOTE: this excerpt begins mid-expression; it is the tail of a nested dict
    # comprehension building one field-aware Embedding per ordered pair
    # (fc_j, fc_i) of feature columns.
                                                                                 l2_reg_embedding),
                                                                             mask_zero=isinstance(fc_j,
                                                                                                  VarLenSparseFeat),
                                                                             name='sparse_emb_' + str(
                                                                                 fc_j.embedding_name) + '_' + fc_i.embedding_name)
                                              for fc_i in
                                              sparse_feature_columns + varlen_sparse_feature_columns} for fc_j in
                        sparse_feature_columns + varlen_sparse_feature_columns}

    dense_value_list = get_dense_input(features, dnn_feature_columns)

    embed_list = []
    for fc_i, fc_j in itertools.combinations(sparse_feature_columns + varlen_sparse_feature_columns, 2):
        i_input = features[fc_i.name]
        if fc_i.use_hash:
            i_input = Hash(fc_i.vocabulary_size)(i_input)
        j_input = features[fc_j.name]
        if fc_j.use_hash:
            j_input = Hash(fc_j.vocabulary_size)(j_input)

        fc_i_embedding = feature_embedding(fc_i, fc_j, sparse_embedding, i_input)
        fc_j_embedding = feature_embedding(fc_j, fc_i, sparse_embedding, j_input)

        element_wise_prod = multiply([fc_i_embedding, fc_j_embedding])
        if reduce_sum:
            element_wise_prod = Lambda(lambda element_wise_prod: K.sum(
                element_wise_prod, axis=-1))(element_wise_prod)
        embed_list.append(element_wise_prod)

    ffm_out = tf.keras.layers.Flatten()(concat_func(embed_list, axis=1))
    if use_bn:
        ffm_out = tf.keras.layers.BatchNormalization()(ffm_out)
    dnn_input = combined_dnn_input([ffm_out], dense_value_list)
    dnn_out = DNN(dnn_hidden_units, l2_reg=l2_reg_dnn, dropout_rate=dnn_dropout)(dnn_input)
    dnn_logit = Dense(1, use_bias=False)(dnn_out)
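
A toy illustration of the pairwise loop above: itertools.combinations yields every unordered field pair, and each pair contributes an element-wise product of two field-aware embeddings, summed over the embedding axis when reduce_sum is set. The shapes and values here are made up:

import itertools
import tensorflow as tf

fields = ['user_id', 'item_id', 'category']
print(list(itertools.combinations(fields, 2)))
# [('user_id', 'item_id'), ('user_id', 'category'), ('item_id', 'category')]

emb_i = tf.random.normal([2, 1, 4])   # field i's embedding "as seen by" field j
emb_j = tf.random.normal([2, 1, 4])   # field j's embedding "as seen by" field i
prod = emb_i * emb_j                  # (2, 1, 4) element-wise interaction
score = tf.reduce_sum(prod, axis=-1)  # (2, 1) when reduce_sum is enabled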
From shenweichen/DeepCTR, deepctr/inputs.py (view on GitHub):
def get_embedding_vec_list(embedding_dict, input_dict, sparse_feature_columns, return_feat_list=(), mask_feat_list=()):
    embedding_vec_list = []
    for fg in sparse_feature_columns:
        feat_name = fg.name
        if len(return_feat_list) == 0 or feat_name in return_feat_list:
            if fg.use_hash:
                lookup_idx = Hash(fg.vocabulary_size, mask_zero=(feat_name in mask_feat_list))(input_dict[feat_name])
            else:
                lookup_idx = input_dict[feat_name]

            embedding_vec_list.append(embedding_dict[feat_name](lookup_idx))

    return embedding_vec_list
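
One detail worth noting: unlike embedding_lookup above, this helper indexes embedding_dict by the feature name rather than the embedding name, so it does not resolve shared embeddings. A hedged call, reusing the earlier stubs:

vecs = get_embedding_vec_list(sparse_embedding_dict, sparse_input_dict, [item],
                              mask_feat_list=('item_id',))
# vecs has one embedded tensor per selected feature column, in column order.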