How to use the deepctr.inputs.SparseFeat function in deepctr

To help you get started, we’ve selected a few deepctr examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github shenweichen / DeepCTR / tests / utils.py View on Github external
for i in range(sparse_feature_num):
        dim = np.random.randint(1, 10)
        feature_columns.append(SparseFeat(prefix+'sparse_feature_'+str(i), dim,embedding_size,use_hash=hash_flag,dtype=tf.int32))
    for i in range(dense_feature_num):
        feature_columns.append(DenseFeat(prefix+'dense_feature_'+str(i), 1,dtype=tf.float32))
    for i, mode in enumerate(sequence_feature):
        dim = np.random.randint(1, 10)
        maxlen = np.random.randint(1, 10)
        feature_columns.append(
            VarLenSparseFeat(prefix +'sequence_' + mode, maxlen=maxlen,vocabulary_size=dim,  embedding_dim=embedding_size, combiner=mode))



    for fc in feature_columns:
        if isinstance(fc,SparseFeat):
            model_input[fc.name]= np.random.randint(0, fc.vocabulary_size, sample_size)
        elif isinstance(fc,DenseFeat):
            model_input[fc.name] = np.random.random(sample_size)
        else:
            s_input, s_len_input = gen_sequence(
                fc.vocabulary_size, fc.maxlen, sample_size)
            model_input[fc.name] = s_input
            if include_length:
                fc.length_name = prefix+"sequence_"+str(i)+'_seq_length'
                model_input[prefix+"sequence_"+str(i)+'_seq_length'] = s_len_input
github shenweichen / DeepCTR / tests / utils.py View on Github external
if 'weight'  in sequence_feature:
        feature_columns.append(VarLenSparseFeat(prefix+"weighted_seq",maxlen=3,vocabulary_size=2,embedding_dim=embedding_size,length_name=prefix+"weighted_seq"+"_seq_length",weight_name=prefix+"weight"))
        s_input, s_len_input = gen_sequence(
            2, 3, sample_size)

        model_input[prefix+"weighted_seq"] = s_input
        model_input[prefix+'weight'] = np.random.randn(sample_size,3,1)
        model_input[prefix+"weighted_seq"+"_seq_length"] = s_len_input
        sequence_feature.pop(sequence_feature.index('weight'))


    for i in range(sparse_feature_num):
        dim = np.random.randint(1, 10)
        feature_columns.append(SparseFeat(prefix+'sparse_feature_'+str(i), dim,embedding_size,use_hash=hash_flag,dtype=tf.int32))
    for i in range(dense_feature_num):
        feature_columns.append(DenseFeat(prefix+'dense_feature_'+str(i), 1,dtype=tf.float32))
    for i, mode in enumerate(sequence_feature):
        dim = np.random.randint(1, 10)
        maxlen = np.random.randint(1, 10)
        feature_columns.append(
            VarLenSparseFeat(prefix +'sequence_' + mode, maxlen=maxlen,vocabulary_size=dim,  embedding_dim=embedding_size, combiner=mode))



    for fc in feature_columns:
        if isinstance(fc,SparseFeat):
            model_input[fc.name]= np.random.randint(0, fc.vocabulary_size, sample_size)
        elif isinstance(fc,DenseFeat):
            model_input[fc.name] = np.random.random(sample_size)
        else:
github shenweichen / DeepCTR / examples / run_classification_criteo.py View on Github external
dense_features = ['I' + str(i) for i in range(1, 14)]

    data[sparse_features] = data[sparse_features].fillna('-1', )
    data[dense_features] = data[dense_features].fillna(0, )
    target = ['label']

    # 1.Label Encoding for sparse features,and do simple Transformation for dense features
    for feat in sparse_features:
        lbe = LabelEncoder()
        data[feat] = lbe.fit_transform(data[feat])
    mms = MinMaxScaler(feature_range=(0, 1))
    data[dense_features] = mms.fit_transform(data[dense_features])

    # 2.count #unique features for each sparse field,and record dense feature field name

    fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].nunique(),embedding_dim=4)
                           for i,feat in enumerate(sparse_features)] + [DenseFeat(feat, 1,)
                          for feat in dense_features]

    dnn_feature_columns = fixlen_feature_columns
    linear_feature_columns = fixlen_feature_columns

    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

    # 3.generate input data for model

    train, test = train_test_split(data, test_size=0.2)
    train_model_input = {name:train[name] for name in feature_names}
    test_model_input = {name:test[name] for name in feature_names}

    # 4.Define Model,train,predict and evaluate
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
github shenweichen / DeepCTR / examples / run_dien.py View on Github external
def get_xy_fd(use_neg=False, hash_flag=False):

    feature_columns = [SparseFeat('user', 3,embedding_dim=10,use_hash=hash_flag),
                       SparseFeat('gender', 2,embedding_dim=4,use_hash=hash_flag),
                       SparseFeat('item', 3+1,embedding_dim=8,use_hash=hash_flag),
                       SparseFeat('item_gender', 2+1,embedding_dim=4,use_hash=hash_flag),
                       DenseFeat('score', 1)]

    feature_columns += [VarLenSparseFeat('hist_item',maxlen=4,vocabulary_size=3+1,embedding_dim=8,  embedding_name='item',length_name="seq_length"),
                        VarLenSparseFeat('hist_item_gender', maxlen=4,vocabulary_size=3+1,embedding_dim=4, embedding_name='item_gender',length_name="seq_length")]

    behavior_feature_list = ["item","item_gender"]
    uid = np.array([0, 1, 2])
    ugender = np.array([0, 1, 0])
    iid = np.array([1, 2, 3])#0 is mask value
    igender = np.array([1, 2, 1])# 0 is mask value
    score = np.array([0.1, 0.2, 0.3])

    hist_iid = np.array([[ 1, 2, 3,0], [ 1, 2, 3,0], [ 1, 2, 0,0]])
    hist_igender = np.array([[1, 1, 2,0 ], [2, 1, 1, 0], [2, 1, 0, 0]])

    behavior_length = np.array([3,3,2])
github shenweichen / DeepCTR / examples / run_dsin.py View on Github external
def get_xy_fd(hash_flag=False):

    feature_columns = [SparseFeat('user', 3, embedding_dim=10,use_hash=hash_flag),
                       SparseFeat('gender', 2, embedding_dim=4,use_hash=hash_flag),
                       SparseFeat('item', 3 + 1, embedding_dim=4,use_hash=hash_flag),
                       SparseFeat('item_gender', 2 + 1, embedding_dim=4,use_hash=hash_flag),
                       DenseFeat('score', 1)]
    feature_columns += [VarLenSparseFeat('sess_0_item',maxlen=4,vocabulary_size=3+1,embedding_dim=4,use_hash=hash_flag,embedding_name='item'),VarLenSparseFeat('sess_0_item_gender',maxlen=4,vocabulary_size=2+1,embedding_dim=4,use_hash=hash_flag,embedding_name='item_gender')]
    feature_columns += [VarLenSparseFeat('sess_1_item', maxlen= 4,vocabulary_size=3 + 1,embedding_dim=4, use_hash=hash_flag, embedding_name='item'),VarLenSparseFeat('sess_1_item_gender', maxlen= 4,vocabulary_size=2 + 1, embedding_dim=4,use_hash=hash_flag,embedding_name='item_gender')]

    behavior_feature_list = ["item", "item_gender"]
    uid = np.array([0, 1, 2])
    ugender = np.array([0, 1, 0])
    iid = np.array([1, 2, 3])  # 0 is mask value
    igender = np.array([1, 2, 1])  # 0 is mask value
    score = np.array([0.1, 0.2, 0.3])

    sess1_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [0, 0, 0, 0]])
    sess1_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [0, 0, 0, 0]])

    sess2_iid = np.array([[1, 2, 3, 0], [0, 0, 0, 0], [0, 0, 0, 0]])
github shenweichen / DeepCTR / deepctr / models / nffm.py View on Github external
def feature_embedding(fc_i, fc_j, embedding_dict, input_feature):
    fc_i_embedding = embedding_dict[fc_i.name][fc_j.name](input_feature)
    if isinstance(fc_i, SparseFeat):
        return NoMask()(fc_i_embedding)
    else:
        return SequencePoolingLayer(fc_i.combiner, supports_masking=True)(fc_i_embedding)
github shenweichen / DeepCTR / examples / run_dien.py View on Github external
def get_xy_fd(use_neg=False, hash_flag=False):

    feature_columns = [SparseFeat('user', 3,embedding_dim=10,use_hash=hash_flag),
                       SparseFeat('gender', 2,embedding_dim=4,use_hash=hash_flag),
                       SparseFeat('item', 3+1,embedding_dim=8,use_hash=hash_flag),
                       SparseFeat('item_gender', 2+1,embedding_dim=4,use_hash=hash_flag),
                       DenseFeat('score', 1)]

    feature_columns += [VarLenSparseFeat('hist_item',maxlen=4,vocabulary_size=3+1,embedding_dim=8,  embedding_name='item',length_name="seq_length"),
                        VarLenSparseFeat('hist_item_gender', maxlen=4,vocabulary_size=3+1,embedding_dim=4, embedding_name='item_gender',length_name="seq_length")]

    behavior_feature_list = ["item","item_gender"]
    uid = np.array([0, 1, 2])
    ugender = np.array([0, 1, 0])
    iid = np.array([1, 2, 3])#0 is mask value
    igender = np.array([1, 2, 1])# 0 is mask value
    score = np.array([0.1, 0.2, 0.3])

    hist_iid = np.array([[ 1, 2, 3,0], [ 1, 2, 3,0], [ 1, 2, 0,0]])
    hist_igender = np.array([[1, 1, 2,0 ], [2, 1, 1, 0], [2, 1, 0, 0]])
github shenweichen / DeepCTR / examples / run_dien.py View on Github external
def get_xy_fd(use_neg=False, hash_flag=False):

    feature_columns = [SparseFeat('user', 3,embedding_dim=10,use_hash=hash_flag),
                       SparseFeat('gender', 2,embedding_dim=4,use_hash=hash_flag),
                       SparseFeat('item', 3+1,embedding_dim=8,use_hash=hash_flag),
                       SparseFeat('item_gender', 2+1,embedding_dim=4,use_hash=hash_flag),
                       DenseFeat('score', 1)]

    feature_columns += [VarLenSparseFeat('hist_item',maxlen=4,vocabulary_size=3+1,embedding_dim=8,  embedding_name='item',length_name="seq_length"),
                        VarLenSparseFeat('hist_item_gender', maxlen=4,vocabulary_size=3+1,embedding_dim=4, embedding_name='item_gender',length_name="seq_length")]

    behavior_feature_list = ["item","item_gender"]
    uid = np.array([0, 1, 2])
    ugender = np.array([0, 1, 0])
    iid = np.array([1, 2, 3])#0 is mask value
    igender = np.array([1, 2, 1])# 0 is mask value
    score = np.array([0.1, 0.2, 0.3])

    hist_iid = np.array([[ 1, 2, 3,0], [ 1, 2, 3,0], [ 1, 2, 0,0]])
    hist_igender = np.array([[1, 1, 2,0 ], [2, 1, 1, 0], [2, 1, 0, 0]])