Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_tfidf_params(self):
corpus = ['a b c', 'b c d d', 'c b e c f']
ids = ['u1', 'u2', 'u3']
modality = TextModality(corpus=corpus, ids=ids, max_vocab=6,
tfidf_params={
'binary': False,
'norm': 'l2',
'use_idf': True,
'smooth_idf': True,
'sublinear_tf': False
}).build({'u1': 0, 'u2': 1, 'u3': 2})
npt.assert_array_equal(modality.batch_tfidf([1]),
self.modality.batch_tfidf([1]))
for k, v in {
'binary': True,
'norm': 'l1',
'use_idf': False,
'smooth_idf': False,
'sublinear_tf': True
def test_build(self):
    """Smoke-test ``TextModality.build`` over several constructor/build argument mixes.

    The test carries no assertions; it passes as long as none of the
    construct-then-build combinations raises.
    """
    # Each entry: (keyword arguments for TextModality, positional arguments for build).
    scenarios = (
        ({}, ()),
        ({'corpus': ['abc']}, ()),
        ({'corpus': ['abc']}, ({'b': 0},)),
        ({'corpus': ['abc'], 'ids': ['a']}, ({'b': 0},)),
    )
    for init_kwargs, build_args in scenarios:
        TextModality(**init_kwargs).build(*build_args)
def test_build(self):
    """Smoke-test ``TextModality.build`` over several constructor/build argument mixes.

    The test carries no assertions; it passes as long as none of the
    construct-then-build combinations raises.
    """
    # Each entry: (keyword arguments for TextModality, positional arguments for build).
    scenarios = (
        ({}, ()),
        ({'corpus': ['abc']}, ()),
        ({'corpus': ['abc']}, ({'b': 0},)),
        ({'corpus': ['abc'], 'ids': ['a']}, ({'b': 0},)),
    )
    for init_kwargs, build_args in scenarios:
        TextModality(**init_kwargs).build(*build_args)
bm.user_text = ImageModality()
except ValueError:
assert True
try:
bm.item_text = ImageModality()
except ValueError:
assert True
try:
bm.user_image = TextModality()
except ValueError:
assert True
try:
bm.item_image = TextModality()
except ValueError:
assert True
try:
bm.user_graph = TextModality()
except ValueError:
assert True
try:
bm.item_graph = ImageModality()
except ValueError:
assert True
try:
bm.sentiment = TextModality()
except ValueError:
bm.item_text = ImageModality()
except ValueError:
assert True
try:
bm.user_image = TextModality()
except ValueError:
assert True
try:
bm.item_image = TextModality()
except ValueError:
assert True
try:
bm.user_graph = TextModality()
except ValueError:
assert True
try:
bm.item_graph = ImageModality()
except ValueError:
assert True
try:
bm.sentiment = TextModality()
except ValueError:
assert True
try:
bm.sentiment = ImageModality()
except ValueError:
# limitations under the License.
# ============================================================================
"""Example for Collaborative Deep Learning"""
import cornac
from cornac.data import Reader
from cornac.datasets import citeulike
from cornac.eval_methods import RatioSplit
from cornac.data import TextModality
from cornac.data.text import BaseTokenizer
# Load the CiteULike documents along with the ids of the items they belong to.
docs, item_ids = citeulike.load_text()

# The feedback reader is given the same item ids via ``item_set``.
feedback_reader = Reader(item_set=item_ids)
data = citeulike.load_feedback(reader=feedback_reader)

# Build the item text modality from the documents.
english_tokenizer = BaseTokenizer(stop_words='english')
item_text_modality = TextModality(
    corpus=docs,
    ids=item_ids,
    tokenizer=english_tokenizer,
    max_vocab=8000,
    max_doc_freq=0.5,
)

# Evaluation method: ratio split with test_size=0.2, carrying the text modality.
ratio_split = RatioSplit(
    data=data,
    test_size=0.2,
    exclude_unknowns=True,
    item_text=item_text_modality,
    verbose=True,
    seed=123,
    rating_threshold=0.5,
)

# Model under evaluation.
cdl = cornac.models.CDL(
    k=50,
    autoencoder_structure=[200],
    max_iter=30,
    lambda_u=0.1,
    lambda_v=1,
    lambda_w=0.1,
    lambda_n=1000,
)

# Metric: Recall with k=300.
rec_300 = cornac.metrics.Recall(k=300)

# Assemble the experiment and run it.
exp = cornac.Experiment(
    eval_method=ratio_split,
    models=[cdl],
    metrics=[rec_300],
)
exp.run()
# limitations under the License.
# ============================================================================
"""Example for HFT with the MovieLens 1M dataset"""
import cornac
from cornac.data import Reader
from cornac.datasets import movielens
from cornac.eval_methods import RatioSplit
from cornac.data import TextModality
from cornac.data.text import BaseTokenizer
# Load the movie plots along with the ids of the movies they belong to.
plots, movie_ids = movielens.load_plot()

# The feedback reader is given the same movie ids via ``item_set``.
feedback_reader = Reader(item_set=movie_ids)
ml_1m = movielens.load_feedback(variant='1M', reader=feedback_reader)

# The same vocabulary size is passed to the text modality and to the HFT model.
VOCAB_SIZE = 5000

# Build the item text modality from the plots.
plot_tokenizer = BaseTokenizer(sep='\t', stop_words='english')
item_text_modality = TextModality(
    corpus=plots,
    ids=movie_ids,
    tokenizer=plot_tokenizer,
    max_vocab=VOCAB_SIZE,
    max_doc_freq=0.5,
)

# Evaluation method: ratio split with test_size=0.2, carrying the text modality.
ratio_split = RatioSplit(
    data=ml_1m,
    test_size=0.2,
    exclude_unknowns=True,
    item_text=item_text_modality,
    verbose=True,
    seed=123,
)

# Model under evaluation.
hft = cornac.models.HFT(
    k=10,
    max_iter=40,
    grad_iter=5,
    l2_reg=0.001,
    lambda_text=0.01,
    vocab_size=VOCAB_SIZE,
    seed=123,
)

# Metric: mean squared error.
mse = cornac.metrics.MSE()

# Assemble the experiment and run it.
exp = cornac.Experiment(
    eval_method=ratio_split,
    models=[hft],
    metrics=[mse],
    user_based=False,
)
exp.run()
# limitations under the License.
# ============================================================================
"""Example for Collaborative Topic Modeling"""
import cornac
from cornac.data import Reader
from cornac.datasets import citeulike
from cornac.eval_methods import RatioSplit
from cornac.data import TextModality
from cornac.data.text import BaseTokenizer
# Load the CiteULike documents along with the ids of the items they belong to.
docs, item_ids = citeulike.load_text()

# The feedback reader is given the same item ids via ``item_set``.
feedback_reader = Reader(item_set=item_ids)
data = citeulike.load_feedback(reader=feedback_reader)

# Build the item text modality from the documents.
space_tokenizer = BaseTokenizer(sep=' ', stop_words='english')
item_text_modality = TextModality(
    corpus=docs,
    ids=item_ids,
    tokenizer=space_tokenizer,
    max_vocab=8000,
    max_doc_freq=0.5,
)

# Evaluation method: ratio split with test_size=0.2, carrying the text modality.
ratio_split = RatioSplit(
    data=data,
    test_size=0.2,
    exclude_unknowns=True,
    item_text=item_text_modality,
    verbose=True,
    seed=123,
    rating_threshold=0.5,
)

# Model under evaluation.
ctr = cornac.models.CTR(k=50, max_iter=50, lambda_v=1)

# Metric: Recall with k=300.
rec_300 = cornac.metrics.Recall(k=300)

# Assemble the experiment and run it.
exp = cornac.Experiment(
    eval_method=ratio_split,
    models=[ctr],
    metrics=[rec_300],
)
exp.run()
def item_text(self, input_modality):
    """Store ``input_modality`` as the item text modality after a type check.

    ``None`` is stored as-is; any other value must be a ``TextModality``.

    Raises
    ------
    ValueError
        If ``input_modality`` is neither ``None`` nor a ``TextModality`` instance.
    """
    # The ``None`` test comes first, so the isinstance check only runs for real values.
    acceptable = input_modality is None or isinstance(input_modality, TextModality)
    if not acceptable:
        message = 'input_modality has to be instance of TextModality but {}'.format(
            type(input_modality))
        raise ValueError(message)
    self.__item_text = input_modality
def user_text(self, input_modality):
    """Store ``input_modality`` as the user text modality after a type check.

    ``None`` is stored as-is; any other value must be a ``TextModality``.

    Raises
    ------
    ValueError
        If ``input_modality`` is neither ``None`` nor a ``TextModality`` instance.
    """
    # The ``None`` test comes first, so the isinstance check only runs for real values.
    acceptable = input_modality is None or isinstance(input_modality, TextModality)
    if not acceptable:
        message = 'input_modality has to be instance of TextModality but {}'.format(
            type(input_modality))
        raise ValueError(message)
    self.__user_text = input_modality