Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def read_data_sets(data_dir):
"""
Parse or download movielens 1m data if train_dir is empty.
:param data_dir: The directory storing the movielens data
:return: a 2D numpy array with user index and item index in each row
"""
WHOLE_DATA = 'ml-1m.zip'
local_file = base.maybe_download(WHOLE_DATA, data_dir, SOURCE_URL + WHOLE_DATA)
zip_ref = zipfile.ZipFile(local_file, 'r')
extracted_to = os.path.join(data_dir, "ml-1m")
if not os.path.exists(extracted_to):
print("Extracting %s to %s" % (local_file, data_dir))
zip_ref.extractall(data_dir)
zip_ref.close()
rating_files = os.path.join(extracted_to,"ratings.dat")
rating_list = [i.strip().split("::") for i in open(rating_files,"r").readlines()]
movielens_data = np.array(rating_list).astype(int)
return movielens_data
labels is 1D unit8 nunpy array representing the label valued from 0 to 9.
```
"""
TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
TEST_LABELS = 't10k-labels-idx1-ubyte.gz'
if data_type == "train":
local_file = base.maybe_download(TRAIN_IMAGES, train_dir,
SOURCE_URL + TRAIN_IMAGES)
with open(local_file, 'rb') as f:
train_images = extract_images(f)
local_file = base.maybe_download(TRAIN_LABELS, train_dir,
SOURCE_URL + TRAIN_LABELS)
with open(local_file, 'rb') as f:
train_labels = extract_labels(f)
return train_images, train_labels
else:
local_file = base.maybe_download(TEST_IMAGES, train_dir,
SOURCE_URL + TEST_IMAGES)
with open(local_file, 'rb') as f:
test_images = extract_images(f)
local_file = base.maybe_download(TEST_LABELS, train_dir,
SOURCE_URL + TEST_LABELS)
with open(local_file, 'rb') as f:
test_labels = extract_labels(f)
return test_images, test_labels
with open(local_file, 'rb') as f:
train_images = extract_images(f)
local_file = base.maybe_download(TRAIN_LABELS, train_dir,
SOURCE_URL + TRAIN_LABELS)
with open(local_file, 'rb') as f:
train_labels = extract_labels(f)
return train_images, train_labels
else:
local_file = base.maybe_download(TEST_IMAGES, train_dir,
SOURCE_URL + TEST_IMAGES)
with open(local_file, 'rb') as f:
test_images = extract_images(f)
local_file = base.maybe_download(TEST_LABELS, train_dir,
SOURCE_URL + TEST_LABELS)
with open(local_file, 'rb') as f:
test_labels = extract_labels(f)
return test_images, test_labels
def download_imdb(dest_dir):
"""Download pre-processed IMDB movie review data
:argument
dest_dir: destination directory to store the data
:return
The absolute path of the stored data
"""
file_name = "imdb_full.pkl"
file_abs_path = base.maybe_download(file_name,
dest_dir,
'https://s3.amazonaws.com/text-datasets/imdb_full.pkl')
return file_abs_path
def download_glove(dest_dir):
glove = "glove.6B.zip"
glove_path = base.maybe_download(glove, dest_dir, GLOVE_URL)
zip_ref = zipfile.ZipFile(glove_path, 'r')
glove_dir = os.path.join(dest_dir, "glove.6B")
if not os.path.exists(glove_dir):
print("Extracting %s to %s" % (glove_path, glove_dir))
zip_ref.extractall(glove_dir)
zip_ref.close()
:return:
```
(ndarray, ndarray) representing (features, labels)
features is a 4D unit8 numpy array [index, y, x, depth] representing each pixel valued from 0 to 255.
labels is 1D unit8 nunpy array representing the label valued from 0 to 9.
```
"""
TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
TEST_LABELS = 't10k-labels-idx1-ubyte.gz'
if data_type == "train":
local_file = base.maybe_download(TRAIN_IMAGES, train_dir,
SOURCE_URL + TRAIN_IMAGES)
with open(local_file, 'rb') as f:
train_images = extract_images(f)
local_file = base.maybe_download(TRAIN_LABELS, train_dir,
SOURCE_URL + TRAIN_LABELS)
with open(local_file, 'rb') as f:
train_labels = extract_labels(f)
return train_images, train_labels
else:
local_file = base.maybe_download(TEST_IMAGES, train_dir,
SOURCE_URL + TEST_IMAGES)
with open(local_file, 'rb') as f:
test_images = extract_images(f)
with open(local_file, 'rb') as f:
train_images = extract_images(f)
local_file = base.maybe_download(TRAIN_LABELS, train_dir,
SOURCE_URL + TRAIN_LABELS)
with open(local_file, 'rb') as f:
train_labels = extract_labels(f)
return train_images, train_labels
else:
local_file = base.maybe_download(TEST_IMAGES, train_dir,
SOURCE_URL + TEST_IMAGES)
with open(local_file, 'rb') as f:
test_images = extract_images(f)
local_file = base.maybe_download(TEST_LABELS, train_dir,
SOURCE_URL + TEST_LABELS)
with open(local_file, 'rb') as f:
test_labels = extract_labels(f)
return test_images, test_labels