from elephas.spark_model import SparkMLlibModel
from elephas.utils.rdd_utils import to_labeled_point

# Assumes x_train, y_train, x_test, y_test, model and nb_classes are defined
# at module level (e.g. prepared in the test module's setup code).
def test_mllib_model(spark_context):
    # Build RDD from numpy features and labels
    lp_rdd = to_labeled_point(spark_context, x_train,
                              y_train, categorical=True)
    # Initialize SparkModel from Keras model and Spark context
    spark_model = SparkMLlibModel(
        model=model, frequency='epoch', mode='synchronous')
    # Train Spark model
    spark_model.fit(lp_rdd, epochs=5, batch_size=32, verbose=0,
                    validation_split=0.1, categorical=True, nb_classes=nb_classes)
    # Evaluate Spark model by evaluating the underlying model
    score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
    print('Test accuracy:', score[1])
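The spark_context argument these tests take is a pytest fixture supplied by the test suite. A minimal sketch of such a fixture, assuming a local session-scoped SparkContext (the exact configuration is an assumption, not Elephas's own conftest):

import pytest
from pyspark import SparkContext, SparkConf


@pytest.fixture(scope='session')
def spark_context():
    # Local SparkContext shared across the whole test session
    conf = SparkConf().setAppName('elephas-tests').setMaster('local[2]')
    sc = SparkContext(conf=conf)
    yield sc
    sc.stop()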
import numpy as np

from elephas.utils import rdd_utils


def test_from_labeled_rdd_categorical(spark_context):
    features = np.ones((2, 10))
    labels = np.asarray([[0, 0, 1.0], [0, 1.0, 0]])
    lp_rdd = rdd_utils.to_labeled_point(spark_context, features, labels, True)
    x, y = rdd_utils.from_labeled_point(lp_rdd, True, 3)
    assert x.shape == features.shape
    assert y.shape == labels.shape
def test_lp_to_simple_rdd_categorical(spark_context):
    features = np.ones((2, 10))
    labels = np.asarray([[0, 0, 1.0], [0, 1.0, 0]])
    lp_rdd = rdd_utils.to_labeled_point(spark_context, features, labels, True)
    rdd = rdd_utils.lp_to_simple_rdd(lp_rdd, categorical=True, nb_classes=3)
    assert rdd.first()[0].shape == (10,)
    assert rdd.first()[1].shape == (3,)
def test_to_labeled_rdd_categorical(spark_context):
    features = np.ones((2, 10))
    labels = np.asarray([[0, 0, 1.0], [0, 1.0, 0]])
    lp_rdd = rdd_utils.to_labeled_point(spark_context, features, labels, True)
    assert lp_rdd.count() == 2
    first = lp_rdd.first()
    assert first.features.shape == (10,)
    assert first.label == 2.0
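The first.label == 2.0 assertion shows what to_labeled_point does with one-hot labels: each row is presumably collapsed to its class index. A minimal sketch of that conversion using PySpark's own LabeledPoint (an illustration of the behavior the test exercises, not Elephas's implementation):

import numpy as np
from pyspark.mllib.regression import LabeledPoint

label = np.asarray([0, 0, 1.0])
features = np.ones(10)
# argmax turns the one-hot row into the scalar class index 2.0
lp = LabeledPoint(np.argmax(label), features)
assert lp.label == 2.0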
def test_to_labeled_rdd_not_categorical(spark_context):
    features = np.ones((2, 10))
    labels = np.asarray([[2.0], [1.0]])
    lp_rdd = rdd_utils.to_labeled_point(spark_context, features, labels, False)
    assert lp_rdd.count() == 2
    first = lp_rdd.first()
    assert first.features.shape == (10,)
    assert first.label == 2.0
def test_lp_to_simple_rdd_not_categorical(spark_context):
    features = np.ones((2, 10))
    labels = np.asarray([[2.0], [1.0]]).reshape((2,))
    lp_rdd = rdd_utils.to_labeled_point(spark_context, features, labels, False)
    rdd = rdd_utils.lp_to_simple_rdd(lp_rdd, categorical=False, nb_classes=3)
    assert rdd.first()[0].shape == (10,)
    assert rdd.first()[1] == 2.0
def test_from_labeled_rdd(spark_context):
    features = np.ones((2, 10))
    labels = np.asarray([[2.0], [1.0]]).reshape((2,))
    lp_rdd = rdd_utils.to_labeled_point(spark_context, features, labels, False)
    x, y = rdd_utils.from_labeled_point(lp_rdd, False, None)
    assert x.shape == features.shape
    assert y.shape == labels.shape
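These utilities feed the standalone MNIST example that follows. That script assumes x_train, y_train, x_test, y_test and nb_classes have already been prepared; a minimal sketch of that preparation, assuming the standard Keras MNIST workflow (the specific values follow the usual conventions and are not part of the original snippet):

import numpy as np
from keras.datasets import mnist
from keras.utils import np_utils

# Load MNIST and flatten the 28x28 images into 784-dimensional float vectors
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(60000, 784).astype('float32') / 255
x_test = x_test.reshape(10000, 784).astype('float32') / 255

# One-hot encode the labels
nb_classes = 10
y_train = np_utils.to_categorical(y_train, nb_classes)
y_test = np_utils.to_categorical(y_test, nb_classes)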
from pyspark import SparkContext, SparkConf

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import RMSprop

from elephas.spark_model import SparkMLlibModel
from elephas.utils.rdd_utils import to_labeled_point

# Simple MLP for MNIST; input layer assumed to take the 784-dim flattened images
model = Sequential()
model.add(Dense(128, input_dim=784))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))

# Compile model
rms = RMSprop()
model.compile(optimizer=rms, loss='categorical_crossentropy', metrics=['acc'])

# Create Spark context
conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
sc = SparkContext(conf=conf)

# Build RDD from numpy features and labels
lp_rdd = to_labeled_point(sc, x_train, y_train, categorical=True)

# Initialize SparkModel from Keras model and Spark context
spark_model = SparkMLlibModel(master_network=model, frequency='epoch', mode='synchronous',
                              master_metrics=['acc'])

# Train Spark model
spark_model.fit(lp_rdd, epochs=5, batch_size=32, verbose=0,
                validation_split=0.1, categorical=True, nb_classes=nb_classes)

# Evaluate Spark model by evaluating the underlying model
score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
print('Test accuracy:', score[1])
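The example trains with mode='synchronous'; Elephas also exposes 'asynchronous' and 'hogwild' update modes, and parameter updates can be sent per 'epoch' or per 'batch'. A hedged variant of the constructor call above, under the same assumed surrounding script:

# Push asynchronous updates after every batch instead of synchronous
# per-epoch averaging
spark_model = SparkMLlibModel(
    model=model, frequency='batch', mode='asynchronous')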
from pyspark.sql import SQLContext

from elephas.utils.rdd_utils import to_labeled_point


def to_data_frame(sc, features, labels, categorical=False):
    """Convert numpy arrays of features and labels into a Spark DataFrame."""
    lp_rdd = to_labeled_point(sc, features, labels, categorical)
    sql_context = SQLContext(sc)
    df = sql_context.createDataFrame(lp_rdd)
    return df
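A usage sketch for to_data_frame, assuming an existing SparkContext sc (the input values are illustrative):

import numpy as np

features = np.ones((2, 10))
labels = np.asarray([[0, 0, 1.0], [0, 1.0, 0]])
df = to_data_frame(sc, features, labels, categorical=True)
df.show()  # expect 'features' (vector) and 'label' (double) columns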