Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_model_export(spark_model_iris, model_path, spark_custom_env):
    """Save a Spark model and check that every reload path reproduces its predictions.

    The model is written to ``model_path`` with ``spark_custom_env`` as its conda
    environment, then reloaded three ways: as a Spark ML model, as a pyfunc
    model, and as a pyfunc Spark UDF. Each set of predictions is compared with
    ``spark_model_iris.predictions``. Finally the test checks that the
    ``sparkm.DFS_TMP`` path exists.
    """
    sparkm.save_model(
        spark_model_iris.model,
        path=model_path,
        conda_env=spark_custom_env,
    )

    # 1. score and compare reloaded sparkml model
    spark_reloaded = sparkm.load_model(model_uri=model_path)
    scored_df = spark_reloaded.transform(spark_model_iris.spark_df)
    prediction_rows = scored_df.select("prediction").collect()
    native_preds = [row.prediction for row in prediction_rows]
    assert spark_model_iris.predictions == native_preds

    pyfunc_reloaded = pyfunc.load_pyfunc(model_path)

    # 2. score and compare reloaded pyfunc
    pyfunc_preds = pyfunc_reloaded.predict(spark_model_iris.pandas_df)
    assert spark_model_iris.predictions == pyfunc_preds

    # 3. score and compare reloaded pyfunc Spark udf
    udf_preds = score_model_as_udf(
        model_uri=model_path,
        pandas_df=spark_model_iris.pandas_df,
    )
    assert spark_model_iris.predictions == udf_preds

    assert os.path.exists(sparkm.DFS_TMP)
def test_model_save_persists_specified_conda_env_in_mlflow_model_directory(
        h2o_iris_model, model_path, h2o_custom_env):
    """Check that saving an H2O model copies the given conda env into the model directory.

    The environment file referenced by the pyfunc flavor configuration must
    exist, must be a different path than ``h2o_custom_env``, and must have the
    same text content as ``h2o_custom_env``.
    """
    mlflow.h2o.save_model(
        h2o_model=h2o_iris_model.model,
        path=model_path,
        conda_env=h2o_custom_env,
    )

    flavor_conf = _get_flavor_configuration(
        model_path=model_path, flavor_name=pyfunc.FLAVOR_NAME)
    persisted_env_path = os.path.join(model_path, flavor_conf[pyfunc.ENV])

    # The env must have been copied into the model directory, not referenced in place.
    assert os.path.exists(persisted_env_path)
    assert persisted_env_path != h2o_custom_env

    with open(h2o_custom_env, "r") as source_file:
        expected_text = source_file.read()
    with open(persisted_env_path, "r") as persisted_file:
        persisted_text = persisted_file.read()
    assert persisted_text == expected_text
def test_model_save_persists_specified_conda_env_in_mlflow_model_directory(
        sequential_model, model_path, pytorch_custom_env):
    """Check that saving a PyTorch model copies the given conda env into the model directory.

    The environment file referenced by the pyfunc flavor configuration must
    exist, must be a different path than ``pytorch_custom_env``, and must have
    the same text content as ``pytorch_custom_env``.
    """
    mlflow.pytorch.save_model(
        pytorch_model=sequential_model,
        path=model_path,
        conda_env=pytorch_custom_env,
    )

    flavor_conf = _get_flavor_configuration(
        model_path=model_path, flavor_name=pyfunc.FLAVOR_NAME)
    persisted_env_path = os.path.join(model_path, flavor_conf[pyfunc.ENV])

    # The env must have been copied into the model directory, not referenced in place.
    assert os.path.exists(persisted_env_path)
    assert persisted_env_path != pytorch_custom_env

    with open(pytorch_custom_env, "r") as source_file:
        expected_text = source_file.read()
    with open(persisted_env_path, "r") as persisted_file:
        persisted_text = persisted_file.read()
    assert persisted_text == expected_text
def test_save_model_persists_specified_conda_env_in_mlflow_model_directory(
        saved_tf_iris_model, model_path, tf_custom_env):
    """Check that saving a TensorFlow model copies the given conda env into the model directory.

    The environment file referenced by the pyfunc flavor configuration must
    exist, must be a different path than ``tf_custom_env``, and must have the
    same text content as ``tf_custom_env``.
    """
    mlflow.tensorflow.save_model(
        tf_saved_model_dir=saved_tf_iris_model.path,
        tf_meta_graph_tags=saved_tf_iris_model.meta_graph_tags,
        tf_signature_def_key=saved_tf_iris_model.signature_def_key,
        path=model_path,
        conda_env=tf_custom_env,
    )

    flavor_conf = _get_flavor_configuration(
        model_path=model_path, flavor_name=pyfunc.FLAVOR_NAME)
    persisted_env_path = os.path.join(model_path, flavor_conf[pyfunc.ENV])

    # The env must have been copied into the model directory, not referenced in place.
    assert os.path.exists(persisted_env_path)
    assert persisted_env_path != tf_custom_env

    with open(tf_custom_env, "r") as source_file:
        expected_text = source_file.read()
    with open(persisted_env_path, "r") as persisted_file:
        persisted_text = persisted_file.read()
    assert persisted_text == expected_text
# Build a runs URI from run_id and artifact_path, then load the MLmodel
# configuration from the downloaded artifact directory.
# NOTE(review): run_id and artifact_path are defined outside this excerpt.
model_uri = "runs:///{run_id}/{artifact_path}".format(
run_id=run_id, artifact_path=artifact_path)
model_config = Model.load(
os.path.join(_download_artifact_from_uri(artifact_uri=model_uri), "MLmodel"))
# Build the image inside the AzureMLMocks context; aml_mocks is indexed by name
# below ("register_model", "create_image") to inspect the recorded calls.
with AzureMLMocks() as aml_mocks:
workspace = get_azure_workspace()
mlflow.azureml.build_image(model_uri=model_uri, workspace=workspace)
# Exactly one register_model call is expected; its "tags" keyword argument must
# carry the model URI and the Python version stored in the pyfunc flavor config.
register_model_call_args = aml_mocks["register_model"].call_args_list
assert len(register_model_call_args) == 1
_, register_model_call_kwargs = register_model_call_args[0]
called_tags = register_model_call_kwargs["tags"]
assert called_tags["model_uri"] == model_uri
assert called_tags["python_version"] ==\
model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.PY_VERSION]
# Exactly one create_image call is expected; the tags on its "image_config"
# keyword argument must carry the same two values.
create_image_call_args = aml_mocks["create_image"].call_args_list
assert len(create_image_call_args) == 1
_, create_image_call_kwargs = create_image_call_args[0]
image_config = create_image_call_kwargs["image_config"]
assert image_config.tags["model_uri"] == model_uri
assert image_config.tags["python_version"] ==\
model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.PY_VERSION]
def test_model_save_load(h2o_iris_model, model_path):
    """Save an H2O model and verify that both reload paths reproduce its predictions.

    The model is saved to ``model_path`` and reloaded twice: through
    ``mlflow.h2o.load_model`` and through ``mlflow.pyfunc.load_pyfunc``. The
    predictions of each reloaded model on ``h2o_iris_model.inference_data``
    must equal those of the original model, element by element.
    """
    h2o_model = h2o_iris_model.model
    mlflow.h2o.save_model(h2o_model=h2o_model, path=model_path)

    # Reference predictions from the in-memory model.
    # assumes as_data_frame() returns a pandas DataFrame — TODO confirm
    expected = h2o_model.predict(h2o_iris_model.inference_data).as_data_frame()

    # Loading h2o model
    h2o_model_loaded = mlflow.h2o.load_model(model_path)
    native_preds = h2o_model_loaded.predict(h2o_iris_model.inference_data).as_data_frame()
    # ``all(df_a == df_b)`` iterates over the column labels of the comparison
    # frame, not its values, so it is truthy whenever the labels are non-empty
    # strings. Reduce over both axes to actually compare the predictions.
    assert (native_preds == expected).all().all()

    # Loading pyfunc model
    pyfunc_loaded = mlflow.pyfunc.load_pyfunc(model_path)
    pyfunc_preds = pyfunc_loaded.predict(h2o_iris_model.inference_data.as_data_frame())
    assert (pyfunc_preds == expected).all().all()
def _model_udf(self):
    """Return a pyfunc Spark UDF for ``self._model_uri`` on the default session.

    The UDF is created with ``self._return_type`` as its result type.
    """
    session = default_session()
    model_udf = pyfunc.spark_udf(
        session,
        model_uri=self._model_uri,
        result_type=self._return_type,
    )
    return model_udf
"""
Registry of supported flavor backends. Contains a mapping of flavors to flavor backends. This
mapping is used to select a suitable flavor when deploying generic MLflow models.
A flavor backend can deploy a particular flavor locally to generate predictions, deploy it as a
local REST API endpoint, or build a Docker image for serving the model locally or remotely.
Not all flavors have a flavor backend.
"""
import mlflow.pyfunc as pyfunc
from mlflow.pyfunc.backend import PyFuncBackend
from mlflow.rfunc.backend import RFuncBackend
# Maps a flavor name to the backend class registered for that flavor.
_flavor_backends = {
pyfunc.FLAVOR_NAME: PyFuncBackend,
# NOTE(review): "crate" is presumably the flavor name used by R models — confirm.
"crate": RFuncBackend
}
def get_flavor_backend(model, build_docker=True, **kwargs):
    """Pick the first flavor of ``model`` that has a usable registered backend.

    Flavors are visited in the iteration order of ``model.flavors``. For each
    flavor with an entry in ``_flavor_backends``, the backend is instantiated
    with the flavor configuration and ``kwargs``. It is selected when it can
    build an image (only considered if ``build_docker`` is truthy) or when it
    can score the model.

    :param model: Object exposing a ``flavors`` mapping of flavor name to flavor config.
    :param build_docker: Whether the ability to build an image counts as a match.
    :param kwargs: Extra keyword arguments forwarded to the backend constructor.
    :return: ``(flavor_name, backend)`` for the first match, or ``(None, None)``.
    """
    for name, config in model.flavors.items():
        if name not in _flavor_backends:
            continue
        candidate = _flavor_backends[name](config, **kwargs)
        # ``and`` binds tighter than ``or``: a backend that can score the model
        # is accepted even when an image build was requested but is unsupported.
        can_build = build_docker and candidate.can_build_image()
        if can_build or candidate.can_score_model():
            return name, candidate
    return None, None
# When a sample input is given, also call mleap.add_to_model for this model.
if sample_input is not None:
mleap.add_to_model(mlflow_model=mlflow_model, path=dst_dir, spark_model=spark_model,
sample_input=sample_input)
# Resolve the conda environment: use the default when none is given; a non-dict
# value is opened as a file and parsed as YAML.
conda_env_subpath = "conda.yaml"
if conda_env is None:
conda_env = get_default_conda_env()
elif not isinstance(conda_env, dict):
with open(conda_env, "r") as f:
conda_env = yaml.safe_load(f)
# Write the resolved environment to "conda.yaml" inside dst_dir.
with open(os.path.join(dst_dir, conda_env_subpath), "w") as f:
yaml.safe_dump(conda_env, stream=f, default_flow_style=False)
# Register this module's flavor and the pyfunc flavor (loader module
# "mlflow.spark"), then save the model configuration as "MLmodel" in dst_dir.
mlflow_model.add_flavor(FLAVOR_NAME, pyspark_version=pyspark.__version__,
model_data=_SPARK_MODEL_PATH_SUB)
pyfunc.add_to_model(mlflow_model, loader_module="mlflow.spark", data=_SPARK_MODEL_PATH_SUB,
env=conda_env_subpath)
mlflow_model.save(os.path.join(dst_dir, "MLmodel"))
def score_model(spark, data_path, model_uri):
if os.path.isdir(data_path):
filenames = [os.path.abspath(os.path.join(data_path, x)) for x in os.listdir(data_path)
if os.path.isfile(os.path.join(data_path, x))]
else:
filenames = [data_path]
image_classifier_udf = mlflow.pyfunc.spark_udf(spark=spark,
model_uri=model_uri,
result_type=ArrayType(StringType()))
image_df = read_images(spark, filenames)
raw_preds = image_df.withColumn("prediction", image_classifier_udf("image")).select(
["filename", "prediction"]).toPandas()
# load the pyfunc model to get our domain
pyfunc_model = mlflow.pyfunc.load_pyfunc(model_uri=model_uri)
preds = pd.DataFrame(raw_preds["filename"], index=raw_preds.index)
preds[pyfunc_model._column_names] = pd.DataFrame(raw_preds['prediction'].values.tolist(),
columns=pyfunc_model._column_names,
index=raw_preds.index)
preds = pd.DataFrame(raw_preds["filename"], index=raw_preds.index)