How to use the mlflow.set_tracking_uri function in mlflow

To help you get started, we've selected a few mlflow.set_tracking_uri examples, based on popular ways it is used in public projects.
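mlflow.set_tracking_uri tells the MLflow client where to record runs: a local directory, a file: URI, a database URI, or the http(s) address of a tracking server. A minimal sketch with placeholder locations:

import mlflow

# record runs under a local directory (a bare path works too)
mlflow.set_tracking_uri('file:///tmp/mlruns')

# or point at a remote tracking server
# mlflow.set_tracking_uri('http://localhost:5000')

print(mlflow.get_tracking_uri())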


github mlflow / mlflow / tests / spark / test_spark_model_export.py
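# note: "sparkm" below refers to the mlflow.spark module (e.g. import mlflow.spark as sparkm)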
def test_sparkml_estimator_model_log(tmpdir, spark_model_estimator):
    # Print the coefficients and intercept for multinomial logistic regression
    old_tracking_uri = mlflow.get_tracking_uri()
    cnt = 0
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in [False, True]:
        for dfs_tmp_dir in [None, os.path.join(str(tmpdir), "test")]:
            print("should_start_run =", should_start_run, "dfs_tmp_dir =", dfs_tmp_dir)
            try:
                tracking_dir = os.path.abspath(str(tmpdir.join("mlruns")))
                mlflow.set_tracking_uri("file://%s" % tracking_dir)
                if should_start_run:
                    mlflow.start_run()
                artifact_path = "model%d" % cnt
                cnt += 1
                sparkm.log_model(
                        artifact_path=artifact_path,
                        spark_model=spark_model_estimator.model,
                        dfs_tmpdir=dfs_tmp_dir)
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=mlflow.active_run().info.run_id,
                    artifact_path=artifact_path)

                # test reloaded model
                reloaded_model = sparkm.load_model(model_uri=model_uri, dfs_tmpdir=dfs_tmp_dir)
                preds_df = reloaded_model.transform(spark_model_estimator.spark_df)
                preds = [x.prediction for x in preds_df.select("prediction").collect()]
github mlflow / mlflow / tests / spark / test_spark_model_export.py
                artifact_path = "model%d" % cnt
                cnt += 1
                sparkm.log_model(artifact_path=artifact_path, spark_model=spark_model_iris.model,
                                 dfs_tmpdir=dfs_tmp_dir)
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=mlflow.active_run().info.run_id,
                    artifact_path=artifact_path)

                # test reloaded model
                reloaded_model = sparkm.load_model(model_uri=model_uri, dfs_tmpdir=dfs_tmp_dir)
                preds_df = reloaded_model.transform(spark_model_iris.spark_df)
                preds = [x.prediction for x in preds_df.select("prediction").collect()]
                assert spark_model_iris.predictions == preds
            finally:
                mlflow.end_run()
                mlflow.set_tracking_uri(old_tracking_uri)
                x = dfs_tmp_dir or sparkm.DFS_TMP
                shutil.rmtree(x)
                shutil.rmtree(tracking_dir)
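The test above saves the current tracking URI, points MLflow at a temporary file store, and restores the original URI in the finally block. The same save-and-restore pattern can be packaged as a context manager; a minimal sketch (the helper is hypothetical, not part of mlflow):

import contextlib

import mlflow

@contextlib.contextmanager
def tracking_uri(uri):
    # temporarily redirect MLflow tracking, restoring the previous URI on exit
    old_uri = mlflow.get_tracking_uri()
    mlflow.set_tracking_uri(uri)
    try:
        yield
    finally:
        mlflow.set_tracking_uri(old_uri)

with tracking_uri('file:///tmp/mlruns'):
    with mlflow.start_run():
        mlflow.log_param('demo', True)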
github ThoughtWorksInc / ml-app-template / src / train.py
import os

import mlflow
import pandas as pd
from sklearn import datasets, metrics
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# load data (note: load_boston was removed in scikit-learn 1.2)
data = datasets.load_boston()

# preprocess data
x = pd.DataFrame(data.data, columns=data.feature_names)
column_order = x.columns
y = pd.DataFrame(data.target, columns=["MEDV"])
x_train, x_test, y_train, y_test = train_test_split(x, y)

# configure mlflow
mlflow.set_tracking_uri(uri='http://35.240.197.5:5000')
# use a separate experiment for CI runs (CircleCI sets CI=true)
if os.environ.get('CI', '') == 'true':
    mlflow.set_experiment('CI')
else:
    mlflow.set_experiment('dev')

with mlflow.start_run() as run:
    # define hyperparameters
    N_ESTIMATORS = 2
    MAX_DEPTH = 2

    # train model
    model = RandomForestRegressor(n_estimators=N_ESTIMATORS, max_depth=MAX_DEPTH)
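The snippet is truncated here; inside the run, a script like this would typically fit the model and log hyperparameters and metrics. A sketch of that continuation (an illustration, not the project's actual code):

    model.fit(x_train, y_train.values.ravel())

    # record hyperparameters and a holdout metric on the active run
    mlflow.log_param('n_estimators', N_ESTIMATORS)
    mlflow.log_param('max_depth', MAX_DEPTH)
    mlflow.log_metric('r2', model.score(x_test, y_test))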
github GoogleCloudPlatform / ml-on-gcp / tutorials / tensorflow / mlflow_gcp / trainer / task.py
        shuffle=True,
        num_epochs=args.num_epochs,
        batch_size=args.batch_size)

    # Pass a NumPy array via DataFrame.values
    validation_dataset = model.input_fn(
        features=eval_x.values,
        labels=eval_y,
        shuffle=False,
        num_epochs=args.num_epochs,
        batch_size=num_eval_examples)

    start_time = time()
    # Set MLflow tracking URI
    if args.mlflow_tracking_uri:
        mlflow.set_tracking_uri(args.mlflow_tracking_uri)
    # Train model
    with mlflow.start_run() as active_run:
        run_id = active_run.info.run_id

        # Callbacks
        class MlflowCallback(tf.keras.callbacks.Callback):
            # This function will be called after training completes.
            def on_train_end(self, logs=None):
                mlflow.log_param('num_layers', len(self.model.layers))
                mlflow.log_param('optimizer_name',
                                 type(self.model.optimizer).__name__)
        # MLflow callback
        mlflow_callback = MlflowCallback()
        # Setup Learning Rate decay callback.
        lr_decay_callback = tf.keras.callbacks.LearningRateScheduler(
            lambda epoch: args.learning_rate + 0.02 * (0.5 ** (1 + epoch)),
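Guarding set_tracking_uri behind a command-line flag, as this script does, keeps MLflow pointed at its default local ./mlruns file store unless a server is explicitly configured. A minimal sketch of wiring up such a flag (the argument name is an assumption):

import argparse

import mlflow

parser = argparse.ArgumentParser()
parser.add_argument('--mlflow-tracking-uri', default=None)
args = parser.parse_args()

# only override the default local store when a URI is supplied
if args.mlflow_tracking_uri:
    mlflow.set_tracking_uri(args.mlflow_tracking_uri)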
github PeterFogh / dvc_dask_use_case / evaluate.py
import mlflow

def save_mlflow_run(params, metrics, artifacts):
    """Save an MLflow run (params, metrics, artifacts) to the tracking server."""
    mlflow.set_tracking_uri('http://localhost:5000')
    mlflow.set_experiment('dvc_dask_use_case')
    with mlflow.start_run():
        for stage, stage_params in params.items():
            for key, value in stage_params.items():
                mlflow.log_param(key, value)

        for metric, value in metrics.items():
            mlflow.log_metric(metric, value)

        for path in artifacts:
            mlflow.log_artifact(path)
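A hypothetical call illustrating the argument shapes the function expects (all values are placeholders):

save_mlflow_run(
    params={'train': {'n_estimators': 100, 'max_depth': 5}},
    metrics={'rmse': 0.42},
    artifacts=['model.pkl'],
)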
github oroszgy / spacy-hungarian-models / src / model_builder / __main__.py
def train_ner(model_name, output_path, train_data, dev_data, test_data, dropout, n_iter, patience):
    mlflow.set_tracking_uri("./mlruns")
    mlflow.set_experiment("Spacy NER")
    mlflow.start_run(run_name="Using all")

    if model_name in ["None", "False", "", "blank"]:
        model_name = None
    trainer = SpacyNerTrainer(model_name, output_path)

    logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    logging.info("Reading train data")
    diterator = DataIterator()
    train_sentences = list(tqdm(itertools.islice(diterator.tagged_sentences(train_data), None)))  # islice(..., None) iterates the full sequence
    logging.info("Got {} sentences with at least one entity".format(len(train_sentences)))

    logging.info("Reading test data")
    test_sentences = list(tqdm(diterator.tagged_sentences(test_data)))
    logging.info("Got {} sentences with at least one entity".format(len(test_sentences)))
github PipelineAI / models / keras / mlflow-mnist / model / pipeline_train.py
def run(epochs, batch_size):
#    tracking_uri = 'https://community.cloud.pipeline.ai'

    users_home = '/mnt/pipelineai/users' 
    experiment_base_path = '%s/experiments' % users_home
    tracking_uri = 'file://%s' % experiment_base_path
    mlflow.set_tracking_uri(tracking_uri)

    experiment_name = '%s-%s' % (os.getenv('PIPELINE_RESOURCE_NAME', 'mnist'), os.getenv('PIPELINE_TAG', int(1000 * time.time())))

    mlflow.set_experiment(experiment_name)
    
    with mlflow.start_run() as run:
        mlflow.log_param("epochs", str(epochs))
        mlflow.log_param("batch_size", str(batch_size))

        mnist = tf.keras.datasets.mnist

        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        x_train, x_test = x_train / 255.0, x_test / 255.0

        model = tf.keras.models.Sequential([
          tf.keras.layers.Flatten(input_shape=(28, 28)),
github paulgureghian / PyTorch_Projects / bitcoin_price _prediction.py
### import packages 
#import os
import numpy as np 
import pandas as pd 
from statistics import mean 
from matplotlib import pyplot as plt 

from keras.layers import LSTM
from keras.layers import Dense
from keras.models import Sequential
from keras.callbacks import History 
history = History() 

import mlflow 
import mlflow.keras
mlflow.set_tracking_uri('/Users/paulgureghian/mlruns')   

from sklearn.preprocessing import MinMaxScaler

### read in the dataset to a dataframe 
pd.set_option('display.max_columns', 8)
pd.set_option('display.width', 1000)
df = pd.read_csv('/bitstamp.csv')

print(df.head())
print('')
print(df.shape) 
print('') 

### encode the date 
df['date'] = pd.to_datetime(df['Timestamp'], unit='s').dt.date
group = df.groupby('date')
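As this snippet shows, set_tracking_uri also accepts a bare local directory path, which MLflow treats as a local file store, equivalent to the file: URI form used in earlier examples (the path below is a placeholder):

import mlflow

mlflow.set_tracking_uri('/tmp/mlruns')  # same store as 'file:///tmp/mlruns'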
github PipelineAI / models / spark / pi-v1 / model / pipeline_train.py
import mlflow
from pyspark.sql import SparkSession

import calculate_pi

#user_id = 
user_id = ''
model_name = 'sparkpi'
model_tag = 'v1'

if __name__ == "__main__":
    """
        Usage: pi [partitions]
    """
    tracking_uri = 'https://community.cloud.pipeline.ai'

    mlflow.set_tracking_uri(tracking_uri)

    experiment_name = '%s%s-%s' % (user_id, model_name, model_tag)
    
    # This will create and set the experiment
    mlflow.set_experiment(experiment_name)

    with mlflow.start_run() as run:
        spark = SparkSession\
            .builder\
            .appName("PythonSparkPi")\
            .getOrCreate()

        partitions = 2
        n = 100000 * partitions

        mlflow.log_param('partitions', str(partitions))
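mlflow.log_param stores parameter values as strings, so the explicit str() conversion above is optional; an equivalent call:

mlflow.log_param('partitions', partitions)  # MLflow stringifies the value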