"""Get started with MLflow: train, track, register and reload a classifier.

Notebook-style walkthrough that:

1. points MLflow at a local tracking server and selects an experiment,
2. loads the diabetes CSV and splits it into train/test sets,
3. trains a ``RandomForestClassifier`` and prints its test accuracy,
4. logs the dataset, hyperparameters, metric and model to MLflow,
5. reloads the logged model through the generic ``pyfunc`` flavor and
   predicts on the test set.

Cell outputs captured from the original run are kept as comments.
"""

# --- Packages ---------------------------------------------------------------
import mlflow
import pandas as pd
from mlflow.models import infer_signature
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# --- Paths & Config ---------------------------------------------------------
MLFLOW_HOST = "127.0.0.1"
MLFLOW_PORT = "8080"
MLFLOW_URI = f"http://{MLFLOW_HOST}:{MLFLOW_PORT}"
EXPERIMENT_NAME = "Get Started with MLflow"
DATA_URL = "https://raw.githubusercontent.com/joekakone/datasets/master/datasets/ml-challenges/diabetes.csv"

mlflow.set_tracking_uri(uri=MLFLOW_URI)
# set_experiment() creates the experiment when it does not exist yet, so a
# separate create_experiment() call is unnecessary -- and it would raise on
# every re-run once the experiment has been created.
mlflow.set_experiment(EXPERIMENT_NAME)
# Output:
# <Experiment: artifact_location='mlflow-artifacts:/934956080996859228',
#  creation_time=1707548279279, experiment_id='934956080996859228',
#  last_update_time=1707548279279, lifecycle_stage='active',
#  name='Get Started with MLflow', tags={}>

# --- Load data --------------------------------------------------------------
dataset = pd.read_csv(DATA_URL)
dataset.head()  # displayed as a table when run as a notebook cell

# "Outcome" is the target column; every other column is a feature.
X = dataset.drop("Outcome", axis=1)
y = dataset["Outcome"]

# --- Split Train/Test -------------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(X, y)

# --- Train model ------------------------------------------------------------
# Hyperparameters
params = dict(n_estimators=100, max_depth=6, max_features=3)

# Create model
rf = RandomForestClassifier(**params)

# Train
rf.fit(X_train, y_train)

y_pred = rf.predict(X_test)
score = rf.score(X_test, y_test)
print(score)
# Output: 0.765625

# --- Track the run with MLflow ----------------------------------------------
with mlflow.start_run():
    # Tags (the model is a random forest, not a linear/logistic regression)
    mlflow.set_tag("Training Info", "Basic RF model for diabete data")

    # Dataset
    mlflow.log_input(
        mlflow.data.from_pandas(dataset, source=DATA_URL),
        context="training",
    )

    # Hyperparameters
    mlflow.log_params(params)

    # Infer the model signature
    signature = infer_signature(X_train, rf.predict(X_train))

    # Score
    mlflow.log_metric("accuracy", score)

    # Log the model
    model_info = mlflow.sklearn.log_model(
        sk_model=rf,
        artifact_path="diabete_model",
        signature=signature,
        # A handful of rows is enough to document the expected input; passing
        # the whole training set would store all of it with the model.
        input_example=X_train.iloc[:5],
        registered_model_name="diabete-classifier",
    )

# --- Reload the model and predict -------------------------------------------
# Use the URI of the model that was just logged rather than a run id copied
# from a previous run; it has the form 'runs:/<run_id>/diabete_model'
# (e.g. 'runs:/38ad9aee189343cdadfcc79ec35ecc55/diabete_model').
logged_model = model_info.model_uri

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame.
loaded_model.predict(pd.DataFrame(X_test))

# --- MLflow generates an MLmodel file ---------------------------------------
# NOTE(review): the listing below has artifact_path 'iris_rf', i.e. it was
# captured from an iris example rather than from the diabetes model above.
#
# artifact_path: iris_rf
# flavors:
#   python_function:
#     env:
#       conda: conda.yaml
#       virtualenv: python_env.yaml
#     loader_module: mlflow.sklearn
#     model_path: model.pkl
#     predict_fn: predict
#     python_version: 3.10.12
#   sklearn:
#     code: null
#     pickled_model: model.pkl
#     serialization_format: cloudpickle
#     sklearn_version: 1.6.0
# is_signature_from_type_hint: false
# mlflow_version: 2.20.0
# model_size_bytes: 178735
# model_uuid: 53c6b22e667043919b1333d15909959e
# run_id: 031e1f8d884e40ebba5a9c3c1a93337f
# signature:
#   inputs: '[{"type": "double", "name": "sepal length (cm)", "required": true},
#     {"type": "double", "name": "sepal width (cm)", "required": true},
#     {"type": "double", "name": "petal length (cm)", "required": true},
#     {"type": "double", "name": "petal width (cm)", "required": true}]'
#   outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "int64", "shape": [-1]}}]'
#   params: null
# type_hint_from_example: false
# utc_time_created: '2025-01-25 17:07:46.036141'