diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/artifacts/estimator.html b/mlruns/0/62ce60957a5a4e93b8182918697230cf/artifacts/estimator.html
new file mode 100644
index 0000000..e4c15c3
--- /dev/null
+++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/artifacts/estimator.html
@@ -0,0 +1,11 @@
+[estimator.html: scikit-learn's HTML widget for the fitted estimator; markup tags not recoverable from extraction, text content below]
+RandomForestRegressor(max_depth=6, max_features=3)
+In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
+On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
+ + + \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/artifacts/model/MLmodel b/mlruns/0/62ce60957a5a4e93b8182918697230cf/artifacts/model/MLmodel new file mode 100644 index 0000000..151bee6 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/artifacts/model/MLmodel @@ -0,0 +1,24 @@ +artifact_path: model +flavors: + python_function: + env: + conda: conda.yaml + virtualenv: python_env.yaml + loader_module: mlflow.sklearn + model_path: model.pkl + predict_fn: predict + python_version: 3.10.4 + sklearn: + code: null + pickled_model: model.pkl + serialization_format: cloudpickle + sklearn_version: 1.3.2 +mlflow_version: 2.8.0 +model_size_bytes: 700268 +model_uuid: 3b20b6daf07646699898ed732dc45419 +run_id: 62ce60957a5a4e93b8182918697230cf +signature: + inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1, 10]}}]' + outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1]}}]' + params: null +utc_time_created: '2023-11-22 00:45:00.460319' diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/artifacts/model/conda.yaml b/mlruns/0/62ce60957a5a4e93b8182918697230cf/artifacts/model/conda.yaml new file mode 100644 index 0000000..d592e91 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/artifacts/model/conda.yaml @@ -0,0 +1,15 @@ +channels: +- conda-forge +dependencies: +- python=3.10.4 +- pip<=23.3.1 +- pip: + - mlflow==2.8.0 + - cloudpickle==2.2.1 + - numpy==1.26.1 + - packaging==23.2 + - psutil==5.9.6 + - pyyaml==6.0.1 + - scikit-learn==1.3.2 + - scipy==1.11.3 +name: mlflow-env diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/artifacts/model/model.pkl b/mlruns/0/62ce60957a5a4e93b8182918697230cf/artifacts/model/model.pkl new file mode 100644 index 0000000..9c6d5cf Binary files /dev/null and b/mlruns/0/62ce60957a5a4e93b8182918697230cf/artifacts/model/model.pkl differ diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/artifacts/model/python_env.yaml b/mlruns/0/62ce60957a5a4e93b8182918697230cf/artifacts/model/python_env.yaml new file mode 100644 index 0000000..2472742 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/artifacts/model/python_env.yaml @@ -0,0 +1,7 @@ +python: 3.10.4 +build_dependencies: +- pip==23.3.1 +- setuptools==58.1.0 +- wheel==0.41.3 +dependencies: +- -r requirements.txt diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/artifacts/model/requirements.txt b/mlruns/0/62ce60957a5a4e93b8182918697230cf/artifacts/model/requirements.txt new file mode 100644 index 0000000..1e3af3f --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/artifacts/model/requirements.txt @@ -0,0 +1,8 @@ +mlflow==2.8.0 +cloudpickle==2.2.1 +numpy==1.26.1 +packaging==23.2 +psutil==5.9.6 +pyyaml==6.0.1 +scikit-learn==1.3.2 +scipy==1.11.3 \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/inputs/2901e21780957eeb7b293a72714bc168/meta.yaml b/mlruns/0/62ce60957a5a4e93b8182918697230cf/inputs/2901e21780957eeb7b293a72714bc168/meta.yaml new file mode 100644 index 0000000..b684f1a --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/inputs/2901e21780957eeb7b293a72714bc168/meta.yaml @@ -0,0 +1,6 @@ +destination_id: 21f6d9f0ac284309fa124c33fea309a0 +destination_type: RUN +source_id: 21f6d9f0ac284309fa124c33fea309a0 +source_type: DATASET +tags: + mlflow.data.context: train diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/inputs/2f52afd31a845d9682954ed6ba2c6da6/meta.yaml 
b/mlruns/0/62ce60957a5a4e93b8182918697230cf/inputs/2f52afd31a845d9682954ed6ba2c6da6/meta.yaml new file mode 100644 index 0000000..6ca2e8a --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/inputs/2f52afd31a845d9682954ed6ba2c6da6/meta.yaml @@ -0,0 +1,6 @@ +destination_id: 2d8f1b90af563814f690eb250ad83166 +destination_type: RUN +source_id: 2d8f1b90af563814f690eb250ad83166 +source_type: DATASET +tags: + mlflow.data.context: eval diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/meta.yaml b/mlruns/0/62ce60957a5a4e93b8182918697230cf/meta.yaml new file mode 100644 index 0000000..a1c3200 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/meta.yaml @@ -0,0 +1,15 @@ +artifact_uri: file:///panfs/accrepfs.vampire/home/gaglijt1/mltf/mlruns/0/62ce60957a5a4e93b8182918697230cf/artifacts +end_time: 1700613971729 +entry_point_name: '' +experiment_id: '0' +lifecycle_stage: active +run_id: 62ce60957a5a4e93b8182918697230cf +run_name: charming-bat-405 +run_uuid: 62ce60957a5a4e93b8182918697230cf +source_name: '' +source_type: 4 +source_version: '' +start_time: 1700613892216 +status: 3 +tags: [] +user_id: gaglijt1 diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/metrics/training_mean_absolute_error b/mlruns/0/62ce60957a5a4e93b8182918697230cf/metrics/training_mean_absolute_error new file mode 100644 index 0000000..47cd861 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/metrics/training_mean_absolute_error @@ -0,0 +1 @@ +1700613897968 29.21843005290767 0 diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/metrics/training_mean_squared_error b/mlruns/0/62ce60957a5a4e93b8182918697230cf/metrics/training_mean_squared_error new file mode 100644 index 0000000..63c33b3 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/metrics/training_mean_squared_error @@ -0,0 +1 @@ +1700613897968 1234.9272165506097 0 diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/metrics/training_r2_score b/mlruns/0/62ce60957a5a4e93b8182918697230cf/metrics/training_r2_score new file mode 100644 index 0000000..d220c55 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/metrics/training_r2_score @@ -0,0 +1 @@ +1700613897968 0.7821139716953788 0 diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/metrics/training_root_mean_squared_error b/mlruns/0/62ce60957a5a4e93b8182918697230cf/metrics/training_root_mean_squared_error new file mode 100644 index 0000000..0a9457b --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/metrics/training_root_mean_squared_error @@ -0,0 +1 @@ +1700613897968 35.14153122091594 0 diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/metrics/training_score b/mlruns/0/62ce60957a5a4e93b8182918697230cf/metrics/training_score new file mode 100644 index 0000000..dc8be4e --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/metrics/training_score @@ -0,0 +1 @@ +1700613899185 0.7821139716953788 0 diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/bootstrap b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/bootstrap new file mode 100644 index 0000000..4791ed5 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/bootstrap @@ -0,0 +1 @@ +True \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/ccp_alpha b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/ccp_alpha new file mode 100644 index 0000000..171538e --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/ccp_alpha @@ -0,0 +1 @@ +0.0 \ No newline at end of file diff --git 
a/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/criterion b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/criterion new file mode 100644 index 0000000..0dd06a9 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/criterion @@ -0,0 +1 @@ +squared_error \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/max_depth b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/max_depth new file mode 100644 index 0000000..62f9457 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/max_depth @@ -0,0 +1 @@ +6 \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/max_features b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/max_features new file mode 100644 index 0000000..e440e5c --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/max_features @@ -0,0 +1 @@ +3 \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/max_leaf_nodes b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/max_leaf_nodes new file mode 100644 index 0000000..4af1832 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/max_leaf_nodes @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/max_samples b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/max_samples new file mode 100644 index 0000000..4af1832 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/max_samples @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/min_impurity_decrease b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/min_impurity_decrease new file mode 100644 index 0000000..171538e --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/min_impurity_decrease @@ -0,0 +1 @@ +0.0 \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/min_samples_leaf b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/min_samples_leaf new file mode 100644 index 0000000..56a6051 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/min_samples_leaf @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/min_samples_split b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/min_samples_split new file mode 100644 index 0000000..d8263ee --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/min_samples_split @@ -0,0 +1 @@ +2 \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/min_weight_fraction_leaf b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/min_weight_fraction_leaf new file mode 100644 index 0000000..171538e --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/min_weight_fraction_leaf @@ -0,0 +1 @@ +0.0 \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/n_estimators b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/n_estimators new file mode 100644 index 0000000..105d7d9 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/n_estimators @@ -0,0 +1 @@ +100 \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/n_jobs b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/n_jobs new file mode 100644 index 0000000..4af1832 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/n_jobs @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/oob_score 
b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/oob_score new file mode 100644 index 0000000..c1f22fb --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/oob_score @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/random_state b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/random_state new file mode 100644 index 0000000..4af1832 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/random_state @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/verbose b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/verbose new file mode 100644 index 0000000..c227083 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/verbose @@ -0,0 +1 @@ +0 \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/warm_start b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/warm_start new file mode 100644 index 0000000..c1f22fb --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/params/warm_start @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/estimator_class b/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/estimator_class new file mode 100644 index 0000000..c97c86c --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/estimator_class @@ -0,0 +1 @@ +sklearn.ensemble._forest.RandomForestRegressor \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/estimator_name b/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/estimator_name new file mode 100644 index 0000000..fdec80f --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/estimator_name @@ -0,0 +1 @@ +RandomForestRegressor \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.autologging b/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.autologging new file mode 100644 index 0000000..044bdd7 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.autologging @@ -0,0 +1 @@ +sklearn \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.log-model.history b/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.log-model.history new file mode 100644 index 0000000..f52c409 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.log-model.history @@ -0,0 +1 @@ +[{"run_id": "62ce60957a5a4e93b8182918697230cf", "artifact_path": "model", "utc_time_created": "2023-11-22 00:45:00.460319", "flavors": {"python_function": {"model_path": "model.pkl", "predict_fn": "predict", "loader_module": "mlflow.sklearn", "python_version": "3.10.4", "env": {"conda": "conda.yaml", "virtualenv": "python_env.yaml"}}, "sklearn": {"pickled_model": "model.pkl", "sklearn_version": "1.3.2", "serialization_format": "cloudpickle", "code": null}}, "model_uuid": "3b20b6daf07646699898ed732dc45419", "mlflow_version": "2.8.0", "signature": {"inputs": "[{\"type\": \"tensor\", \"tensor-spec\": {\"dtype\": \"float64\", \"shape\": [-1, 10]}}]", "outputs": "[{\"type\": \"tensor\", \"tensor-spec\": {\"dtype\": \"float64\", \"shape\": [-1]}}]", "params": null}, "model_size_bytes": 700268}] \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.runName b/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.runName new file mode 100644 index 0000000..cb068c1 --- /dev/null +++ 
b/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.runName @@ -0,0 +1 @@ +charming-bat-405 \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.source.git.commit b/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.source.git.commit new file mode 100644 index 0000000..eee35c8 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.source.git.commit @@ -0,0 +1 @@ +2cf5575fae20e2b2e89a661ea42a68d57d318647 \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.source.name b/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.source.name new file mode 100644 index 0000000..f516177 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.source.name @@ -0,0 +1 @@ +train_skl_rf.py \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.source.type b/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.source.type new file mode 100644 index 0000000..0c2c1fe --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.source.type @@ -0,0 +1 @@ +LOCAL \ No newline at end of file diff --git a/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.user b/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.user new file mode 100644 index 0000000..cd567d9 --- /dev/null +++ b/mlruns/0/62ce60957a5a4e93b8182918697230cf/tags/mlflow.user @@ -0,0 +1 @@ +gaglijt1 \ No newline at end of file diff --git a/mlruns/0/datasets/21f6d9f0ac284309fa124c33fea309a0/meta.yaml b/mlruns/0/datasets/21f6d9f0ac284309fa124c33fea309a0/meta.yaml new file mode 100644 index 0000000..44085b2 --- /dev/null +++ b/mlruns/0/datasets/21f6d9f0ac284309fa124c33fea309a0/meta.yaml @@ -0,0 +1,10 @@ +digest: a297e3e6 +name: dataset +profile: '{"features_shape": [331, 10], "features_size": 3310, "features_nbytes": + 26480, "targets_shape": [331], "targets_size": 331, "targets_nbytes": 2648}' +schema: '{"mlflow_tensorspec": {"features": "[{\"type\": \"tensor\", \"tensor-spec\": + {\"dtype\": \"float64\", \"shape\": [-1, 10]}}]", "targets": "[{\"type\": \"tensor\", + \"tensor-spec\": {\"dtype\": \"float64\", \"shape\": [-1]}}]"}}' +source: '{"tags": {"mlflow.user": "gaglijt1", "mlflow.source.name": "train_skl_rf.py", + "mlflow.source.type": "LOCAL", "mlflow.source.git.commit": "2cf5575fae20e2b2e89a661ea42a68d57d318647"}}' +source_type: code diff --git a/mlruns/0/datasets/2d8f1b90af563814f690eb250ad83166/meta.yaml b/mlruns/0/datasets/2d8f1b90af563814f690eb250ad83166/meta.yaml new file mode 100644 index 0000000..0bbcc79 --- /dev/null +++ b/mlruns/0/datasets/2d8f1b90af563814f690eb250ad83166/meta.yaml @@ -0,0 +1,9 @@ +digest: fdd80d1b +name: dataset +profile: '{"features_shape": [111, 10], "features_size": 1110, "features_nbytes": + 8880}' +schema: '{"mlflow_tensorspec": {"features": "[{\"type\": \"tensor\", \"tensor-spec\": + {\"dtype\": \"float64\", \"shape\": [-1, 10]}}]", "targets": null}}' +source: '{"tags": {"mlflow.user": "gaglijt1", "mlflow.source.name": "train_skl_rf.py", + "mlflow.source.type": "LOCAL", "mlflow.source.git.commit": "2cf5575fae20e2b2e89a661ea42a68d57d318647"}}' +source_type: code diff --git a/mlruns/0/meta.yaml b/mlruns/0/meta.yaml new file mode 100644 index 0000000..c81fab1 --- /dev/null +++ b/mlruns/0/meta.yaml @@ -0,0 +1,6 @@ +artifact_location: file:///panfs/accrepfs.vampire/home/gaglijt1/mltf/mlruns/0 +creation_time: 1700613890473 +experiment_id: '0' +last_update_time: 1700613890473 +lifecycle_stage: active +name: Default diff --git 
a/train_pytorch_lightning.py b/train_pytorch_lightning.py
new file mode 100644
index 0000000..49b6383
--- /dev/null
+++ b/train_pytorch_lightning.py
@@ -0,0 +1,73 @@
+# Sequential NN example using the PyTorch Lightning API.
+# Lightning modules must be used to activate MLflow autolog capabilities.
+# Lightning will autodetect and run on a GPU, if available, without explicit code.
+import torch
+import mlflow
+from torch.utils.data import DataLoader
+from torchvision import datasets
+from torchvision.transforms import ToTensor
+import torch.nn as nn
+import torch.nn.functional as F
+import pytorch_lightning as L
+
+mlflow.pytorch.autolog()
+
+# In order to take advantage of MLflow autolog capabilities, we need a LightningModule
+class SeqNet(L.LightningModule):
+    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, lr):
+        super().__init__()
+        self.lr = lr  # store the learning rate so configure_optimizers can use it
+
+        self.lin1 = nn.Linear(input_size, hidden_size1)
+        self.lin2 = nn.Linear(hidden_size1, hidden_size2)
+        self.lin3 = nn.Linear(hidden_size2, output_size)
+
+    def forward(self, x):
+        x = torch.flatten(x, 1)
+        x = self.lin1(x)
+        x = F.sigmoid(x)
+        x = self.lin2(x)
+        x = F.log_softmax(x, dim=1)
+        out = self.lin3(x)
+        return out
+
+    # training_step must be defined to use Lightning
+    def training_step(self, batch, batchidx):
+        images, labels = batch
+        output = self(images)
+        loss_func = nn.CrossEntropyLoss()
+        loss = loss_func(output, labels)
+
+        # Metrics logged with self.log() are picked up by MLflow autolog:
+        self.log("train_loss", loss, prog_bar=True, on_step=False, on_epoch=True)
+        return loss
+
+    def configure_optimizers(self):
+        optimizer = torch.optim.SGD(self.parameters(), lr=self.lr)
+        return optimizer
+
+
+input_size = 784
+hidden_size1 = 200
+hidden_size2 = 200
+output_size = 10
+num_epochs = 20
+batch_size = 100
+lr = 0.01
+
+model = SeqNet(input_size, hidden_size1, hidden_size2, output_size, lr)
+
+fmnist_train = datasets.FashionMNIST(root="data", train=True, download=True, transform=ToTensor())
+fmnist_test = datasets.FashionMNIST(root="data", train=False, download=True, transform=ToTensor())
+
+# batch_size belongs on the DataLoader; passing it to limit_train_batches would
+# cap the number of batches per epoch rather than set the batch size.
+fmnist_train_loader = DataLoader(fmnist_train, batch_size=batch_size)
+fmnist_test_loader = DataLoader(fmnist_test, batch_size=batch_size)
+
+# MLflow autologs runs from calls to Lightning "Trainers":
+trainer = L.Trainer(max_epochs=num_epochs)
+trainer.fit(model=model, train_dataloaders=fmnist_train_loader)
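After trainer.fit() returns, the autologged run can be inspected programmatically. A minimal sketch, not part of the patch, assuming it runs in the same process immediately after training:

# Fetch the run that MLflow autolog just recorded and print what was captured.
run = mlflow.last_active_run()
print("run_id:", run.info.run_id)
print("params:", run.data.params)
print("metrics:", run.data.metrics)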
diff --git a/train_pytorch_multigpu.py b/train_pytorch_multigpu.py
new file mode 100644
index 0000000..e4d8362
--- /dev/null
+++ b/train_pytorch_multigpu.py
@@ -0,0 +1,136 @@
+# This example trains a PyTorch model using DDP, which parallelizes data
+# across multiple GPUs.
+# Note: MLflow autolog is not functional on the latest version of PyTorch.
+
+import torch
+import mlflow
+from torchvision import datasets
+from torchvision.transforms import ToTensor
+from torch.utils.data import DataLoader
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.multiprocessing as mp
+from torch.utils.data.distributed import DistributedSampler
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.distributed import init_process_group, destroy_process_group
+import os
+
+mlflow.autolog()
+
+def ddp_setup(rank, world_size):
+    """
+    rank: Unique id of each process
+    world_size: Total number of processes
+    """
+    os.environ["MASTER_ADDR"] = "localhost"
+    os.environ["MASTER_PORT"] = "12355"
+
+    init_process_group(backend="nccl", rank=rank, world_size=world_size)
+
+class SeqNet(nn.Module):
+    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
+        super(SeqNet, self).__init__()
+
+        self.lin1 = nn.Linear(input_size, hidden_size1)
+        self.lin2 = nn.Linear(hidden_size1, hidden_size2)
+        self.lin3 = nn.Linear(hidden_size2, output_size)
+
+    def forward(self, x):
+        x = torch.flatten(x, 1)
+        x = self.lin1(x)
+        x = F.sigmoid(x)
+        x = self.lin2(x)
+        x = F.log_softmax(x, dim=1)
+        out = self.lin3(x)
+        return out
+
+def train(model, train_loader, loss_function, optimizer, rank, num_epochs):
+    model.to(rank)
+    model = DDP(model, device_ids=[rank])
+
+    for epoch in range(num_epochs):
+
+        running_loss = 0.0
+        model.train()
+
+        for i, (images, labels) in enumerate(train_loader):
+            images = torch.div(images, 255.)
+            images, labels = images.to(rank), labels.to(rank)
+
+            optimizer.zero_grad()
+            outputs = model(images)
+            loss = loss_function(outputs, labels)
+            loss.backward()
+            optimizer.step()
+
+            running_loss += loss.item()
+
+        average_loss = running_loss / len(train_loader)
+        if rank == 0:
+            print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {average_loss:.4f}')
+
+    print('Training on GPU ' + str(rank) + ' finished.')
+
+def prepare_dataloader(dataset, batch_size):
+    return DataLoader(
+        dataset,
+        batch_size=batch_size,
+        pin_memory=True,
+        shuffle=False,
+        sampler=DistributedSampler(dataset)
+    )
+
+
+def main(rank, world_size):
+    ddp_setup(rank, world_size)
+
+    # model and parameters
+    input_size = 784
+    hidden_size1 = 200
+    hidden_size2 = 200
+    output_size = 10
+    num_epochs = 10
+    batch_size = 100
+    lr = 0.01
+
+    my_net = SeqNet(input_size, hidden_size1, hidden_size2, output_size)
+
+    optimizer = torch.optim.Adam(my_net.parameters(), lr=lr)
+    loss_function = nn.CrossEntropyLoss()
+
+    fmnist_train = datasets.FashionMNIST(root="data", train=True, download=True, transform=ToTensor())
+
+    fmnist_train_loader = prepare_dataloader(fmnist_train, batch_size)
+
+    train(my_net, fmnist_train_loader, loss_function, optimizer, rank, num_epochs)
+
+    destroy_process_group()
+
+if __name__ == "__main__":
+    world_size = torch.cuda.device_count()  # gets number of available GPUs
+
+    print("Number of available GPUs: " + str(world_size))
+
+    mp.spawn(main, args=(world_size,), nprocs=world_size)
+
+# Leftover single-process evaluation code, kept commented out:
+# fmnist_test = datasets.FashionMNIST(root="data", train=False, download=True, transform=ToTensor())
+# fmnist_test_loader = DataLoader(fmnist_test, batch_size=100, shuffle=True)
+#
+# correct = 0
+# total = 0
+# for images, labels in fmnist_test_loader:
+#     images = torch.div(images, 255.)
+#     output = my_net(images)
+#     _, predicted = torch.max(output, 1)
+#     correct += (predicted == labels).sum()
+#     total += labels.size(0)
+#
+# print('Accuracy of the model: %.3f %%' % ((100 * correct) / total))
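Note that average_loss above is a per-rank value over that rank's shard of the data. If a single cross-GPU number is wanted before rank 0 prints or logs it, the per-rank averages can be combined with an all_reduce. A sketch under that assumption; the helper name reduce_loss is illustrative, not from this patch:

import torch
import torch.distributed as dist

def reduce_loss(average_loss, rank, world_size):
    # Sum the per-rank average losses across all processes, then divide
    # by the number of processes to get one global mean loss.
    t = torch.tensor([average_loss], device=rank)
    dist.all_reduce(t, op=dist.ReduceOp.SUM)
    return (t / world_size).item()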
diff --git a/train_pytorch_multigpu_mlflowbyhand.py b/train_pytorch_multigpu_mlflowbyhand.py
new file mode 100644
index 0000000..1a50c8b
--- /dev/null
+++ b/train_pytorch_multigpu_mlflowbyhand.py
@@ -0,0 +1,135 @@
+# This example trains a PyTorch model using DDP, which parallelizes data
+# across multiple GPUs.
+# Note: MLflow autolog is not functional on the latest version of PyTorch,
+# so this variant logs the parameters, metrics, and model by hand from rank 0.
+
+import torch
+import mlflow
+from torchvision import datasets
+from torchvision.transforms import ToTensor
+from torch.utils.data import DataLoader
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.multiprocessing as mp
+from torch.utils.data.distributed import DistributedSampler
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.distributed import init_process_group, destroy_process_group
+import os
+
+def ddp_setup(rank, world_size):
+    """
+    rank: Unique id of each process
+    world_size: Total number of processes
+    """
+    os.environ["MASTER_ADDR"] = "localhost"
+    os.environ["MASTER_PORT"] = "12355"
+
+    init_process_group(backend="nccl", rank=rank, world_size=world_size)
+
+class SeqNet(nn.Module):
+    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
+        super(SeqNet, self).__init__()
+
+        self.lin1 = nn.Linear(input_size, hidden_size1)
+        self.lin2 = nn.Linear(hidden_size1, hidden_size2)
+        self.lin3 = nn.Linear(hidden_size2, output_size)
+
+    def forward(self, x):
+        x = torch.flatten(x, 1)
+        x = self.lin1(x)
+        x = F.sigmoid(x)
+        x = self.lin2(x)
+        x = F.log_softmax(x, dim=1)
+        out = self.lin3(x)
+        return out
+
+def train(model, train_loader, loss_function, optimizer, rank, num_epochs):
+    model.to(rank)
+    model = DDP(model, device_ids=[rank])
+
+    for epoch in range(num_epochs):
+
+        running_loss = 0.0
+        model.train()
+
+        for i, (images, labels) in enumerate(train_loader):
+            images = torch.div(images, 255.)
+            images, labels = images.to(rank), labels.to(rank)
+
+            optimizer.zero_grad()
+            outputs = model(images)
+            loss = loss_function(outputs, labels)
+            loss.backward()
+            optimizer.step()
+
+            running_loss += loss.item()
+
+        average_loss = running_loss / len(train_loader)
+        if rank == 0:
+            # Log the per-epoch loss by hand; only rank 0 talks to MLflow,
+            # so a single run is recorded.
+            mlflow.log_metric("loss", average_loss, step=epoch)
+            print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {average_loss:.4f}')
+
+    print('Training on GPU ' + str(rank) + ' finished.')
+
+def prepare_dataloader(dataset, batch_size):
+    return DataLoader(
+        dataset,
+        batch_size=batch_size,
+        pin_memory=True,
+        shuffle=False,
+        sampler=DistributedSampler(dataset)
+    )
+
+
+def main(rank, world_size):
+    ddp_setup(rank, world_size)
+
+    # model and parameters
+    input_size = 784
+    hidden_size1 = 200
+    hidden_size2 = 200
+    output_size = 10
+    num_epochs = 10
+    batch_size = 100
+    lr = 0.01
+
+    my_net = SeqNet(input_size, hidden_size1, hidden_size2, output_size)
+
+    optimizer = torch.optim.Adam(my_net.parameters(), lr=lr)
+    loss_function = nn.CrossEntropyLoss()
+
+    if rank == 0:
+        # Open one run and record the hyperparameters by hand.
+        mlflow.start_run()
+        mlflow.log_params({"epochs": num_epochs, "lr": lr, "batch_size": batch_size})
+
+    fmnist_train = datasets.FashionMNIST(root="data", train=True, download=True, transform=ToTensor())
+
+    fmnist_train_loader = prepare_dataloader(fmnist_train, batch_size)
+
+    train(my_net, fmnist_train_loader, loss_function, optimizer, rank, num_epochs)
+
+    if rank == 0:
+        # Save the trained model to the run, then close it.
+        mlflow.pytorch.log_model(my_net, "model")
+        mlflow.end_run()
+
+    destroy_process_group()
+
+if __name__ == "__main__":
+    world_size = torch.cuda.device_count()  # gets number of available GPUs
+
+    print("Number of available GPUs: " + str(world_size))
+
+    mp.spawn(main, args=(world_size,), nprocs=world_size)
+
+# Leftover single-process evaluation code, kept commented out:
+# fmnist_test = datasets.FashionMNIST(root="data", train=False, download=True, transform=ToTensor())
+# fmnist_test_loader = DataLoader(fmnist_test, batch_size=100, shuffle=True)
+#
+# correct = 0
+# total = 0
+# for images, labels in fmnist_test_loader:
+#     images = torch.div(images, 255.)
+#     output = my_net(images)
+#     _, predicted = torch.max(output, 1)
+#     correct += (predicted == labels).sum()
+#     total += labels.size(0)
+#
+# print('Accuracy of the model: %.3f %%' % ((100 * correct) / total))
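A model logged with mlflow.pytorch.log_model can be reloaded by run ID for inference. A minimal sketch; the run ID is a placeholder to be copied from the MLflow UI or the run's meta.yaml:

import mlflow.pytorch

run_id = "<run_id>"  # placeholder; fill in the ID of the run that logged the model
model = mlflow.pytorch.load_model(f"runs:/{run_id}/model")
model.eval()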
diff --git a/train_pytorch_singlegpu.py b/train_pytorch_singlegpu.py
new file mode 100644
index 0000000..19eb59d
--- /dev/null
+++ b/train_pytorch_singlegpu.py
@@ -0,0 +1,114 @@
+# This example trains a sequential neural network and logs
+# our model and some parameters/metrics of interest with MLflow.
+
+import torch
+import mlflow
+from torchvision import datasets
+from torchvision.transforms import ToTensor
+from torch.utils.data import DataLoader
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class SeqNet(nn.Module):
+    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
+        super(SeqNet, self).__init__()
+
+        self.lin1 = nn.Linear(input_size, hidden_size1)
+        self.lin2 = nn.Linear(hidden_size1, hidden_size2)
+        self.lin3 = nn.Linear(hidden_size2, output_size)
+
+    def forward(self, x):
+        x = torch.flatten(x, 1)
+        x = self.lin1(x)
+        x = F.sigmoid(x)
+        x = self.lin2(x)
+        x = F.log_softmax(x, dim=1)
+        out = self.lin3(x)
+        return out
+
+def train(model, train_loader, loss_function, optimizer, num_epochs):
+    model.to(device)
+
+    for epoch in range(num_epochs):
+
+        running_loss = 0.0
+        model.train()
+
+        for i, (images, labels) in enumerate(train_loader):
+            images = torch.div(images, 255.)
+            images, labels = images.to(device), labels.to(device)
+
+            optimizer.zero_grad()
+            outputs = model(images)
+            loss = loss_function(outputs, labels)
+            loss.backward()
+            optimizer.step()
+            running_loss += loss.item()
+
+        average_loss = running_loss / len(train_loader)
+
+        # Log "loss" in MLflow. This function must be called inside the
+        # "with mlflow.start_run():" block in the main code.
+        mlflow.log_metric("loss", average_loss, step=epoch)
+
+        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {average_loss:.4f}')
+
+    print('Training finished.')
+
+
+# Start an MLflow run
+with mlflow.start_run():
+
+    input_size = 784
+    hidden_size1 = 200
+    hidden_size2 = 200
+    output_size = 10
+    num_epochs = 10
+    batch_size = 100
+    lr = 0.01
+
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print("Training on device: ", device)
+    my_net = SeqNet(input_size, hidden_size1, hidden_size2, output_size)
+    my_net = my_net.to(device)
+
+    optimizer = torch.optim.Adam(my_net.parameters(), lr=lr)
+    loss_function = nn.CrossEntropyLoss()
+
+    fmnist_train = datasets.FashionMNIST(root="data", train=True, download=True, transform=ToTensor())
+    fmnist_test = datasets.FashionMNIST(root="data", train=False, download=True, transform=ToTensor())
+
+    fmnist_train_loader = DataLoader(fmnist_train, batch_size=batch_size, shuffle=True)
+    fmnist_test_loader = DataLoader(fmnist_test, batch_size=batch_size, shuffle=True)
+
+    train(my_net, fmnist_train_loader, loss_function, optimizer, num_epochs)
+
+    # Log params and model in the current MLflow run
+    mlflow.log_params({"epochs": num_epochs, "lr": lr})
+    mlflow.pytorch.log_model(my_net, "model")
+
+    correct = 0
+    total = 0
+    for images, labels in fmnist_test_loader:
+        images = torch.div(images, 255.)
+        images = images.to(device)
+        labels = labels.to(device)
+        output = my_net(images)
+        _, predicted = torch.max(output, 1)
+        correct += (predicted == labels).sum()
+        total += labels.size(0)
+
+    print('Accuracy of the model: %.3f %%' % ((100 * correct) / total))
diff --git a/train_skl_rf.py b/train_sklearn.py
similarity index 70%
rename from train_skl_rf.py
rename to train_sklearn.py
index 7a7e6f3..a1b700f 100644
--- a/train_skl_rf.py
+++ b/train_sklearn.py
@@ -1,12 +1,14 @@
 #!/usr/bin/env python3
+# Simple scikit-learn random forest regressor, with MLflow autolog capabilities.
+# This will track and save the model, parameters, metrics, and data on the MLflow server.
 
-#import mlflow
+import mlflow
 
 from sklearn.model_selection import train_test_split
 from sklearn.datasets import load_diabetes
 from sklearn.ensemble import RandomForestRegressor
 
-#mlflow.autolog()
+mlflow.autolog()
 
 db = load_diabetes()
 X_train, X_test, y_train, y_test = train_test_split(db.data, db.target)
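With autolog enabled, fitting this regressor produces a run like 62ce60957a5a4e93b8182918697230cf recorded under mlruns/0 at the top of this diff. A sketch, not part of the patch, of reloading that model and scoring this script's held-out split; the run ID is a placeholder:

import mlflow.sklearn

run_id = "<run_id>"  # placeholder, e.g. the 62ce6095... run captured above
rf = mlflow.sklearn.load_model(f"runs:/{run_id}/model")
print("Test R^2:", rf.score(X_test, y_test))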
diff --git a/train_tf_seq.py b/train_tensorflow_keras.py
similarity index 76%
rename from train_tf_seq.py
rename to train_tensorflow_keras.py
index db9ff1e..ab3ce50 100644
--- a/train_tf_seq.py
+++ b/train_tensorflow_keras.py
@@ -1,10 +1,13 @@
 #!/usr/bin/env python3
+# This example makes use of TensorFlow and the Keras API to autolog the model, parameters, etc.
+
 import mlflow
 import tensorflow as tf
 import matplotlib.pyplot as plt
 
-mlflow.autolog()
+# The following line creates an autolog entry on the MLflow server:
+mlflow.tensorflow.autolog()
 
 def plot_save_dig(image_data, fig_name):
     plt.imshow(image_data, cmap="binary")
@@ -31,7 +34,9 @@ def plot_save_dig(image_data, fig_name):
     optimizer="sgd",
     metrics=["accuracy"])
 
-history = model.fit(X_train, y_train, epochs = 20,
+
+# MLflow autolog is launched upon the call to model.fit() for training.
+history = model.fit(X_train, y_train, epochs = 5,
                     validation_data=(X_valid, y_valid))
 
 model.evaluate(X_test, y_test)
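All of the scripts in this patch record to the local mlruns/ file store whose contents open this diff; running `mlflow ui` from the repository root browses those runs interactively. A sketch of doing the same programmatically with MLflow's client API; experiment "0" is the Default experiment created above:

from mlflow.tracking import MlflowClient

# Point the client at the local file store and list what each run recorded.
client = MlflowClient(tracking_uri="file:./mlruns")
for run in client.search_runs(experiment_ids=["0"]):
    print(run.info.run_name, run.info.status, run.data.metrics)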