diff --git a/harvest-planner/.dockerignore b/harvest-planner/.dockerignore
new file mode 100644
index 0000000..ead0e6f
--- /dev/null
+++ b/harvest-planner/.dockerignore
@@ -0,0 +1,7 @@
+.dockerignore
+.git
+.idea
+.ruff_cache
+.venv
+objects.json
+ruff.toml
diff --git a/harvest-planner/.gitignore b/harvest-planner/.gitignore
new file mode 100644
index 0000000..ae8554d
--- /dev/null
+++ b/harvest-planner/.gitignore
@@ -0,0 +1,10 @@
+# python generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+
+# venv
+.venv
diff --git a/harvest-planner/.python-version b/harvest-planner/.python-version
new file mode 100644
index 0000000..530fe91
--- /dev/null
+++ b/harvest-planner/.python-version
@@ -0,0 +1 @@
+3.11.9
\ No newline at end of file
diff --git a/harvest-planner/Dockerfile b/harvest-planner/Dockerfile
new file mode 100644
index 0000000..dbeea0a
--- /dev/null
+++ b/harvest-planner/Dockerfile
@@ -0,0 +1,24 @@
+FROM python:3.11 AS build-env
+
+ARG version=3.11
+ARG APP=app
+
+COPY . ./$APP
+
+WORKDIR /$APP
+
+COPY requirements.lock /$APP/
+RUN sed '/-e/d' requirements.lock > requirements.txt
+RUN --mount=type=cache,target=/root/.cache \
+    pip install -r ./requirements.txt
+
+FROM gcr.io/distroless/python3-debian12:debug
+ARG version=3.11
+ARG APP=app
+
+COPY --from=build-env /$APP /$APP
+COPY --from=build-env /usr/local/lib/python${version}/site-packages /usr/local/lib/python${version}/site-packages
+
+WORKDIR /$APP
+ENV PYTHONPATH=/usr/local/lib/python${version}/site-packages
+ENTRYPOINT [ "python", "src/harvest_planner/main.py" ]
\ No newline at end of file
diff --git a/harvest-planner/README.md b/harvest-planner/README.md
new file mode 100644
index 0000000..b2a22ef
--- /dev/null
+++ b/harvest-planner/README.md
@@ -0,0 +1,30 @@
+# Harvest Planner
+
+Harvest Planner estimates how much memory each poller needs to monitor ONTAP and StorageGRID clusters.
+Here's how to use it:
+
+1. Run the following Harvest command to gather object counts from your cluster(s):
+   `bin/harvest planner -p poller`  # one cluster
+   `bin/harvest planner`            # multiple clusters
+   `bin/harvest planner --docker`   # multiple clusters, and runs the following Docker command for you
+
+The planner command will create an `objects.json` file that contains the object counts for each cluster.
+
+2. Run the following Docker command to estimate how much memory each poller needs to monitor its cluster.
+
+```bash
+docker run --rm \
+  --volume "$(pwd)/objects.json:/objects.json" \
+  ghcr.io/netapp/harvest-planner \
+  estimate-memory -i /objects.json
+```
+
+# Development
+
+Harvest-planner is written in Python and uses [Rye](https://rye.astral.sh/) for development.
+
+To get started, install Rye, clone the repo, cd into `harvest-metrics/harvest-planner`, and run the following command:
+
+```bash
+rye sync
+```
\ No newline at end of file
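If you'd rather skip Docker during development, the estimator can also be run straight from a Rye-synced checkout. This is a minimal sketch, not a documented workflow: it assumes `rye sync` has completed and that you run it from the `harvest-planner` directory, since `main.py` loads the bundled model via the relative `models/` paths.

```bash
# run the estimator against a locally generated objects.json
# (run from harvest-planner/ so models/gbr_model.pkl resolves)
rye run python src/harvest_planner/main.py estimate-memory -i objects.json
```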
diff --git a/harvest-planner/models/gbr_model.pkl b/harvest-planner/models/gbr_model.pkl
new file mode 100644
index 0000000..2edb2e1
Binary files /dev/null and b/harvest-planner/models/gbr_model.pkl differ
diff --git a/harvest-planner/models/gbr_scaler.pkl b/harvest-planner/models/gbr_scaler.pkl
new file mode 100644
index 0000000..f2f7307
Binary files /dev/null and b/harvest-planner/models/gbr_scaler.pkl differ
diff --git a/harvest-planner/objects.json.example b/harvest-planner/objects.json.example
new file mode 100644
index 0000000..c7b1c34
--- /dev/null
+++ b/harvest-planner/objects.json.example
@@ -0,0 +1,116 @@
+[
+  {
+    "DiskConfig": 2040,
+    "DiskPerf": 6400,
+    "LunConfig": 0,
+    "LunPerf": 0,
+    "NFSClientsConfig": 0,
+    "Poller": "cluster-01",
+    "QtreeConfig": 1847,
+    "QtreePerf": 0,
+    "SVMConfig": 24,
+    "SensorConfig": 2400,
+    "SnapMirrorConfig": 0,
+    "SnapshotConfig": 180,
+    "StorageGridSG": 0,
+    "VolumeAnalyticsConfig": 0,
+    "VolumeConfig": 30492,
+    "VolumePerf": 9242,
+    "WorkloadDetailVolumePerf": 91296
+  },
+  {
+    "DiskConfig": 240,
+    "DiskPerf": 640,
+    "LunConfig": 0,
+    "LunPerf": 0,
+    "NFSClientsConfig": 0,
+    "Poller": "cluster-02",
+    "QtreeConfig": 0,
+    "QtreePerf": 0,
+    "SVMConfig": 24,
+    "SensorConfig": 0,
+    "SnapMirrorConfig": 0,
+    "SnapshotConfig": 0,
+    "StorageGridSG": 0,
+    "VolumeAnalyticsConfig": 1735,
+    "VolumeConfig": 30492,
+    "VolumePerf": 9242,
+    "WorkloadDetailVolumePerf": 91296
+  },
+  {
+    "DiskConfig": 23,
+    "DiskPerf": 64,
+    "LunConfig": 15,
+    "LunPerf": 6,
+    "NFSClientsConfig": 0,
+    "Poller": "sar",
+    "QtreeConfig": 935,
+    "QtreePerf": 938,
+    "SVMConfig": 87,
+    "SensorConfig": 186,
+    "SnapMirrorConfig": 684,
+    "SnapshotConfig": 1703,
+    "StorageGridSG": 0,
+    "VolumeAnalyticsConfig": 4,
+    "VolumeConfig": 940,
+    "VolumePerf": 922,
+    "WorkloadDetailVolumePerf": 0
+  },
+  {
+    "DiskConfig": 48,
+    "DiskPerf": 63,
+    "LunConfig": 15,
+    "LunPerf": 6,
+    "NFSClientsConfig": 0,
+    "Poller": "F2240-127-26",
+    "QtreeConfig": 862,
+    "QtreePerf": 17,
+    "SVMConfig": 273,
+    "SensorConfig": 0,
+    "SnapMirrorConfig": 89,
+    "SnapshotConfig": 0,
+    "StorageGridSG": 0,
+    "VolumeAnalyticsConfig": 0,
+    "VolumeConfig": 918,
+    "VolumePerf": 908,
+    "WorkloadDetailVolumePerf": 0
+  },
+  {
+    "DiskConfig": 24,
+    "DiskPerf": 66,
+    "LunConfig": 133,
+    "LunPerf": 133,
+    "NFSClientsConfig": 0,
+    "Poller": "nikhita",
+    "QtreeConfig": 0,
+    "QtreePerf": 86,
+    "SVMConfig": 12,
+    "SensorConfig": 242,
+    "SnapMirrorConfig": 1,
+    "SnapshotConfig": 2433,
+    "StorageGridSG": 0,
+    "VolumeAnalyticsConfig": 0,
+    "VolumeConfig": 90,
+    "VolumePerf": 89,
+    "WorkloadDetailVolumePerf": 73
+  },
+  {
+    "DiskConfig": 792,
+    "DiskPerf": 1144,
+    "LunConfig": 0,
+    "LunPerf": 0,
+    "NFSClientsConfig": 0,
+    "Poller": "nasclu01",
+    "QtreeConfig": 325,
+    "QtreePerf": 4228,
+    "SVMConfig": 92,
+    "SensorConfig": 1116,
+    "SnapMirrorConfig": 283,
+    "SnapshotConfig": 0,
+    "StorageGridSG": 0,
+    "VolumeAnalyticsConfig": 1,
+    "VolumeConfig": 4028,
+    "VolumePerf": 4020,
+    "WorkloadDetailVolumePerf": 0
+  }
+]
\ No newline at end of file
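Each object in this JSON array becomes one DataFrame row when `main.py` loads the file with `pandas.read_json`, and each count key maps onto a feature column. A short sketch of that loading step, assuming it runs next to the example file:

```python
import pandas as pd

# each object in the JSON array becomes one DataFrame row
df = pd.read_json("objects.json.example")

# the model consumes only the count columns; "Poller" is kept for display
features = df.drop(columns=["Poller"])
print(df["Poller"].tolist())  # ['cluster-01', 'cluster-02', 'sar', ...]
print(features.shape)         # (6, 16) -- 6 pollers x 16 object counts
```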
StorageGRID clusters" +dependencies = [ + "joblib>=1.4.2", + "numpy>=2.0.0", + "pandas>=2.2.2", + "scikit-learn>=1.5.0", + "scipy>=1.13.1", +] +readme = "README.md" +requires-python = ">= 3.8" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.rye] +managed = true +dev-dependencies = [] + +[tool.hatch.metadata] +allow-direct-references = true + +[tool.hatch.build.targets.wheel] +packages = ["src/harvest_planner"] diff --git a/harvest-planner/requirements-dev.lock b/harvest-planner/requirements-dev.lock new file mode 100644 index 0000000..312c7a9 --- /dev/null +++ b/harvest-planner/requirements-dev.lock @@ -0,0 +1,36 @@ +# generated by rye +# use `rye lock` or `rye sync` to update this lockfile +# +# last locked with the following flags: +# pre: false +# features: [] +# all-features: false +# with-sources: false +# generate-hashes: false + +-e file:. +joblib==1.4.2 + # via harvest-planner + # via scikit-learn +numpy==2.0.0 + # via harvest-planner + # via pandas + # via scikit-learn + # via scipy +pandas==2.2.2 + # via harvest-planner +python-dateutil==2.9.0.post0 + # via pandas +pytz==2024.1 + # via pandas +scikit-learn==1.5.0 + # via harvest-planner +scipy==1.14.0 + # via harvest-planner + # via scikit-learn +six==1.16.0 + # via python-dateutil +threadpoolctl==3.5.0 + # via scikit-learn +tzdata==2024.1 + # via pandas diff --git a/harvest-planner/requirements.lock b/harvest-planner/requirements.lock new file mode 100644 index 0000000..312c7a9 --- /dev/null +++ b/harvest-planner/requirements.lock @@ -0,0 +1,36 @@ +# generated by rye +# use `rye lock` or `rye sync` to update this lockfile +# +# last locked with the following flags: +# pre: false +# features: [] +# all-features: false +# with-sources: false +# generate-hashes: false + +-e file:. 
diff --git a/harvest-planner/requirements.lock b/harvest-planner/requirements.lock
new file mode 100644
index 0000000..312c7a9
--- /dev/null
+++ b/harvest-planner/requirements.lock
@@ -0,0 +1,36 @@
+# generated by rye
+# use `rye lock` or `rye sync` to update this lockfile
+#
+# last locked with the following flags:
+#   pre: false
+#   features: []
+#   all-features: false
+#   with-sources: false
+#   generate-hashes: false
+
+-e file:.
+joblib==1.4.2
+    # via harvest-planner
+    # via scikit-learn
+numpy==2.0.0
+    # via harvest-planner
+    # via pandas
+    # via scikit-learn
+    # via scipy
+pandas==2.2.2
+    # via harvest-planner
+python-dateutil==2.9.0.post0
+    # via pandas
+pytz==2024.1
+    # via pandas
+scikit-learn==1.5.0
+    # via harvest-planner
+scipy==1.14.0
+    # via harvest-planner
+    # via scikit-learn
+six==1.16.0
+    # via python-dateutil
+threadpoolctl==3.5.0
+    # via scikit-learn
+tzdata==2024.1
+    # via pandas
diff --git a/harvest-planner/ruff.toml b/harvest-planner/ruff.toml
new file mode 100644
index 0000000..a2d0db7
--- /dev/null
+++ b/harvest-planner/ruff.toml
@@ -0,0 +1,26 @@
+[lint]
+select = [
+    "ARG",
+    "B",
+    "E",
+    "ERA",
+    "F",
+    "I",
+    "ISC",
+    "NPY",
+    "PD",
+    "PERF",
+    "PIE",
+    "PL",
+    "PTH",
+    "PYI",
+    "Q",
+    "RET",
+    "RUF",
+    "S",
+    "SIM",
+    "UP",
+    "YTT",
+]
+
+ignore = ["ISC001"]
\ No newline at end of file
diff --git a/harvest-planner/src/harvest_planner/__init__.py b/harvest-planner/src/harvest_planner/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/harvest-planner/src/harvest_planner/main.py b/harvest-planner/src/harvest_planner/main.py
new file mode 100644
index 0000000..344e0a1
--- /dev/null
+++ b/harvest-planner/src/harvest_planner/main.py
@@ -0,0 +1,254 @@
+import argparse
+import pathlib
+
+import joblib
+import numpy as np
+import pandas as pd
+from sklearn.ensemble import GradientBoostingRegressor
+from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+
+# Constants
+MODEL_PATH = "models/gbr_model.pkl"
+SCALER_PATH = "models/gbr_scaler.pkl"
+
+BYTES_PER_MB = 1024 * 1024
+POLLER = "Poller"
+PLUS_SOME = 1.2  # 20% more memory than predicted
+PREDICTED_MB = "EstimatedMB"
+PREDICTED_RSS = "PredictedRss"
+RSS_BYTES = "RssBytes"
+RSS_MB = "RssMB"
+HARVESTED_FEATURES = [
+    "DiskConfig",
+    "DiskPerf",
+    "LunConfig",
+    "LunPerf",
+    "NFSClientsConfig",
+    "QtreeConfig",
+    "QtreePerf",
+    "SVMConfig",
+    "SensorConfig",
+    "SnapMirrorConfig",
+    "SnapshotConfig",
+    "StorageGridSG",
+    "VolumeAnalyticsConfig",
+    "VolumeConfig",
+    "VolumePerf",
+    "WorkloadDetailVolumePerf",
+]
+
+
+def train_model(args):
+    # check that the input file exists
+    if not args.input.exists():
+        print(f'Error: The input file "{args.input}" does not exist.')
+        return
+
+    # Load the CSV file
+    try:
+        data = pd.read_csv(args.input)
+    except Exception as e:
+        print(f'Error: Failed to read the input file "{args.input}". {e}')
+        return
+
+    # Prepare the data using the selected features
+    x_selected = data[HARVESTED_FEATURES]
+    y = data[RSS_BYTES]
+
+    # Split the data into training and testing sets
+    x_train, x_test, y_train, y_test = train_test_split(
+        x_selected, y, test_size=0.2, random_state=42
+    )
+
+    # Standardize the features
+    scaler = StandardScaler()
+    x_train_scaled = scaler.fit_transform(x_train)
+    x_test_scaled = scaler.transform(x_test)
+    x_selected_scaled = scaler.transform(x_selected)
+
+    # Set common parameters for GradientBoostingRegressor
+    params = {
+        "n_estimators": 100,
+        "learning_rate": 0.1,
+        "max_depth": 6,
+        "min_samples_split": 2,
+        "min_samples_leaf": 1,
+        "subsample": 0.9,
+        "max_features": 0.9,
+        "random_state": 42,
+    }
+
+    # Train the model with the common parameters
+    gbr = GradientBoostingRegressor(**params)
+    gbr.fit(x_train_scaled, y_train)
+
+    # Save the model and scaler to disk
+    model_file_path = MODEL_PATH
+    scaler_file_path = SCALER_PATH
+    joblib.dump(gbr, model_file_path)
+    joblib.dump(scaler, scaler_file_path)
+
+    # Predict using the trained GradientBoostingRegressor model
+    data[PREDICTED_RSS] = gbr.predict(x_selected_scaled)
+    y_train_pred = gbr.predict(x_train_scaled)
+    y_test_pred = gbr.predict(x_test_scaled)
+
+    # Create new columns RssMB and PredictedMB
+    data[RSS_MB] = data[RSS_BYTES] / BYTES_PER_MB
+    data[PREDICTED_MB] = data[PREDICTED_RSS] / BYTES_PER_MB
+
+    # Save the updated DataFrame to a new CSV file
+    if args.save:
+        output_file_path = args.save
+        data.to_csv(output_file_path, index=False)
+
+    # Evaluate the model performance on training data
+    r2_train = r2_score(y_train, y_train_pred)
+    mae_train = mean_absolute_error(y_train, y_train_pred)
+    rmse_train = np.sqrt(mean_squared_error(y_train, y_train_pred))
+
+    # Evaluate the model performance on test data
+    r2_test = r2_score(y_test, y_test_pred)
+    mae_test = mean_absolute_error(y_test, y_test_pred)
+    rmse_test = np.sqrt(mean_squared_error(y_test, y_test_pred))
+
+    # Display the first few rows of the updated DataFrame to verify the results
+    print(data[[RSS_BYTES, RSS_MB, PREDICTED_RSS, PREDICTED_MB]].head())
+
+    # Create a DataFrame to store evaluation metrics
+    metrics_df = pd.DataFrame(
+        {
+            "Dataset": ["Training", "Test"],
+            "R^2": [r2_train, r2_test],
+            "MAE": [mae_train, mae_test],
+            "RMSE": [rmse_train, rmse_test],
+        }
+    )
+
+    print("\nModel evaluation metrics:")
+    # Print the DataFrame
+    print(metrics_df)
+
+
+# Validate input
+def validate_input(df):
+    if POLLER not in df.columns:
+        print('Error: The DataFrame does not contain a "Poller" column.')
+        return False
+
+    is_valid = True
+    nan_indices = np.where(pd.isna(df))
+
+    if len(nan_indices[0]) == 0:
+        return True
+
+    for row, col in zip(*nan_indices):
+        poller_value = df.loc[row, POLLER]
+        poller_name = "unnamed" if pd.isna(poller_value) else poller_value
+        print(f'Poller "{poller_name}" is missing the required key: {df.columns[col]}')
+        is_valid = False
+
+    return is_valid
+
+
+def predict_size(args):
+    # check that the input file exists
+    if not args.input.exists():
+        print(f'Error: The input file "{args.input}" does not exist.')
+        return
+
+    # Load the input JSON file
+    try:
+        input_data = pd.read_json(args.input)
+    except Exception as e:
+        print(f'Error: Failed to read the input file "{args.input}". {e}')
+        return
+
+    is_valid = validate_input(input_data)
+    if not is_valid:
+        return
+
+    # Load the model and scaler
+    model_file_path = MODEL_PATH
+    scaler_file_path = SCALER_PATH
+    gbr = joblib.load(model_file_path)
+    scaler = joblib.load(scaler_file_path)
+
+    # Prepare the input data using the selected features
+    x_input = input_data[HARVESTED_FEATURES]
+    x_input_scaled = scaler.transform(x_input)
+
+    # Predict the memory size and add 20% (PLUS_SOME) more memory
+    input_data[PREDICTED_RSS] = gbr.predict(x_input_scaled)
+    input_data[PREDICTED_MB] = input_data[PREDICTED_RSS] * PLUS_SOME / BYTES_PER_MB
+
+    # Round the predicted memory size to the nearest integer and print with no decimals
+    input_data[PREDICTED_MB] = input_data[PREDICTED_MB].round(0).astype(int)
+
+    # Calculate the total predicted memory size
+    total_predicted_mb = input_data[PREDICTED_MB].sum()
+
+    # Add a summary row to the DataFrame
+    summary_row = pd.DataFrame({
+        POLLER: "Total",
+        PREDICTED_MB: [total_predicted_mb],
+    })
+    input_data = pd.concat([input_data, summary_row], ignore_index=True)
+
+    # Left justify the poller column
+    input_data[POLLER] = input_data[POLLER].apply(lambda x: f"{x:<}")
+
+    # Display the input data with the predicted memory size
+    print(input_data[[POLLER, PREDICTED_MB]].to_string(index=False))
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description="Planner")
+    sub_parsers = parser.add_subparsers(dest="command", help="Available commands")
+
+    # Create a parser for the train command
+    train_parser = sub_parsers.add_parser("train", help="Train the model")
+    train_parser.add_argument(
+        "-i",
+        "--input",
+        type=pathlib.Path,
+        required=True,
+        help="CSV file with the training data",
+    )
+    train_parser.add_argument(
+        "-s",
+        "--save",
+        type=pathlib.Path,
+        help="Path to save the input file with predictions",
+    )
+
+    # Create a parser for the estimate-memory command
+    predict_parser = sub_parsers.add_parser(
+        "estimate-memory", help="Estimate the amount of memory needed"
+    )
+    predict_parser.add_argument(
+        "-i",
+        "--input",
+        type=pathlib.Path,
+        required=True,
+        help="Object counts JSON file from bin/harvest planner",
+    )
+
+    args = parser.parse_args()
+
+    if args.command == "train":
+        train_model(args)
+    elif args.command == "estimate-memory":
+        predict_size(args)
+    else:
+        parser.print_help()
+
+
+def main():
+    parse_args()
+
+
+if __name__ == "__main__":
+    main()
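For a rough sense of the sizing arithmetic in `predict_size`: the model predicts RSS in bytes, which is padded by `PLUS_SOME` (20% headroom) and converted to MB before display. A standalone sketch of that post-processing, where the raw prediction value is hypothetical and only for illustration:

```python
BYTES_PER_MB = 1024 * 1024
PLUS_SOME = 1.2  # 20% headroom on top of the raw prediction

predicted_rss_bytes = 250_000_000  # hypothetical model output for one poller
estimated_mb = round(predicted_rss_bytes * PLUS_SOME / BYTES_PER_MB)
print(estimated_mb)  # 286 -- MB recommended for this poller
```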