Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Smooth sharp edges #87

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.8
2,819 changes: 2,819 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

41 changes: 41 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
[tool.poetry]
name = "spacetimeformer"
version = "0.1.0"
description = "QData Research Library for the Spacetimeformer project"
authors = ["[email protected]"]
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.8"
cython = ">=0.22"
cmdstanpy = "0.9.68"
# https://discourse.mc-stan.org/t/pystan-build-is-not-pep-517-compliant/32971/6
# https://pystan.readthedocs.io/en/latest/faq.html#how-can-i-run-pystan-on-macos-with-apple-silicon-chips-apple-m1-m2-etc
pystan = ">=2.19.1.1,<2.20.0.0"
numpy = ">=1.15.4"
pandas = ">=1.0.4"
matplotlib = ">=2.0.0"
convertdate = ">=2.1.2"
python-dateutil = ">=2.8.0"
performer-pytorch = "^1.1.4"
tqdm = ">=4.36.1"
nystrom-attention = "^0.0.11"
pytorch-lightning = "1.6"
netcdf4 = "^1.6.5"
scikit-learn = "^1.3.2"
omegaconf = "^2.3.0"
seaborn = "^0.13.0"
opencv-python = "^4.8.1.78"
wandb = "^0.16.0"
einops = "^0.7.0"
chardet = "^5.2.0"
opt-einsum = "^3.3.0"
torchmetrics = "0.5.1"
torch = "1.11.0"
torchvision = "0.12.0"
joblib = "^1.3.2"


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
15 changes: 14 additions & 1 deletion spacetimeformer/data/csv_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def __init__(

if raw_df is None:
self.data_path = data_path
assert os.path.exists(self.data_path)
assert os.path.exists(self.data_path), f"data_path not found: {self.data_path}"
raw_df = pd.read_csv(
self.data_path,
**read_csv_kwargs,
Expand Down Expand Up @@ -212,6 +212,10 @@ def val_data(self):
def test_data(self):
    # Accessor for the held-out test split prepared during dataset setup.
    # NOTE(review): the sibling val_data above appears to be a @property;
    # any decorator for this accessor sits outside the visible diff hunk — confirm.
    return self._test_data

@property
def scaler_obj(self):
    # Expose the fitted scaler instance so callers can persist it after
    # training (train.py pickles this for inference-time inverse scaling).
    return self._scaler

def length(self, split):
return {
"train": len(self.train_data),
Expand Down Expand Up @@ -240,6 +244,14 @@ def __init__(
self.target_points = target_points
self.time_resolution = time_resolution

assert (
self.series.length(split) + time_resolution * (-target_points - context_points) + 1 > 0
), (f"Split {split} is too short to yield even one (context, target) window. Check time_resolution, context_points, and target_points.\n"
f"Dataset length: {self.series.length(split)}\n"
f"Target points: {target_points}\n"
f"Context points: {context_points}\n"
f"Time Resolution: {time_resolution}")

self._slice_start_points = [
i
for i in range(
Expand All @@ -249,6 +261,7 @@ def __init__(
+ 1,
)
]
print(f"{split} dataset length: {len(self)}")

def __len__(self):
    # One sample per precomputed valid window start index, so dataset
    # length equals the number of slice start points built in __init__.
    return len(self._slice_start_points)
Expand Down
731 changes: 731 additions & 0 deletions spacetimeformer/data/sine_waves_with_dates.csv

Large diffs are not rendered by default.

6 changes: 2 additions & 4 deletions spacetimeformer/forecaster.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,10 +293,8 @@ def configure_optimizers(self):
)
return {
"optimizer": optimizer,
"lr_scheduler": {
"scheduler": scheduler,
"monitor": "val/loss",
},
"lr_scheduler": scheduler,
"monitor": "val/loss",
}

@classmethod
Expand Down
5 changes: 5 additions & 0 deletions spacetimeformer/spacetimeformer_model/nn/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ def forward(self, x, self_mask_seq=None, output_attn=False):
if self.local_attention:
# attention on tokens of each variable ind.
x1 = self.norm1(x)
# assert that second dim of x1 is a multiple of d_yc
assert (x1.shape[1] % self.d_yc == 0), (
"x1.shape[1] is not a multiple of d_yc. Check that train arg yc_dim matches the number of variables. "
f"x1.shape[1] = {x1.shape[1]}, d_yc = {self.d_yc}"
)
x1 = Localize(x1, self.d_yc)
# TODO: localize self_mask_seq
x1, _ = self.local_attention(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,11 @@ def configure_optimizers(self):
patience=3,
factor=self.decay_factor,
)
return [self.optimizer], [self.scheduler]
return {
"optimizer": self.optimizer,
"lr_scheduler": self.scheduler,
"monitor": "val/loss",
}

@classmethod
def add_cli(self, parser):
Expand Down
36 changes: 33 additions & 3 deletions spacetimeformer/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,10 @@
import warnings
import os
import uuid

import pytorch_lightning as pl
import torch

import spacetimeformer as stf
import pickle

_MODELS = ["spacetimeformer", "mtgnn", "heuristic", "lstm", "lstnet", "linear", "s4"]

Expand All @@ -19,6 +18,7 @@
"exchange",
"precip",
"toy2",
"sinewaves",
"solar_energy",
"syn",
"mnist",
Expand Down Expand Up @@ -105,6 +105,8 @@ def create_parser():
parser.add_argument("--accumulate", type=int, default=1)
parser.add_argument("--val_check_interval", type=float, default=1.0)
parser.add_argument("--limit_val_batches", type=float, default=1.0)
parser.add_argument("--max_epochs", type=int)
parser.add_argument("--log_every_n_steps", type=int, default=50)
parser.add_argument("--no_earlystopping", action="store_true")
parser.add_argument("--patience", type=int, default=5)
parser.add_argument(
Expand All @@ -119,6 +121,9 @@ def create_parser():


def create_model(config):
# x_dim time embedding dimension
# yc_dim number of variables in context
# yt_dim number of variables in target
x_dim, yc_dim, yt_dim = None, None, None
if config.dset == "metr-la":
x_dim = 2
Expand Down Expand Up @@ -148,6 +153,10 @@ def create_model(config):
x_dim = 6
yc_dim = 20
yt_dim = 20
elif config.dset == "sinewaves":
x_dim = 6
yc_dim = 3
yt_dim = 3
elif config.dset == "syn":
x_dim = 5
yc_dim = 20
Expand Down Expand Up @@ -448,6 +457,7 @@ def create_dset(config):
)
INV_SCALER = dset.reverse_scaling
SCALER = dset.apply_scaling
SCALER_OBJ = dset.scaler_obj
elif config.dset in ["mnist", "cifar"]:
if config.dset == "mnist":
config.target_points = 28 - config.context_points
Expand Down Expand Up @@ -575,6 +585,7 @@ def create_dset(config):
)
INV_SCALER = dset.reverse_scaling
SCALER = dset.apply_scaling
SCALER_OBJ = dset.scaler_obj
NULL_VAL = None
# PAD_VAL = -32.0
PLOT_VAR_NAMES = target_cols
Expand Down Expand Up @@ -605,6 +616,7 @@ def create_dset(config):
)
INV_SCALER = dset.reverse_scaling
SCALER = dset.apply_scaling
SCALER_OBJ = dset.scaler_obj
NULL_VAL = None
PLOT_VAR_NAMES = ["OT", "p (mbar)", "raining (s)"]
PLOT_VAR_IDXS = [20, 0, 15]
Expand All @@ -627,6 +639,11 @@ def create_dset(config):
else:
raise ValueError(f"Unrecognized toy dataset {config.dset}")
target_cols = [f"D{i}" for i in range(1, 21)]
elif "sinewaves" in config.dset:
target_cols = [
"Sine Wave 1","Sine Wave 2","Sine Wave 3"
]
data_path = "./spacetimeformer/data/sine_waves_with_dates.csv"
elif config.dset == "exchange":
if data_path == "auto":
data_path = "./data/exchange_rate_converted.csv"
Expand Down Expand Up @@ -670,6 +687,7 @@ def create_dset(config):
)
INV_SCALER = dset.reverse_scaling
SCALER = dset.apply_scaling
SCALER_OBJ = dset.scaler_obj
NULL_VAL = None

return (
Expand All @@ -680,6 +698,7 @@ def create_dset(config):
PLOT_VAR_IDXS,
PLOT_VAR_NAMES,
PAD_VAL,
SCALER_OBJ,
)


Expand Down Expand Up @@ -770,8 +789,17 @@ def main(args):
plot_var_idxs,
plot_var_names,
pad_val,
scaler_obj,
) = create_dset(args)

# save scaler for inference post-training
with open('scaler_method.pkl', 'wb') as file:
pickle.dump(scaler, file)
with open('fitted_scaler_obj.pkl', 'wb') as file:
pickle.dump(scaler_obj, file)

assert (len(data_module.test_dataloader()) > 0), "The DataLoader should not be empty, check the Dataset __init__ and __getitem__"

# Model
args.null_value = null_val
args.pad_value = pad_val
Expand Down Expand Up @@ -840,8 +868,10 @@ def main(args):
gradient_clip_algorithm="norm",
overfit_batches=20 if args.debug else 0,
accumulate_grad_batches=args.accumulate,
sync_batchnorm=True,
sync_batchnorm=False, #set False on CPU, else "SyncBatchNorm layers only work with GPU modules"
limit_val_batches=args.limit_val_batches,
max_epochs=args.max_epochs,
log_every_n_steps=args.log_every_n_steps,
**val_control,
)

Expand Down