diff --git a/.github/workflows/deploy_image_on_vm.sh b/.github/workflows/deploy_image_on_vm.sh index f60323d..67c603f 100755 --- a/.github/workflows/deploy_image_on_vm.sh +++ b/.github/workflows/deploy_image_on_vm.sh @@ -264,7 +264,7 @@ extra_args_model_predict="$extra_args_model_predict" \ -e CLOUDSDK_AUTH_ACCESS_TOKEN=$(gcloud auth application-default print-access-token) \ -e PL_API_KEY=$PL_API_KEY \ -e TILE_STORE_ROOT_DIR=$TILE_STORE_ROOT_DIR \ - -e INDEX_CACHE_DIR=/index_cache \ + -e INDEX_CACHE_DIR=file:///index_cache \ -v $LOCAL_INDEX_CACHE_DIR:/index_cache \ $DOCKER_IMAGE /bin/bash -c "$COMMAND" && \ echo "Data Extraction Complete" && \ diff --git a/rslp/forest_loss_driver/README.md b/rslp/forest_loss_driver/README.md index 53aaf8c..e8106d0 100644 --- a/rslp/forest_loss_driver/README.md +++ b/rslp/forest_loss_driver/README.md @@ -35,7 +35,7 @@ Required environment variables: - `RSLP_PREFIX`: GCS bucket prefix for model checkpoints \ Optional environment variables: -- `INDEX_CACHE_DIR`: Directory for caching image indices +- `INDEX_CACHE_DIR`: Directory for caching image indices. MUST specify the filesystem (e.g. `file:///index_cache`), or it will be treated as a relative path - `TILE_STORE_ROOT_DIR`: Directory for tile storage cache - `PL_API_KEY`: Planet API key (if using Planet imagery) diff --git a/rslp/forest_loss_driver/inference/config.py b/rslp/forest_loss_driver/inference/config.py index ebb67a8..7f750ef 100644 --- a/rslp/forest_loss_driver/inference/config.py +++ b/rslp/forest_loss_driver/inference/config.py @@ -176,7 +176,7 @@ def _get_most_recent_friday() -> datetime: def _default_ds_root() -> str: friday = PredictPipelineConfig._get_most_recent_friday() dated_dataset_name = f"dataset_{friday.strftime('%Y%m%d')}" - return f"{os.environ.get('RSLP_PREFIX', 'gs://rslearn-eai')}/datasets/forest_loss_driver/final_test_6/prediction/{dated_dataset_name}" + return f"{os.environ.get('RSLP_PREFIX', 'gs://rslearn-eai')}/datasets/forest_loss_driver/final_test_7/prediction/{dated_dataset_name}" 
model_predict_args: ModelPredictArgs ds_root: str = field(default_factory=_default_ds_root) diff --git a/rslp/forest_loss_driver/predict_pipeline.py b/rslp/forest_loss_driver/predict_pipeline.py index 0ddb4d8..f9d870b 100644 --- a/rslp/forest_loss_driver/predict_pipeline.py +++ b/rslp/forest_loss_driver/predict_pipeline.py @@ -65,8 +65,15 @@ def _validate_required_env_vars( logger.warning( f"The following optional environment variables are missing: {missing_optional_vars_str}" ) - logger.info(f"Environment variables: {os.environ}") - # check that we have PL_API_KEY if we are looking for planet images + if "INDEX_CACHE_DIR" in os.environ: + cache_dir = os.environ["INDEX_CACHE_DIR"] + if not any( + cache_dir.startswith(prefix) for prefix in ["gs://", "s3://", "file://"] + ): + logger.warning( + f"INDEX_CACHE_DIR '{cache_dir}' does not specify filesystem - " + "will be treated as relative path" + ) def extract_dataset(self) -> None: """Extract the dataset.""" diff --git a/rslp/utils/rslearn.py b/rslp/utils/rslearn.py index e5b4ce7..5f8b34b 100644 --- a/rslp/utils/rslearn.py +++ b/rslp/utils/rslearn.py @@ -28,7 +28,7 @@ class ApplyWindowsArgs: workers: int = 0 batch_size: int = 1 - use_initial_job: bool = False + use_initial_job: bool = False # TODO: match no use_initial_job jobs_per_process: int | None = None group: str | None = None window: str | None = None