Skip to content

Commit

Permalink
add file system to the index cache so it won't be treated as relative
Browse files Browse the repository at this point in the history
  • Loading branch information
Hgherzog committed Jan 7, 2025
1 parent b6234fd commit ccfbab7
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/deploy_image_on_vm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ extra_args_model_predict="$extra_args_model_predict" \
-e CLOUDSDK_AUTH_ACCESS_TOKEN=$(gcloud auth application-default print-access-token) \
-e PL_API_KEY=$PL_API_KEY \
-e TILE_STORE_ROOT_DIR=$TILE_STORE_ROOT_DIR \
-e INDEX_CACHE_DIR=/index_cache \
-e INDEX_CACHE_DIR=file:///index_cache \
-v $LOCAL_INDEX_CACHE_DIR:/index_cache \
$DOCKER_IMAGE /bin/bash -c "$COMMAND" && \
echo "Data Extraction Complete" && \
Expand Down
2 changes: 1 addition & 1 deletion rslp/forest_loss_driver/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Required environment variables:
- `RSLP_PREFIX`: GCS bucket prefix for model checkpoints \

Optional environment variables:
- `INDEX_CACHE_DIR`: Directory for caching image indices
- `INDEX_CACHE_DIR`: Directory for caching image indices. MUST SPECIFY THE FILE SYSTEM (e.g. `file://`, `gs://`, or `s3://`) OR IT WILL BE TREATED AS A RELATIVE PATH
- `TILE_STORE_ROOT_DIR`: Directory for tile storage cache
- `PL_API_KEY`: Planet API key (if using Planet imagery)

Expand Down
2 changes: 1 addition & 1 deletion rslp/forest_loss_driver/inference/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def _get_most_recent_friday() -> datetime:
def _default_ds_root() -> str:
friday = PredictPipelineConfig._get_most_recent_friday()
dated_dataset_name = f"dataset_{friday.strftime('%Y%m%d')}"
return f"{os.environ.get('RSLP_PREFIX', 'gs://rslearn-eai')}/datasets/forest_loss_driver/final_test_6/prediction/{dated_dataset_name}"
return f"{os.environ.get('RSLP_PREFIX', 'gs://rslearn-eai')}/datasets/forest_loss_driver/final_test_7/prediction/{dated_dataset_name}"

model_predict_args: ModelPredictArgs
ds_root: str = field(default_factory=_default_ds_root)
Expand Down
11 changes: 9 additions & 2 deletions rslp/forest_loss_driver/predict_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,15 @@ def _validate_required_env_vars(
logger.warning(
f"The following optional environment variables are missing: {missing_optional_vars_str}"
)
logger.info(f"Environment variables: {os.environ}")
# check that we have PL_API_KEY if we are looking for planet images
if "INDEX_CACHE_DIR" in os.environ:
cache_dir = os.environ["INDEX_CACHE_DIR"]
if not any(
cache_dir.startswith(prefix) for prefix in ["gs://", "s3://", "file://"]
):
logger.warning(
f"INDEX_CACHE_DIR '{cache_dir}' does not specify filesystem - "
"will be treated as relative path"
)

def extract_dataset(self) -> None:
"""Extract the dataset."""
Expand Down
2 changes: 1 addition & 1 deletion rslp/utils/rslearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class ApplyWindowsArgs:

workers: int = 0
batch_size: int = 1
use_initial_job: bool = False
use_initial_job: bool = False # TODO: match no use_initial_job
jobs_per_process: int | None = None
group: str | None = None
window: str | None = None
Expand Down

0 comments on commit ccfbab7

Please sign in to comment.