Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add script to call sm2a promotion pipeline #228

Merged
merged 20 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 24 additions & 3 deletions .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ jobs:
collection_ids=""
pip install -r ./scripts/requirements.txt
for file in ${PUBLISHED_COLLECTION_FILES}; do
collection_id=$(python3 ./scripts/generate-mdx.py "$file")
collection_id=$(python3 ./scripts/generate_mdx.py "$file")
collection_id=$(echo "$collection_id" | sed 's/^["\s]*//;s/["\s]*$//')
echo "Processed collection ID: $collection_id"
collection_ids="$collection_ids$collection_id,"
Expand Down Expand Up @@ -344,11 +344,32 @@ jobs:
echo "Updated Comment Body: $UPDATED_BODY"

publish-to-prod-on-pr-merge:
if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged == true }}
# TEMPORARILY COMMENTED OUT TO TEST API REQUEST
# if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged == true }}
botanical marked this conversation as resolved.
Show resolved Hide resolved
runs-on: ubuntu-latest
environment: staging
needs: [publish-new-datasets, create-mdx-files-and-open-pr]
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Use output from publish-new-datasets
run: |
echo "The output from the previous step is: ${{ needs.publish-new-datasets.outputs.publishedCollections }}"

- name: Publish to production on PR merge
run: echo "NO-OP. This step runs when a PR is merged."
env:
PUBLISHED_COLLECTION_FILES: ${{ needs.publish-new-datasets.outputs.publishedCollections }}
SM2A_ADMIN_USERNAME: ${{ secrets.SM2A_ADMIN_USERNAME }}
SM2A_ADMIN_PASSWORD: ${{ secrets.SM2A_ADMIN_PASSWORD }}
SM2A_API_URL: ${{ vars.SM2A_API_URL }}
PROMOTION_DAG: ${{ vars.PROMOTION_DAG_NAME }}

run: |
echo $PUBLISHED_COLLECTION_FILES
collection_ids=""
pip install -r ./scripts/requirements.txt
for file in ${PUBLISHED_COLLECTION_FILES}; do
python3 ./scripts/promote_to_production.py "$file"
echo "Processed file: $file"
done
31 changes: 31 additions & 0 deletions ingestion-data/staging/dataset-config/test-sm2a.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"collection": "hls-swir-falsecolor-composite-TEST-SM2A",
"title": "HLS SWIR FalseColor Composite",
"spatial_extent": {
"xmin": -156.75,
"ymin": 20.80,
"xmax": -156.55,
"ymax": 20.94
},
"temporal_extent": {
"startdate": "2023-08-08T00:00:00Z",
"enddate": "2023-08-08T23:59:59Z"
},
"data_type": "cog",
"license": "CC0-1.0",
"description": "HLS falsecolor composite imagery using Bands 12, 8A, and 4.",
"is_periodic": false,
"time_density": "day",
"sample_files": [
"s3://veda-data-store-staging/maui-fire/Lahaina_HLS_2023-08-08_SWIR_falsecolor_cog.tif",
"s3://veda-data-store-staging/maui-fire/Lahaina_HLS_2023-08-13_SWIR_falsecolor_cog.tif"
],
"discovery_items": [
{
"discovery": "s3",
"prefix": "maui-fire/",
"bucket": "veda-data-store-staging",
"filename_regex": "(.*)SWIR_falsecolor(.*).tif$"
}
]
}
31 changes: 0 additions & 31 deletions ingestion-data/staging/dataset-config/test.json
Original file line number Diff line number Diff line change
@@ -1,31 +0,0 @@
{
"collection": "hls-swir-falsecolor-composite-THIRD-TEST",
"title": "HLS SWIR FalseColor Composite",
"spatial_extent": {
"xmin": -156.75,
"ymin": 20.80,
"xmax": -156.55,
"ymax": 20.94
},
"temporal_extent": {
"startdate": "2023-08-08T00:00:00Z",
"enddate": "2023-08-08T23:59:59Z"
},
"data_type": "cog",
"license": "CC0-1.0",
"description": "HLS falsecolor composite imagery using Bands 12, 8A, and 4.",
"is_periodic": false,
"time_density": "day",
"sample_files": [
"s3://veda-data-store-staging/maui-fire/Lahaina_HLS_2023-08-08_SWIR_falsecolor_cog.tif",
"s3://veda-data-store-staging/maui-fire/Lahaina_HLS_2023-08-13_SWIR_falsecolor_cog.tif"
],
"discovery_items": [
{
"discovery": "s3",
"prefix": "maui-fire/",
"bucket": "veda-data-store-staging",
"filename_regex": "(.*)SWIR_falsecolor(.*).tif$"
}
]
}
File renamed without changes.
97 changes: 97 additions & 0 deletions scripts/promote_to_production.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
from typing import Dict, Any

import http.client
import json
import sys
import os
import uuid
from base64 import b64encode


class MissingFieldError(Exception):
    """Raised when a dataset-config discovery item lacks a required field."""


def validate_discovery_item_config(item: Dict[str, Any]) -> Dict[str, Any]:
    """Validate that a discovery item has every field the promotion DAG needs.

    Args:
        item: One entry from a dataset config's ``discovery_items`` list.

    Returns:
        The same ``item`` dict, unchanged, when validation passes.

    Raises:
        MissingFieldError: If any required field is absent from ``item``.
    """
    # Bug fix: the original messages were plain strings containing a literal
    # "{item}" placeholder (missing f-prefix), so the offending item was never
    # shown. Loop over the required fields instead of four copy-pasted checks.
    for required_field in ("bucket", "discovery", "filename_regex", "prefix"):
        if required_field not in item:
            raise MissingFieldError(
                f"Missing required field '{required_field}' in discovery item: {item}"
            )
    return item


def promote_to_production(payload):
    """Trigger the SM2A promotion DAG via the Airflow stable REST API.

    Reads connection settings from the environment:
    SM2A_API_URL (host), SM2A_ADMIN_USERNAME / SM2A_ADMIN_PASSWORD (basic
    auth), and optionally PROMOTION_DAG_NAME (defaults to
    ``veda_promotion_pipeline``).

    Args:
        payload: Dict merged into the dag-run request body; expected to carry
            a ``conf`` key with the DAG input.

    Returns:
        Dict with ``statusCode`` (HTTP status int) and ``body`` (decoded
        response text) from the dagRuns endpoint.

    Raises:
        ValueError: If any required environment variable is unset.
    """
    base_api_url = os.getenv("SM2A_API_URL")
    promotion_dag = os.getenv("PROMOTION_DAG_NAME", "veda_promotion_pipeline")
    username = os.getenv("SM2A_ADMIN_USERNAME")
    password = os.getenv("SM2A_ADMIN_PASSWORD")

    # Validate the raw settings: the b64-encoded token is always truthy
    # (even for "None:None"), so it cannot be used to detect missing
    # credentials. Check username/password directly instead.
    if not base_api_url or not username or not password:
        raise ValueError(
            "SM2A_API_URL or SM2A_ADMIN_USERNAME or SM2A_ADMIN_PASSWORD is not"
            + " set in the environment variables."
        )

    # Security fix: never print the password or the encoded token — this runs
    # in CI and would leak secrets into the build logs.
    api_token = b64encode(f"{username}:{password}".encode()).decode()

    headers = {
        "Content-Type": "application/json",
        "Authorization": "Basic " + api_token,
    }

    body = {
        **payload,
        # Unique run id so repeated promotions of the same DAG don't collide.
        "dag_run_id": f"{promotion_dag}-{uuid.uuid4()}",
        "note": "Run from GitHub Actions veda-data",
    }

    http_conn = http.client.HTTPSConnection(base_api_url)
    try:
        # request() returns None; the response object comes from getresponse().
        http_conn.request(
            "POST",
            f"/api/v1/dags/{promotion_dag}/dagRuns",
            json.dumps(body),
            headers,
        )
        response = http_conn.getresponse()
        response_data = response.read()
    finally:
        # Close the socket even if the request raises.
        http_conn.close()

    # Typo fix: the original printed a stray "$" from a JS-style template.
    print(f"Response: {response_data}")

    return {"statusCode": response.status, "body": response_data.decode()}


if __name__ == "__main__":
    try:
        # sys.argv[1] is the path to a dataset-config JSON file (see
        # ingestion-data/staging/dataset-config/).
        with open(sys.argv[1], "r") as file:
            # Renamed from `input` — that shadowed the builtin.
            config = json.load(file)

        discovery_items = config.get("discovery_items")
        # Fail with a clear message instead of a cryptic TypeError when the
        # config has no discovery_items key.
        if not discovery_items:
            raise MissingFieldError(
                f"No 'discovery_items' found in file {sys.argv[1]}"
            )
        validated_discovery_items = [
            validate_discovery_item_config(item) for item in discovery_items
        ]

        # Booleans/flags are stringified because the DAG conf passes values
        # through as strings (matches the original behavior exactly).
        dag_input = {
            "collection": config.get("collection"),
            "data_type": config.get("data_type"),
            "description": config.get("description"),
            "discovery_items": validated_discovery_items,
            "is_periodic": str(config.get("is_periodic", "true")),
            "time_density": config.get("time_density"),
            "title": config.get("title"),
            "transfer": config.get("transfer", "false"),
        }

        dag_payload = {"conf": dag_input}
        promote_to_production(dag_payload)

    except json.JSONDecodeError as err:
        # Chain the decode error so the original position info isn't lost.
        raise ValueError(f"Invalid JSON content in file {sys.argv[1]}") from err
Loading