diff --git a/.github/workflows/productionworkflow.yaml b/.github/workflows/productionworkflow.yaml
new file mode 100644
index 0000000..8a0897d
--- /dev/null
+++ b/.github/workflows/productionworkflow.yaml
@@ -0,0 +1,38 @@
+name: Test Production
+
+on:
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  test-production:
+    runs-on: ubuntu-latest
+    env:
+      db_name: ${{ secrets.DB_NAME }}
+      MONGO_URI_NAACP: ${{ secrets.MONGO_URI_NAACP }}
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.9'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r ./se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/requirements.txt
+
+      - name: Google Auth
+        id: auth
+        uses: google-github-actions/auth@v2
+        with:
+          credentials_json: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}
+
+      - name: Run deployment
+        id: deployment
+        run: |
+          python ./se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/main.py
diff --git a/.gitignore b/.gitignore
index 0663cfb..939444f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@ secret.py
 testing-ner.ipynb
 keys/
 env.sh
+.env
 
 # Mill
 combine_rss_articles.ipynb
diff --git a/se_ml_production/ML_backend_GKE/ML_Cloud_Run/Cloud_Run/main.py b/se_ml_production/ML_backend_GKE/ML_Cloud_Run/Cloud_Run/main.py
index 5381927..9ff8cba 100644
--- a/se_ml_production/ML_backend_GKE/ML_Cloud_Run/Cloud_Run/main.py
+++ b/se_ml_production/ML_backend_GKE/ML_Cloud_Run/Cloud_Run/main.py
@@ -3,6 +3,7 @@
 from ML_API import ml_router
 from global_state import global_instance
 from bootstrappers import bootstrap_pipeline, validate_bootstrap, bootstrap_MongoDB_Prod
+import sys
 
 app.include_router(ml_router)
 
@@ -35,6 +36,8 @@ async def startup_event():
         print(f"[Error!] FATAL ERROR! | {e}")
         raise
 
+    sys.exit(0)
+
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8000) # This bootstraps the FastAPI
diff --git a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/ML_Pred_Funcs/ML_funcs.py b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/ML_Pred_Funcs/ML_funcs.py
index 1710c35..ca75b62 100644
--- a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/ML_Pred_Funcs/ML_funcs.py
+++ b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/ML_Pred_Funcs/ML_funcs.py
@@ -1,7 +1,8 @@
 import pandas as pd
 from tqdm import tqdm
+import os
+
-import secret
 from global_state import global_instance
 from processingUtils import get_sentences, get_snippet, check_snippets, run_entity_recognition, run_pipeline
 
@@ -70,7 +71,7 @@ def process_data(chunk, df, data_schema, data_packaging_scheme, nlp_ner):
         ]
 
         for (entities, method) in check_order:
-            check_text, location_geocode, existing_loc_geocode = check_snippets(secret.API_KEY, entities[1], entities[0])
+            check_text, location_geocode, existing_loc_geocode = check_snippets(os.environ['API_KEY'], entities[1], entities[0])
             if not check_text:
                 discarded_articles.append(df['Tagging'][idx])
                 break
diff --git a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/mongo_funcs.py b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/mongo_funcs.py
index 6706b42..2162efa 100644
--- a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/mongo_funcs.py
+++ b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/mongo_funcs.py
@@ -1,9 +1,9 @@
-import secret
+import os
 from pymongo import MongoClient
 
 def connect_MongoDB_Prod():
     try:
-        client = MongoClient(secret.MONGO_URI_NAACP)
+        client = MongoClient(os.environ['MONGO_URI_NAACP'])
         db = client['se_naacp_db']
         return db
     except Exception as err:
@@ -12,7 +12,7 @@ def connect_MongoDB_Prod():
 
 def update_job_status(client, upload_id, user_id, timestamp, article_cnt, status, message):
     try:
-        db = client[secret.db_name]
+        db = client[os.environ['db_name']]
         upload_collection = db["uploads"]
 
         if (upload_collection.find_one({'uploadID': upload_id})):
diff --git a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/production_mongo_funcs.py b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/production_mongo_funcs.py
index 9dbbeaf..dc8bd66 100644
--- a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/production_mongo_funcs.py
+++ b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/production_mongo_funcs.py
@@ -1,7 +1,7 @@
-import secret
 from datetime import datetime
 from global_state import global_instance
 from Mongo_Utils.mongo_funcs import connect_MongoDB_Prod
+import os
 
 def convert_to_datesum(s):
     date_formatted = s.replace('-', '').replace(' ', '').replace(':', '')
@@ -34,7 +34,7 @@ def addExistingTracts(tract_collection):
 def send_Discarded(client, discard_list):
     try:
         # Pack and send all articles
-        db_prod = client[secret.db_name]
+        db_prod = client[os.environ['db_name']]
         discarded_collection_name = "discarded"
         discarded_collection = db_prod[discarded_collection_name]
 
@@ -62,7 +62,7 @@ def send_Discarded(client, discard_list):
 # ==== Packing Funcs ====
 def send_to_production(client, df):
     try:
-        db_prod = client[secret.db_name]
+        db_prod = client[os.environ['db_name']]
 
         # Pack and send all articles
         pack_articles(db_prod, df)
@@ -84,22 +84,24 @@ def pack_articles(db_prod, df):
         collection_list = db_prod.list_collection_names()
 
         if articles_collection_name not in collection_list:
-			db_prod.create_collection(articles_collection_name)
-			print(f"[INFO] Collection '{articles_collection_name}' created.")
+            db_prod.create_collection(articles_collection_name)
+            print(f"[INFO] Collection '{articles_collection_name}' created.")
 
         article_df = df.set_index('id')
         article_dict = article_df.T.to_dict('dict')
 
         for article_key in article_dict.keys():
-			article = article_dict[article_key]
-			if ('openai_labels' not in article):
-				article["openai_labels"] = []
-			else:
-				article["openai_labels"] = string_to_list(article["openai_labels"])
-			article["dateSum"] = convert_to_datesum(article["pub_date"])
-			article_payload.append(article)
+            article = article_dict[article_key]
+            if ('openai_labels' not in article):
+                article["openai_labels"] = []
+            else:
+                article["openai_labels"] = string_to_list(article["openai_labels"])
+            article["dateSum"] = convert_to_datesum(article["pub_date"])
+            article_payload.append(article)
 
         articles_collection.insert_many(article_payload)
+
+        print("[INFO] Articles Successfully inserted!")
 
         return
     except Exception as err:
diff --git a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/bootstrappers.py b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/bootstrappers.py
index 3905ff0..e2242d2 100644
--- a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/bootstrappers.py
+++ b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/bootstrappers.py
@@ -1,7 +1,6 @@
 import os
 import json
 import zipfile
-import secret
 from art import *
 from bson import ObjectId
 from pymongo import MongoClient
@@ -235,7 +234,7 @@ def bootstrap_MongoDB_Prod(client, defined_collection_names):
         if (client == None):
             raise Exception("No database was given!")
 
-        db_prod = client[secret.db_name]
+        db_prod = client[os.environ['db_name']]
 
         # Here we check for the upload collection and make it if it doesn't exist
         collection_list = db_prod.list_collection_names()
diff --git a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/csv_funcs.py b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/csv_funcs.py
index 5defbc6..9a54d3e 100644
--- a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/csv_funcs.py
+++ b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/csv_funcs.py
@@ -1,4 +1,4 @@
-import secret
+import os
 import pandas as pd
 from io import StringIO # Import StringIO
 from fastapi import UploadFile # For typing
@@ -26,7 +26,7 @@ def is_duplicate_discarded(tag, discarded_collection):
     return discarded_collection.count_documents(queryDiscarded) > 0
 
 def run_validation(client, df):
-    db_prod = client[secret.db_name]
+    db_prod = client[os.environ['db_name']]
     collection_list = db_prod.list_collection_names()
 
     if ('articles_data' in collection_list):
diff --git a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/main.py b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/main.py
index 361036e..446315e 100644
--- a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/main.py
+++ b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/main.py
@@ -8,7 +8,6 @@
 from google.cloud import pubsub_v1
 
 import nltk
-import secret
 from ML_Entry import run_pipeline
 from global_state import global_instance
 from Mongo_Utils.mongo_funcs import connect_MongoDB_Prod
@@ -73,7 +72,7 @@ def startup_event():
         db_prod = connect_MongoDB_Prod()
         db_manager = global_instance.get_data("db_manager") # We then create our first MongoDB connection
-        db_manager.init_connection(uri=secret.MONGO_URI_NAACP)
+        db_manager.init_connection(uri=os.environ['MONGO_URI_NAACP'])
 
         db_manager.run_job(
             bootstrap_MongoDB_Prod,
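
Note on configuration: the diff above replaces every `import secret` with `os.environ` lookups, so the services now expect `MONGO_URI_NAACP`, `db_name`, and `API_KEY` to be present in the environment (the new workflow injects the first two from GitHub secrets, and `.env` is git-ignored for local values). A minimal fail-fast check under those assumptions is sketched below; the helper name and its placement are illustrative and not part of this PR.

```python
import os

# Variables the refactored modules read via os.environ (per the diff above).
REQUIRED_ENV_VARS = ("MONGO_URI_NAACP", "db_name", "API_KEY")

def assert_env_configured():
    """Raise early with a clear message instead of failing mid-pipeline on a KeyError."""
    missing = [name for name in REQUIRED_ENV_VARS if not os.environ.get(name)]
    if missing:
        raise RuntimeError(f"Missing required environment variables: {', '.join(missing)}")

if __name__ == "__main__":
    assert_env_configured()
    print("[INFO] Environment configuration looks complete.")
```

Locally, these values could be exported in the shell or loaded from the git-ignored `.env` file before `main.py` starts; the check itself relies only on the standard library.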