Skip to content

Commit

Permalink
Add option to not rewrite the CSV files if they already exist
Browse files Browse the repository at this point in the history
  • Loading branch information
ChronoBoot committed Jan 17, 2024
1 parent 2e1805a commit 4d2fd5c
Show file tree
Hide file tree
Showing 8 changed files with 28 additions and 15 deletions.
11 changes: 8 additions & 3 deletions backend/src/data_processing/simple_load_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import os
import logging

from dotenv import load_dotenv
import requests
from backend.src.data_processing.load_data_abc import LoadData
import os
Expand All @@ -28,7 +27,8 @@ class SimpleLoadData(LoadData):
]

def __init__(self) -> None:
logging.basicConfig(level=logging.DEBUG)
log_format = "%(asctime)s - %(levelname)s - %(message)s"
logging.basicConfig(format=log_format, level=logging.DEBUG)
logging.debug("SimpleLoadData initialized")

@conditional_profile
Expand All @@ -40,7 +40,7 @@ def download_file(self, url: str, filepath: str) -> None:
f.write(chunk)

@conditional_profile
def load(self, file_urls: list, download_path: str) -> None:
def load(self, file_urls: list, download_path: str, rewrite = False) -> None:
"""
Load data from Azure Blob Storage and save it to a local directory.
Expand All @@ -58,6 +58,11 @@ def load(self, file_urls: list, download_path: str) -> None:
for url in file_urls:
filename = url.split('/')[-1] # Extracts the file name
filepath = f"{download_path}/{filename}"

if os.path.exists(filepath) and not rewrite:
logging.info(f"File {filename} already exists in {download_path}. Skipping download.")
continue

self.download_file(url, filepath)
logging.info(f"Downloaded file {filename} from Azure Blob Storage to {filepath}")
except Exception as e:
Expand Down
3 changes: 2 additions & 1 deletion backend/src/data_processing/simple_read_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ class SimpleReadData(ReadDataABC):
CHUNK_SIZE = 10000

def __init__(self) -> None:
logging.basicConfig(level=logging.DEBUG)
log_format = "%(asctime)s - %(levelname)s - %(message)s"
logging.basicConfig(format=log_format, level=logging.DEBUG)
logging.debug("SimpleReadData initialized")

@conditional_profile
Expand Down
9 changes: 7 additions & 2 deletions backend/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
predictor = RandomForestLoanPredictor()
loader = SimpleLoadData()
reader = SimpleReadData()
logging.basicConfig(level=logging.DEBUG)

log_format = "%(asctime)s - %(levelname)s - %(message)s"
logging.basicConfig(format=log_format, level=logging.DEBUG)
app.logger = logging.getLogger(__name__)
app.logger.addHandler(logging.StreamHandler())
app.logger.setLevel(logging.DEBUG)
Expand All @@ -34,11 +36,14 @@ def test():

@app.route('/train', methods=['POST'])
def train():
app.logger.info('Training model...')
data = request.get_json()
sampling_frequency = int(data['sampling_frequency'])
target_variable = data['target_variable']
rewrite = data['rewrite'] if 'rewrite' in data else "False"
rewrite_bool = True if rewrite == "True" else False

loader.load(SimpleLoadData.CSV_URLS, FILES_FOLDER)
loader.load(SimpleLoadData.CSV_URLS, FILES_FOLDER, rewrite_bool)
reader.write_data(FILES_FOLDER, DATA_FILE_MODEL, sampling_frequency)

loans = reader.read_data(FILES_FOLDER, DATA_FILE_MODEL)
Expand Down
4 changes: 3 additions & 1 deletion backend/src/models/random_forest_loan_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ def __init__(self) -> None:
self.y_test = None
self.random_state = 42
self.test_size = 0.2
logging.basicConfig(level=logging.DEBUG)

log_format = "%(asctime)s - %(levelname)s - %(message)s"
logging.basicConfig(format=log_format, level=logging.DEBUG)
logging.debug("RandomForestLoanPredictor initialized")

@conditional_profile
Expand Down
7 changes: 1 addition & 6 deletions backend/tests/data_processing/test_simple_load_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,7 @@ def test_load(self, mock_download_file):
# Assert
mock_download_file.assert_called_once_with('https://www.something.com/test.txt', 'test_download_path/test.txt')

@patch.dict('os.environ', {
'AZURE_STORAGE_CONNECTION_STRING': 'DefaultEndpointsProtocol=https;AccountName=testaccount;AccountKey=testkey;BlobEndpoint=testendpoint',
'AZURE_STORAGE_CONTAINER_NAME': 'test_container_name'
})
@patch('backend.src.data_processing.simple_load_data.load_dotenv')
def test_save(self, mock_load_dotenv):
def test_save(self):
# Arrange
simple_load_data = SimpleLoadData()

Expand Down
3 changes: 2 additions & 1 deletion backend/utils/profiling_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@


def is_profiling_enabled():
    """Return whether profiling is switched on via ``ENABLE_PROFILING``.

    The environment variable is read on every call and parsed explicitly.
    ``bool(os.getenv(...))`` must not be used here: every non-empty string,
    including ``"False"`` and ``"0"``, is truthy, so the flag could never be
    turned off by setting it to a "false" value.

    Returns:
        bool: ``True`` when the variable is set to ``true``, ``1``, ``yes``
        or ``on`` (case-insensitive, surrounding whitespace ignored);
        ``False`` for any other value or when the variable is unset.
    """
    raw_flag = os.getenv('ENABLE_PROFILING', "False")
    # Normalise so that "True", "true" and " TRUE " are all treated alike.
    return raw_flag.strip().lower() in {"true", "1", "yes", "on"}

def conditional_profile(func):
if is_profiling_enabled():
Expand Down
3 changes: 2 additions & 1 deletion frontend/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,8 @@ def _main(FREQUENCY : int):
load_dotenv()

# Configure logging
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
log_format = "%(asctime)s - %(levelname)s - %(message)s"
logging.basicConfig(format=log_format, level=logging.DEBUG)

# Parse command line arguments
parser = argparse.ArgumentParser(description="Loan prediction application")
Expand Down
3 changes: 3 additions & 0 deletions frontend/src/ui/dash_user_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ def __init__(self, categorical_values : dict, float_values: dict, loan_example:

self.app.layout = self._create_layout()

log_format = "%(asctime)s - %(levelname)s - %(message)s"
logging.basicConfig(format=log_format, level=logging.DEBUG)

def get_nb_steps(self, min, max) -> int:
"""
Gets the number of steps for the user interface slider.
Expand Down

0 comments on commit 4d2fd5c

Please sign in to comment.