Skip to content

Commit

Permalink
Add method to create a more complete data file for model training
Browse files Browse the repository at this point in the history
  • Loading branch information
ChronoBoot committed Jan 7, 2024
1 parent 3c8b541 commit 26c6015
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 2 deletions.
18 changes: 16 additions & 2 deletions backend/src/data_processing/simple_read_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,21 @@ def read_data(self, files_path: str, concat: bool, sampling_frequency: int) -> p
temp_data = pd.read_csv(f"{files_path}/{file_name}")
temp_data = temp_data[temp_data['SK_ID_CURR'].isin(train_data['SK_ID_CURR'])]
aggregated_data = aggregation_method(temp_data)
data = pd.merge(data, aggregated_data, on="SK_ID_CURR")
data = pd.merge(data, aggregated_data, on="SK_ID_CURR", how="outer")

# Concatenate all the data into a single DataFrame
return data
return data

def write_data_for_model(self, files_path: str, filename: str):
    """
    Write the consolidated data file used for model training.

    The data is the merge of the training data and the aggregated data
    from the other tables, as produced by `read_data`.

    Parameters:
        files_path (str): The directory where the input files are located
            and where the output file will be written.
        filename (str): The name of the CSV file to write.
    """
    data = self.read_data(files_path, concat=True, sampling_frequency=1)

    # Bug fix: the output path previously ignored `filename` and used a
    # hard-coded value; write to the caller-supplied file name instead.
    data.to_csv(f"{files_path}/{filename}", index=False)
6 changes: 6 additions & 0 deletions backend/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# Shared reader instance used by the route handlers below.
reader = SimpleReadData()

# Directory containing the input CSV files.
FILES_FOLDER = 'data'
# File name for the consolidated model-training dataset.
DATA_FILE_MODEL = 'data_for_model.csv'

@app.route('/test', methods=['GET'])
def test():
Expand Down Expand Up @@ -46,6 +47,11 @@ def most_important_features():
features = predictor.get_most_important_features(nb_features)
return jsonify({'features': features.to_dict()}), 200

@app.route('/write_model_data', methods=['GET'])
def write_model_data():
    """Generate the consolidated model-training data file on disk."""
    reader.write_data_for_model(FILES_FOLDER, DATA_FILE_MODEL)
    payload = {'message': 'Model data written successfully'}
    return jsonify(payload), 200

# Start the Flask development server when this module is run directly.
# NOTE(review): debug=True enables the interactive debugger and auto-reload;
# it must not be used in production deployments.
if __name__ == '__main__':
    app.run(debug=True)

21 changes: 21 additions & 0 deletions backend/tests/data_processing/test_simple_read_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,27 @@ def test_read_data_concat(self, mock_pos, mock_previous, mock_installments, mock

self.assertTrue(isinstance(result, pd.DataFrame))
pd.testing.assert_frame_equal(result, expected_result)

@patch('pandas.DataFrame.to_csv')
@patch('backend.src.data_processing.simple_read_data.SimpleReadData.read_data')
def test_write_data_for_model(self, mock_read_data, mock_to_csv):
    """write_data_for_model fetches the merged data once and writes it as CSV."""
    # Stub out read_data so no real files are touched.
    stub_frame = pd.DataFrame({
        'SK_ID_CURR': [1, 2, 3],
        'DATA': ['A', 'B', 'C'],
    })
    mock_read_data.return_value = stub_frame

    path = 'mock_path'
    name = 'mock_file'

    self.reader.write_data_for_model(path, name)

    # The data must be read with the expected options and written to
    # the caller-specified location without the index column.
    mock_read_data.assert_called_once_with(path, concat=True, sampling_frequency=1)
    stub_frame.to_csv.assert_called_once_with(f"{path}/{name}", index=False)



if __name__ == '__main__':
Expand Down

0 comments on commit 26c6015

Please sign in to comment.