Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rdrp 787 test export #358

Merged
merged 26 commits into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
938c71f
Prepared mod runner
zorge69 Oct 3, 2024
5ab04d5
Created rd_delete_file
zorge69 Oct 3, 2024
e42ef17
Created rd_md5sum
zorge69 Oct 3, 2024
7bfd56d
Added rd_isdir
zorge69 Oct 3, 2024
13eea1c
Added rd_isfile
zorge69 Oct 3, 2024
b13c1f5
Added and tested rd_stat_size
zorge69 Oct 4, 2024
33ca7fc
Created and tested rd_read_header
zorge69 Oct 4, 2024
fce04ab
Created and tested rd_write_string_to_file
zorge69 Oct 4, 2024
8d4a0f4
Added and tested rd_copy_file
zorge69 Oct 4, 2024
f703cd6
Added rd_move_file
zorge69 Oct 4, 2024
3862631
Added rd_search_file
zorge69 Oct 4, 2024
b937ba5
Updated copy and move
zorge69 Oct 7, 2024
d8b1fef
Improved style
zorge69 Oct 7, 2024
d846802
Fixed encoding in write string
zorge69 Oct 7, 2024
1f0514e
Changed platform to network
zorge69 Oct 7, 2024
dd8417e
Added read_excel
zorge69 Oct 8, 2024
8348617
Added read_csv and openpyxl requirement
zorge69 Oct 8, 2024
5ad9092
update user and dev config for s3 testing
AnneONS Oct 22, 2024
d4d2c45
merged in develop
AnneONS Oct 22, 2024
e45710c
Changed platform to network so it can pass the unit test
zorge69 Oct 22, 2024
2f55366
update the export mod tests
AnneONS Oct 22, 2024
1276fbd
merge in develop
AnneONS Oct 22, 2024
e374ef9
Removed read_excel function from s3_mods and openpyxl from requirement…
zorge69 Oct 22, 2024
8f2187d
Updated rd_copy_file doc string
zorge69 Oct 22, 2024
1aef038
Commented three requirements so the unit test can pass
zorge69 Oct 22, 2024
545784c
update version to 2.2.0
AnneONS Oct 22, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions export_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@
import os

from importlib import reload

# Change to the project repository location
my_wd = os.getcwd()
my_repo = "research-and-development"
if not my_wd.endswith(my_repo):
os.chdir(my_repo)

from src.outputs import export_files

reload(export_files)
Expand Down
95 changes: 95 additions & 0 deletions export_mods_main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
"""Script that creates all directories"""
import os

# NOTE: this script is a manual runner for trying out functions from
# src.utils.s3_mods; it sets up a boto3 client and then calls into `mods`.

# Change to the project repository location if we are not already there.
# NOTE(review): presumably this is what lets the `src` imports below resolve
# when the script is started from the parent directory - confirm.
my_wd = os.getcwd()
my_repo = "research-and-development"
already_in_repo = my_wd.endswith(my_repo)
if not already_in_repo:
    os.chdir(my_repo)

# Deliberately imported after the working directory has been set.
from src.utils.singleton_boto import SingletonBoto  # noqa: E402

# Minimal configuration handed to SingletonBoto.get_client.
s3_settings = {
    "ssl_file": "/etc/pki/tls/certs/ca-bundle.crt",
    "s3_bucket": "onscdp-dev-data01-5320d6ca",
}
config = {"s3": s3_settings}

# The client is created before s3_mods is imported.
# NOTE(review): presumably s3_mods relies on the singleton client already
# existing at import time - confirm before reordering these two statements.
boto3_client = SingletonBoto.get_client(config)
from src.utils import s3_mods as mods  # noqa: E402
zorge69 marked this conversation as resolved.
Show resolved Hide resolved


if __name__ == "__main__":

    # Sample directory and file used by the commented-out checks below.
    # Uncomment one section at a time to try the corresponding function.
    my_dir = "/bat/res_dev/project_data/2023_surveys/BERD/01_staging/staging_qa/full_responses_qa/"
    my_path = "/bat/res_dev/project_data/2023_surveys/BERD/01_staging/staging_qa/full_responses_qa/2023_staged_BERD_full_responses_24-10-02_v20.csv"

    # # Checking that file exists
    # my_size = mods.rd_file_size(my_path)
    # print(f"File size is {my_size}")

    # # Deleting a file
    # status = mods.rd_delete_file(my_path)
    # if status:
    #     print(f"File {my_path} successfully deleted")

    # # Calculating md5sum
    # my_sum = mods.rd_md5sum(my_path)
    # expected_output = "ea94424aceecf11c8a70d289e51c34ea"
    # print(type(my_sum))
    # if expected_output == my_sum:
    #     print("Same md5sum")

    # # Calculating rd_isdir
    # mydir = "bat"
    # response = mods.rd_isdir(mydir)

    # print("Got response")
    # print(response)

    # # Checking rd_isfile
    # response = mods.rd_isfile(my_path)
    # print(response)

    # # Checking that rd_stat_size works for files and directories
    # file_size = mods.rd_stat_size(my_path)
    # print(f"File {my_path} size is {file_size} bytes.")

    # dir_size = mods.rd_stat_size(my_dir)
    # print(f"Directory {my_dir} size is {dir_size} bytes.")

    # # Testing rd_read_header
    # response = mods.rd_read_header(my_path)
    # print(response)

    # # Testing rd_write_string_to_file
    # out_path = "/bat/res_dev/project_data/write_string_test.txt"
    # content = "New content"
    # mods.rd_write_string_to_file(content.encode(encoding="utf-8"), out_path)
    # print("all done")

    # # Testing rd_copy_file
    # src_path = "/bat/res_dev/project_data/write_string_test.txt"
    # dst_path = "/bat/res_dev/"
    # success = mods.rd_copy_file(src_path, dst_path)
    # if success:
    #     print("File copied successfully")

    # # Testing rd_move_file
    # src_path = "/bat/res_dev/write_string_test_copy.txt"
    # dst_path = "/bat/res_dev/project_data/"
    # success = mods.rd_move_file(src_path, dst_path)
    # if success:
    #     print("File moved successfully")

    # # Testing rd_search_file
    # dir_path = "bat/res_dev/project_data/2023_surveys/BERD/01_staging/staging_qa/full_responses_qa/"
    # ending = "24-10-02_v20.csv"

    # found_file = mods.rd_search_file(dir_path, ending)
    # print(f"Found file: {found_file}")

    # Currently active check: read an Excel file through s3_mods and print
    # the first rows of the result.
    my_path = "bat/res_dev/project_data/test_excel_gz.xlsx"
    df = mods.read_excel(my_path)
    preview = df.head()
    print(preview)
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ typing
# boto3
# raz_client
# rdsa-utils==2.0.2
openpyxl
2 changes: 1 addition & 1 deletion src/dev_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ global:
table_config: "SingleLine"
# Environment settings
dev_test : False
platform: network #whether to load from hdfs, network (Windows) or s3 (CDP)
platform: network # network #whether to load from hdfs, network (Windows) or s3 (CDP)
load_from_feather: False
runlog_writer:
write_csv: True # Write the runlog to a CSV file
Expand Down
10 changes: 6 additions & 4 deletions src/outputs/export_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def check_files_exist(file_list: List, config: dict, isfile: callable):
for file in file_list:
file_path = Path(file) # Changes to path if str
OutgoingLogger.debug(f"Using {platform} isfile function")
if not isfile(file_path):
if not isfile(str(file_path)):
OutgoingLogger.error(
f"File {file} does not exist. Check existence and spelling"
)
Expand All @@ -152,7 +152,7 @@ def transfer_files(source, destination, method, logger, copy_files, move_files):
"""
transfer_func = {"copy": copy_files, "move": move_files}[method]
past_tense = {"copy": "copied", "move": "moved"}[method]
transfer_func(source, destination)
transfer_func(str(source), destination)

logger.info(f"Files {source} successfully {past_tense} to {destination}.")

Expand Down Expand Up @@ -227,10 +227,12 @@ def run_export(user_config_path: str, dev_config_path: str):
platform = config["global"]["platform"]

if platform == "s3":
# create singleton boto3 client object & pass in bucket string
from src.utils.singleton_boto import SingletonBoto

boto3_client = SingletonBoto.get_client(config) # noqa
from src.utils import s3_mods as mods

# Creating boto3 client and adding it to the config dict
config["client"] = mods.create_client(config)
elif platform == "network":
# If the platform is "network" or "hdfs", there is no need for a client.
# Adding a client = None for consistency.
Expand Down
2 changes: 0 additions & 2 deletions src/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,6 @@ def run_pipeline(user_config_path, dev_config_path):
boto3_client = SingletonBoto.get_client(config) # noqa
from src.utils import s3_mods as mods

# Creating boto3 client and adding it to the config dict
# config["client"] = boto3_client
elif platform == "network":
# If the platform is "network" or "hdfs", there is no need for a client.
# Adding a client = None for consistency.
Expand Down
4 changes: 2 additions & 2 deletions src/user_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,8 @@ export_choices:
export_fte_total_qa: None
export_status_filtered: None
export_frozen_group: None
export_staged_BERD_full_responses: None
export_staged_BERD_full_responses: "2023_staged_BERD_full_responses_24-10-14_v33.csv"
export_staged_NI_full_responses: None
export_full_responses_imputed: None
export_full_estimation_qa: None # "2022_full_estimation_qa_24-07-15_v555.csv"
export_invalid_unrecognised_postcodes: "2022_invalid_unrecognised_postcodes_24-07-04_v503.csv"
export_invalid_unrecognised_postcodes: None # "2022_invalid_unrecognised_postcodes_24-07-04_v503.csv"
Loading
Loading