-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtraining_Validation_Insertion.py
69 lines (54 loc) · 3.52 KB
/
training_Validation_Insertion.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from datetime import datetime
from Training_Raw_data_validation.rawValidation import Raw_Data_validation
from DataTypeValidation_Insertion_Training.DataTypeValidation import dBOperation
from DataTransform_Training.DataTransformation import dataTransform
from application_logging import logger
class train_validation:
    """Drive raw-file validation, transformation and database loading for training data.

    The class wires together three helper objects (raw-data validator, data
    transformer, database operations) and a file-based logger, and exposes a
    single entry point, :meth:`train_validation`, that runs every step in order.
    """

    def __init__(self, path):
        """Create the helper objects and open the main training log file.

        Args:
            path: Forwarded unchanged to ``Raw_Data_validation``.
                NOTE(review): presumably the directory containing the raw
                training batch files -- confirm against the caller.
        """
        self.raw_data = Raw_Data_validation(path)
        self.dataTransform = dataTransform()
        self.dBOperation = dBOperation()
        # Opened in append mode here; train_validation() is responsible for
        # closing it once the run finishes (successfully or not).
        self.file_object = open("Training_Logs/Training_Main_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def train_validation(self):
        """Run the complete validation / transformation / load sequence once.

        Steps, in order: read the schema values, build the filename regex,
        validate file names, column counts and whole-column missing values,
        transform the data, create the 'Training' table, insert the good data,
        delete the good-data folder, archive the bad files, and export the
        table to CSV. Progress messages are written to ``self.file_object``
        through ``self.log_writer``.

        The log file is closed when this method exits, whether it succeeds or
        raises, so the instance cannot be used for a second run.

        Raises:
            Exception: whatever a step raises propagates to the caller
                unchanged; the log file is still closed first.
        """
        try:
            self.log_writer.log(self.file_object, 'Start of Validation on files!!')

            # Extract the expected filename stamp lengths, column names and
            # column count from the schema.
            LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, noofcolumns = \
                self.raw_data.valuesFromSchema()

            # Get the regex used to validate file names.
            regex = self.raw_data.manualRegexCreation()

            # Validate the file names of the raw training files.
            self.raw_data.validationFileNameRaw(regex, LengthOfDateStampInFile, LengthOfTimeStampInFile)

            # Validate the number of columns in each file.
            self.raw_data.validateColumnLength(noofcolumns)

            # Validate that no column has all of its values missing.
            self.raw_data.validateMissingValuesInWholeColumn()
            self.log_writer.log(self.file_object, "Raw Data Validation Complete!!")

            self.log_writer.log(self.file_object, "Starting Data Transforamtion!!")
            # Per the original author's note: adds quotes to the '?' values in
            # some columns.
            self.dataTransform.addQuotesToStringValuesInColumn()
            self.log_writer.log(self.file_object, "DataTransformation Completed!!!")

            self.log_writer.log(self.file_object,
                                "Creating Training_Database and tables on the basis of given schema!!!")
            # Create the database with the given name (or open the connection
            # if it already exists) and create the table from the schema columns.
            self.dBOperation.createTableDb('Training', column_names)
            self.log_writer.log(self.file_object, "Table creation Completed!!")

            self.log_writer.log(self.file_object, "Insertion of Data into Table started!!!!")
            # Insert the validated csv files into the table.
            self.dBOperation.insertIntoTableGoodData('Training')
            self.log_writer.log(self.file_object, "Insertion in Table completed!!!")

            self.log_writer.log(self.file_object, "Deleting Good Data Folder!!!")
            # Delete the good-data folder after its files are loaded.
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            self.log_writer.log(self.file_object, "Good_Data folder deleted!!!")

            self.log_writer.log(self.file_object, "Moving bad files to Archive and deleting Bad_Data folder!!!")
            # Move the rejected files to the archive folder.
            self.raw_data.moveBadFilesToArchiveBad()
            self.log_writer.log(self.file_object, "Bad files moved to archive!! Bad folder Deleted!!")
            self.log_writer.log(self.file_object, "Validation Operation completed!!")

            self.log_writer.log(self.file_object, "Extracting csv file from table")
            # Export the table contents to a csv file.
            self.dBOperation.selectingDatafromtableintocsv('Training')
        finally:
            # Always release the log file handle; previously it leaked
            # whenever any step above raised.
            self.file_object.close()