# Iris_data_study_classification.py
#Loading different subroutines for main program
#Import and Conversion, Normalization of Data
from Open_conversion_data import load_csv
from Open_conversion_data import str_column_to_float
from Open_conversion_data import str_column_to_int
from Open_conversion_data import dataset_minmax
from Open_conversion_data import Normalize_Dataset
#Splitting dataset in train, test or folds for cv
from Split_dataset import train_test_split
from Split_dataset import cross_validation_split
#KNN algorithm for classification
from My_KNN import getNeighbors
from My_KNN import getResponse
#Accuracy of the predictions
from Performance_assessment import getAccuracy
from Performance_assessment import confusion_matrix
from Performance_assessment import print_confusion_matrix
from Performance_assessment import recall_precision_calc
def main(filename='iris.csv', split=0.6, num_neighbors=3):
    """Run a k-nearest-neighbours classification study on a CSV dataset.

    The steps, in order: load the file, convert the feature columns to
    float and the class column to int, normalize, split into training and
    test sets, predict a class for every test row, and print the accuracy,
    the confusion matrix and the recall / precision / F1 figures.

    Args:
        filename: Path of the CSV file handed to ``load_csv``.
        split: Second argument of ``train_test_split`` (presumably the
            fraction of rows kept for training -- confirm in Split_dataset).
        num_neighbors: Number of neighbours requested from ``getNeighbors``.

    Returns:
        None. All results are written to standard output.
    """
    # Load iris dataset
    dataset = load_csv(filename)
    print('Loaded data file {0} with {1} rows and {2} columns'.format(filename, len(dataset), len(dataset[0])))
    print('First line of dataset: ', dataset[0])

    # The columns before the class column are features; the class label
    # sits at index 4.
    class_column = 4

    # convert string columns to float
    for column in range(class_column):
        str_column_to_float(dataset, column)

    # convert class column to int
    lookup = str_column_to_int(dataset, class_column)
    print('First line of dataset with class defined by integer: ', dataset[0])
    print('')
    print('Dictionary of lookup classes: ', lookup)
    print('\n')

    # normalization of dataset
    # NOTE(review): the return value of Normalize_Dataset is not used, so it
    # presumably rescales `dataset` in place -- confirm in Open_conversion_data.
    minmax = dataset_minmax(dataset)
    Normalize_Dataset(dataset, minmax)

    # Splitting dataset between Training and Testing Set
    trainingSet, testSet = train_test_split(dataset, split)

    # generate predictions
    print('Algorithm solving:')
    predictions = []
    for test_row in testSet:
        neighbors = getNeighbors(trainingSet, test_row, num_neighbors, "Euclidean")
        classify = getResponse(neighbors)
        predictions.append(classify)
        # The last element of each row is the actual class label.
        print('> predicted=' + repr(classify) + ', actual=' + repr(test_row[-1]))

    # Accuracy Assessment
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy :' + repr(accuracy) + '%')
    unique, matrix = confusion_matrix(testSet, predictions)
    print('\n')
    print_confusion_matrix(unique, matrix)
    print('\n')

    # Calculate properties for recall and precision
    Recall, Precision, F1_score = recall_precision_calc(matrix)
    print('Recall:', Recall)
    print('Precision:', Precision)
    print('F1 score:', F1_score)
# Run the study only when this file is executed as a script, so that
# importing the module does not trigger file loading and printing.
if __name__ == "__main__":
    main()