-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
64 lines (53 loc) · 1.94 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sn
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
from sklearn import preprocessing
import statsmodels.api as sm
import scipy.optimize as opt
import numpy as np
import pylab as pl
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import jaccard_score
# Dataset
# Read the CSV, discard the 'education' column, then remove every row that
# still contains a NaN in any remaining column.
chd_data = (
    pd.read_csv('massachusetts.csv')
    .drop(columns=['education'])
    .dropna()
)
# Bar chart of how many rows fall under each TenYearCHD value
# (i.e. the number of patients affected / not affected with CHD).
# The figure is created here but not displayed yet; it is rendered by the
# next plt.show() call in the script.
plt.figure(figsize=(8, 6))
sn.countplot(data=chd_data, x="TenYearCHD", palette="BuGn_r")
# Train and test sets
# ----------------------
# Feature matrix (x) and target vector (y) as NumPy arrays.
feature_columns = ['age', 'male', 'cigsPerDay', 'totChol', 'glucose']
x = np.asarray(chd_data[feature_columns])
y = np.asarray(chd_data['TenYearCHD'])
# Split BEFORE scaling: 30% of the rows are held out for testing, with a
# fixed seed so the partition is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=4)
# Fit the scaler on the training rows only, then apply the same
# transformation to both subsets. Fitting on the full dataset (as was done
# previously) lets the test rows influence the mean/variance used to scale
# the training data, i.e. leaks test information into training.
scaler = preprocessing.StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)
# Keep `x` standardized as before, now using the training statistics, in
# case later code relies on the normalized full matrix.
x = scaler.transform(x)
print('Train set: ', x_train.shape, y_train.shape)
print('Test set: ', x_test.shape, y_test.shape)
# Modeling the dataset
# Fit a logistic-regression classifier (library defaults) on the training
# split; fit() returns the estimator itself, so it can be bound directly.
log_reg = LogisticRegression().fit(x_train, y_train)
# Predicted class label for every row of the held-out test split.
y_pred = log_reg.predict(x_test)
# Evaluation and accuracy
# Jaccard score between the true and the predicted test labels.
test_jaccard = jaccard_score(y_test, y_pred)
print()
print('Accuracy of the model in Jaccard score is : ', test_jaccard)
# Confusion Matrix
# labels=[0, 1] pins the row/column order (and the 2x2 shape) to the
# hard-coded 'Actual:*' / 'Predicted:*' names below, instead of relying on
# whichever classes happen to occur in y_test / y_pred.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
# Wrap the raw counts in a labelled frame: rows are the true classes,
# columns are the predicted classes.
cm_setup = pd.DataFrame(
    data=cm,
    columns=['Predicted:0', 'Predicted:1'],
    index=['Actual:0', 'Actual:1'])
plt.figure(figsize=(9, 6))
# fmt='d' prints each cell annotation as an integer count.
sn.heatmap(cm_setup, annot=True, fmt='d', cmap="Greens")
# Displays every figure that is currently open, including any created
# earlier in the script that has not been shown yet.
plt.show()
# Per-class precision / recall / F1 / support for the test split.
print('The details for confusion matrix is : ')
print(classification_report(y_test, y_pred))