-
Notifications
You must be signed in to change notification settings - Fork 0
/
qda_predict.py
82 lines (67 loc) · 3.12 KB
/
qda_predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# -*- coding: utf-8 -*-
"""QDA_predict.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1Mh5o1EMtTXDR6W2e72WiejcIGN9LUOFQ
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.preprocessing as skl_pre
from sklearn.model_selection import train_test_split
import sklearn.linear_model as skl_lm
import sklearn.discriminant_analysis as skl_da
import sklearn.neighbors as skl_nb
from sklearn import metrics
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from numpy import mean
from numpy import std
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from IPython.core.pylabtools import figsize
# Set the default figure size (width, height) through IPython's pylab helper.
figsize(10, 6)

# Input files: the labelled training set and the set to predict on.
url1 = 'train.csv'
url2 = 'test.csv'
movies_df_train = pd.read_csv(url1)
movies_df_test = pd.read_csv(url2)

# Columns that are converted from float to int in both data sets.
INT_COLUMNS = ['Number of words lead', 'Age Lead', 'Age Co-Lead']

print(movies_df_train.dtypes)
movies_df_train = movies_df_train.astype({column: int for column in INT_COLUMNS})
# Encode the target as 0 (Male) / 1 (Female).  The result is assigned back
# explicitly: calling ``replace(..., inplace=True)`` on the column selection
# modifies a temporary Series and leaves the DataFrame untouched when pandas
# Copy-on-Write is active.  Any label other than 'Male'/'Female' maps to NaN,
# which makes the ``astype(int)`` cast fail loudly instead of passing
# unexpected labels on to the classifier.
movies_df_train['Lead'] = (
    movies_df_train['Lead'].map({'Male': 0, 'Female': 1}).astype(int)
)
print(movies_df_train.dtypes)

movies_df_test = movies_df_test.astype({column: int for column in INT_COLUMNS})
print(movies_df_test.dtypes)
# Training features: every column except the target ("Lead") and "Year".
feature_train_df = movies_df_train.drop(columns="Lead").drop(columns="Year")
# Features of the rows to predict on: only "Year" is removed here.
feature_test_df = movies_df_test.drop(columns="Year")
# Target column used to fit the classifier.
label_train_df = movies_df_train.loc[:, "Lead"]
# Earlier feature-selection experiments, kept for reference:
#feature_df = feature_df[["Age Co-Lead","Age Lead","Number of female actors","Number of male actors","Difference in words lead and co-lead", "Number of words lead"]]
#feature_df = feature_df.drop(["Number words male", "Number words female"], axis=1)
#feature_df = feature_df.drop(["Gross"], axis=1)
#train_x, test_x, train_y, test_y = train_test_split(feature_df, label_df, test_size=0.30, random_state=0)
#cv = KFold(n_splits=10, random_state=1, shuffle=True)
# Fit a quadratic discriminant analysis classifier on the training data
# (fit() returns the fitted estimator) and predict one label per test row.
clf = QuadraticDiscriminantAnalysis().fit(feature_train_df, label_train_df)
pred_y = clf.predict(feature_test_df)
print(pred_y)
# matrix = metrics.confusion_matrix(test_y, pred_y)
# print(matrix)
# print(f"Accuracy: {np.mean(pred_y == test_y):.3f}")
# scores = cross_val_score(clf, feature_df, label_df, scoring='accuracy', cv=cv, n_jobs=-1)
# print('Accuracy: %.3f (%.3f)' % (mean(scores), std(scores)))
# pred_y_list = pred_y.tostring()
# Serialise the predictions as a single comma-separated line (no trailing
# newline) and write it to predictions.csv.
pred_y_list = pred_y.tolist()
pred_y_string = ",".join(str(label) for label in pred_y_list)
# The write must be indented under the ``with`` so the file is written while
# it is open and is closed automatically afterwards.
with open('predictions.csv', 'w') as f:
    f.write(pred_y_string)