diff --git a/Code/1Days.py b/Code/1Days.py
new file mode 100644
index 0000000..d779ad2
--- /dev/null
+++ b/Code/1Days.py
@@ -0,0 +1,43 @@
+import pandas as pd
+from sklearn.preprocessing import Imputer, LabelEncoder, OneHotEncoder, StandardScaler
+from sklearn.cross_validation import train_test_split
+from sklearn.linear_model import LinearRegression
+import matplotlib.pyplot as plt
+
+dataset = pd.read_csv('../datasets/Data.csv')
+
+X = dataset.iloc[:, :-1].values
+Y = dataset.iloc[:, 3].values
+
+# fill missing numeric values (Age, Salary) with the column mean
+imputer = Imputer(missing_values='NaN', strategy='mean', axis=0)
+imputer = imputer.fit(X[:, 1:3])
+X[:, 1:3] = imputer.transform(X[:, 1:3])
+
+# label-encode the categorical country column, then one-hot encode it
+labelencoder_X = LabelEncoder()
+X[:, 0] = labelencoder_X.fit_transform(X[:, 0])
+onehotencoder = OneHotEncoder(categorical_features=[0])
+X = onehotencoder.fit_transform(X).toarray()
+labelencoder_Y = LabelEncoder()
+Y = labelencoder_Y.fit_transform(Y)
+
+X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
+
+sc_X = StandardScaler()
+X_train = sc_X.fit_transform(X_train)
+X_test = sc_X.transform(X_test)  # transform only: reuse the training-set statistics
+
+regressor = LinearRegression()
+regressor = regressor.fit(X_train, Y_train)
+
+Y_pred = regressor.predict(X_test)
+
+# plot actual vs. predicted values against the scaled Age column,
+# which ends up second to last after one-hot encoding
+plt.scatter(X_train[:, -2], Y_train, color='red')
+plt.scatter(X_train[:, -2], regressor.predict(X_train), color='blue')
+
+plt.scatter(X_test[:, -2], Y_test, color='red')
+plt.scatter(X_test[:, -2], regressor.predict(X_test), color='blue')
+plt.show()
diff --git a/Code/2Days.py b/Code/2Days.py
new file mode 100644
index 0000000..38eb8d0
--- /dev/null
+++ b/Code/2Days.py
@@ -0,0 +1,27 @@
+import pandas as pd
+from sklearn.cross_validation import train_test_split
+from sklearn.linear_model import LinearRegression
+import matplotlib.pyplot as plt
+
+dataset = pd.read_csv('../datasets/studentscores.csv')
+
+X = dataset.iloc[:, :-1].values
+Y = dataset.iloc[:, 1].values
+
+X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
+
+# fit a simple linear regression on the single input feature
+regressor = LinearRegression()
+regressor = regressor.fit(X_train, Y_train)
+
+Y_pred = regressor.predict(X_test)
+
+# plot training-set and test-set results in separate figures
+fig_train = plt.figure(1)
+plt.scatter(X_train, Y_train, color='red')
+plt.plot(X_train, regressor.predict(X_train), color='blue')
+
+fig_test = plt.figure(2)
+plt.scatter(X_test, Y_test, color='red')
+plt.plot(X_test, regressor.predict(X_test), color='blue')
+plt.show()
diff --git a/Code/3Days.py b/Code/3Days.py
new file mode 100644
index 0000000..ec0f0f8
--- /dev/null
+++ b/Code/3Days.py
@@ -0,0 +1,26 @@
+import pandas as pd
+
+dataset = pd.read_csv("../datasets/50_Startups.csv")
+X = dataset.iloc[:, :-1].values
+Y = dataset.iloc[:, -1].values
+
+# encode the categorical State column, then one-hot encode it
+from sklearn.preprocessing import LabelEncoder, OneHotEncoder
+labelencoder = LabelEncoder()
+X[:, 3] = labelencoder.fit_transform(X[:, 3])
+onehotencoder = OneHotEncoder(categorical_features=[3])
+X = onehotencoder.fit_transform(X).toarray()
+
+# drop one dummy column to avoid the dummy variable trap
+X = X[:, 1:]
+
+from sklearn.cross_validation import train_test_split
+
+X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
+
+from sklearn.linear_model import LinearRegression
+regressor = LinearRegression()
+regressor = regressor.fit(X_train, Y_train)
+
+y_pred = regressor.predict(X_test)
+print(y_pred)
\ No newline at end of file
diff --git a/Code/Day 6 Logistic Regression.md b/Code/Day 6 Logistic Regression.md
index 1a1eace..5db5f6f 100644
--- a/Code/Day 6 Logistic Regression.md
+++ b/Code/Day 6 Logistic Regression.md
@@ -81,6 +81,42 @@ cm = confusion_matrix(y_test, y_pred)
 
 ## Visualization
 
+```
+import matplotlib.pyplot as plt
+
+theta = classifier.coef_
+b = classifier.intercept_
+# decision boundary: age * theta_0 + salary * theta_1 + b = 0
+age_plot = [i / 10 for i in range(-40, 40)]
+salary_plot = -1 * (theta[0, 0] * np.array(age_plot) + b) / theta[0, 1]
+
+
+def plot_result(x, y, set_name='train'):
+    xlim = [-3, 3]
+    ylim = [-2.5, 3.5]
+    x_positive = x[np.where(y == 1)]
+    x_negative = x[np.where(y == 0)]
+    fig = plt.figure()
+    ax = fig.add_subplot(111)
+    plt.xlabel('Age')
+    plt.ylabel('Salary')
+    plt.title('Logistic Regression (%s set)' % set_name)
+
+    ax.plot(age_plot, salary_plot, c='r')
+    plt.fill_between(age_plot, salary_plot, ylim[-1], color='lawngreen')
+    plt.fill_between(age_plot, ylim[0], salary_plot, color='hotpink')
+
+    ax.scatter(x_negative[:, 0], x_negative[:, 1], c='r', label='0')
+    ax.scatter(x_positive[:, 0], x_positive[:, 1], c='g', label='1')
+    ax.set_xlim(xlim)
+    ax.set_ylim(ylim)
+    plt.legend()
+    plt.show()
+
+
+plot_result(x_train, y_train, set_name='Train')
+plot_result(x_test, y_test, set_name='Test')
+```
 
 
 
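The visualization block above gets its plotted line by setting the fitted model's linear score to zero: age * theta_0 + salary * theta_1 + b = 0, so salary = -(theta_0 * age + b) / theta_1. Below is a minimal standalone sketch of that idea, not part of the patch: it uses synthetic data from sklearn.datasets.make_classification rather than the Social_Network_Ads features, and the name clf is illustrative only. Points sampled on the derived line should land on the 0.5-probability contour.

```
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

# two numeric features, binary target (stand-ins for scaled Age and Salary)
X, y = make_classification(n_samples=200, n_features=2, n_informative=2,
                           n_redundant=0, random_state=0)
clf = LogisticRegression().fit(X, y)

theta = clf.coef_       # shape (1, 2): one weight per feature
b = clf.intercept_      # shape (1,)

# boundary: theta_0 * x0 + theta_1 * x1 + b = 0  ->  x1 = -(theta_0 * x0 + b) / theta_1
x0 = np.linspace(X[:, 0].min(), X[:, 0].max(), 5)
x1 = -(theta[0, 0] * x0 + b) / theta[0, 1]

# every point on that line should get a predicted probability of about 0.5
on_line = np.column_stack([x0, x1])
print(clf.predict_proba(on_line)[:, 1])
```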

diff --git a/Code/Day6.py b/Code/Day6.py
new file mode 100644
index 0000000..8a95e37
--- /dev/null
+++ b/Code/Day6.py
@@ -0,0 +1,74 @@
+import numpy as np
+import pandas as pd
+
+dataset = pd.read_csv('../datasets/Social_Network_Ads.csv')
+
+x = dataset.iloc[:, [2, 3]].values
+y = dataset.iloc[:, 4].values
+
+from sklearn.cross_validation import train_test_split
+x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)
+
+from sklearn.preprocessing import StandardScaler
+
+sc = StandardScaler()
+x_train = sc.fit_transform(x_train)
+x_test = sc.transform(x_test)  # transform only: reuse the training-set statistics
+
+
+def sigmoid(z):
+    # logistic function: maps a linear score to a probability in (0, 1)
+    return 1 / (1 + np.exp(-z))
+
+
+from sklearn.linear_model import LogisticRegression
+classifier = LogisticRegression()
+classifier.fit(x_train, y_train)
+
+y_pred = classifier.predict(x_test)
+
+from sklearn.metrics import confusion_matrix
+cm = confusion_matrix(y_test, y_pred)
+
+# plot the data set and the decision boundary
+import matplotlib.pyplot as plt
+from matplotlib.colors import ListedColormap
+
+theta = classifier.coef_
+b = classifier.intercept_
+# decision boundary: age * theta_0 + salary * theta_1 + b = 0
+age_plot = [i / 10 for i in range(-40, 40)]
+salary_plot = -1 * (theta[0, 0] * np.array(age_plot) + b) / theta[0, 1]
+
+
+def plot_result(x, y, set_name='train'):
+    ylim = [-2.5, 3.5]
+    x_positive = x[np.where(y == 1)]
+    x_negative = x[np.where(y == 0)]
+    fig = plt.figure()
+    ax = fig.add_subplot(111)
+    plt.xlabel('Age')
+    plt.ylabel('Salary')
+    plt.title('Logistic Regression (%s set)' % set_name)
+
+    # colour the plane by the classifier's prediction on a dense grid
+    X_set = x
+    X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
+                         np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01))
+    plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
+                 alpha=0.55, cmap=ListedColormap(('red', 'green')))
+
+    # alternative: draw the analytic boundary line and shade the two half-planes
+    # ax.plot(age_plot, salary_plot, c='r')
+    # plt.fill_between(age_plot, salary_plot, ylim[-1], color='lawngreen')
+    # plt.fill_between(age_plot, ylim[0], salary_plot, color='hotpink')
+    ax.scatter(x_negative[:, 0], x_negative[:, 1], c='r', label='0')
+    ax.scatter(x_positive[:, 0], x_positive[:, 1], c='g', label='1')
+    ax.set_xlim((X1.min(), X1.max()))
+    ax.set_ylim((X2.min(), X2.max()))
+    plt.legend()
+    plt.show()
+
+
+plot_result(x_train, y_train, set_name='Train')
+plot_result(x_test, y_test, set_name='Test')
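A general note on the scripts in this patch: sklearn.cross_validation, Imputer, and OneHotEncoder's categorical_features argument existed when this code was written but have since been removed from scikit-learn. The sketch below is not part of the patch; it shows roughly how the Day 1 preprocessing could be expressed against a recent scikit-learn, assuming the same Country/Age/Salary/Purchased column layout of Data.csv that 1Days.py assumes.

```
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

dataset = pd.read_csv('../datasets/Data.csv')
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[:, 3].values

# column 0 is categorical (Country); columns 1-2 are numeric (Age, Salary)
preprocess = ColumnTransformer([
    ('country', OneHotEncoder(handle_unknown='ignore'), [0]),
    ('numeric', Pipeline([('impute', SimpleImputer(strategy='mean')),
                          ('scale', StandardScaler())]), [1, 2]),
])

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
X_train = preprocess.fit_transform(X_train)
X_test = preprocess.transform(X_test)  # transform only: reuse the training-set statistics
```

Bundling imputation, encoding, and scaling in one ColumnTransformer keeps the fit/transform split explicit, which avoids the refit-on-the-test-set mistake corrected in the scripts above.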