# %% [markdown]
# # Placement analysis: simple linear regression (CGPA -> package)

# %%
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# %%
# Location of the dataset inside the Colab runtime; the file has two numeric
# columns, `cgpa` and `package`.
DATA_PATH = "/content/placement.csv"
df = pd.read_csv(DATA_PATH)

# %%
# First rows, to confirm the columns were parsed as expected.
df.head()

# %%
# Per-column summary statistics (count, mean, std, quartiles).
df.describe()
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cgpapackage
count200.000000200.000000
mean6.9905002.996050
std1.0694090.691644
min4.2600001.370000
25%6.1900002.487500
50%6.9650002.995000
75%7.7375003.492500
max9.5800004.620000
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
# %%
# Draw a scatter plot first to check that CGPA and package look linearly related,
# which is the assumption linear regression relies on.
plt.scatter(df['cgpa'], df['package'])
plt.xlabel("CGPA")
plt.ylabel("Package (In LPA)")
plt.title("Placement Analysis")
plt.show()

# %%
# Select by column name instead of position so a reordered CSV cannot silently
# swap feature and target. The double brackets keep `x` two-dimensional
# (n_samples, 1), the layout scikit-learn estimators expect; `y` is 1-D.
x = df[["cgpa"]].to_numpy()
y = df["package"].to_numpy()

# %%
# Preview instead of dumping every row into the saved notebook.
x.shape, x[:5]

# %%
y.shape, y[:5]

# %%
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# %%
# 80/20 split; the fixed random_state keeps the split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=2)

# %%
lr = LinearRegression()
lr.fit(x_train, y_train)
# %%
# Peek at the held-out data. The original cell started with `x_test,`, which
# only builds a throwaway tuple, so x_test was never actually displayed.
x_test[:5], y_test[:5]

# %%
# predict() expects a 2-D (n_samples, n_features) array; slicing with [:1]
# keeps the first test row 2-D without a manual reshape.
lr.predict(x_test[:1])

# %%
def plot_fit(x_line, title):
    """Scatter the full dataset and overlay the fitted line.

    Parameters
    ----------
    x_line : array of shape (n, 1)
        CGPA values at which the fitted model `lr` is evaluated for the red line.
    title : str
        Figure title.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the figure was drawn on.
    """
    fig, ax = plt.subplots()
    ax.scatter(df['cgpa'], df['package'])
    ax.plot(x_line, lr.predict(x_line), color='red')
    ax.set(xlabel='CGPA', ylabel='Package(In LPA)', title=title)
    return ax

# %%
plot_fit(x_test, "Fitted line evaluated on test CGPAs");

# %%
plot_fit(x_train, "Fitted line evaluated on train CGPAs");

# %%
m = lr.coef_
c = lr.intercept_
print("slope =", m, "Y-intercept =", c)

# %%
# y = m*x + c, checked by hand for a few CGPAs in one vectorised step.
# The result gets its own name: the original assigned it to `y`, which
# overwrote the target array and broke any later re-run of the split cell.
# NOTE(review): 10.26 lies above the largest CGPA in the data (9.58), so that
# value is an extrapolation.
cgpa_queries = np.array([8.26, 9.26, 10.26])
package_manual = m * cgpa_queries + c
package_manual
class Learn:
    """Single-feature linear regression fitted with closed-form least squares.

    The model is ``y = m * x + c`` with

        m = sum((x_i - mean(x)) * (y_i - mean(y))) / sum((x_i - mean(x)) ** 2)
        c = mean(y) - m * mean(x)

    Attributes
    ----------
    m : float or None
        Fitted slope; ``None`` until :meth:`fit` has been called.
    c : float or None
        Fitted intercept; ``None`` until :meth:`fit` has been called.
    """

    def __init__(self):
        self.m = None
        self.c = None

    def fit(self, x_train, y_train):
        """Estimate slope and intercept from training data and print them.

        Parameters
        ----------
        x_train : array-like of shape (n,) or (n, 1)
            Values of the single feature.
        y_train : array-like of shape (n,)
            Target values, one per sample.

        Raises
        ------
        ValueError
            If the inputs are empty, differ in length, or the feature has
            zero variance (the slope is undefined in that case).
        """
        x_flat = np.asarray(x_train, dtype=float).ravel()
        y_flat = np.asarray(y_train, dtype=float).ravel()
        if x_flat.size == 0 or x_flat.size != y_flat.size:
            raise ValueError("x_train and y_train must be non-empty and have the same number of samples")

        # Means are computed once instead of on every loop iteration.
        x_mean = x_flat.mean()
        y_mean = y_flat.mean()
        x_dev = x_flat - x_mean

        num = (x_dev * (y_flat - y_mean)).sum()
        # The deviation is squared as a whole. The original expression
        # `x - x.mean()**2` squared only the mean because `**` binds tighter
        # than `-`, which produced a wrong (even negative) slope.
        den = (x_dev ** 2).sum()
        if den == 0:
            raise ValueError("x_train has zero variance; the slope is undefined")

        self.m = float(num / den)
        self.c = float(y_mean - self.m * x_mean)
        print(self.m)
        print(self.c)

    def predict(self, x_test):
        """Return ``m * x_test + c``; the result has the same shape as `x_test`.

        Also echoes the input, slope and intercept to stdout.
        """
        print(x_test)
        print("slope", self.m)
        print("Y-Intercept", self.c)

        return self.m * np.asarray(x_test, dtype=float) + self.c

    def score(self, y_pred, y_test):
        """Return the coefficient of determination R^2 = 1 - u / v.

        ``u`` is the residual sum of squares and ``v`` the total sum of squares
        of `y_test`. Note the argument order: predictions first, then truth.

        Both inputs are flattened first, so (n, 1) predictions line up
        element-wise with (n,) targets instead of broadcasting to (n, n).
        """
        y_pred = np.asarray(y_pred, dtype=float).ravel()
        y_test = np.asarray(y_test, dtype=float).ravel()
        u = ((y_test - y_pred) ** 2).sum()
        v = ((y_test - y_test.mean()) ** 2).sum()
        return 1 - (u / v)
# %%
x_test[0]

# %%
# Prediction of the hand-written model for the first held-out sample.
print(llr.predict(x_test[0]))

# %%
# R^2 needs one prediction per held-out sample. The original scored only the
# prediction for x_test[0], broadcast against all of y_test, which compares the
# targets with a constant and always gives a score near zero.
# ravel() flattens the (n, 1) predictions to (n,) so they pair element-wise
# with y_test instead of broadcasting into an (n, n) matrix.
y_pred_test = llr.predict(x_test).ravel()
print(llr.score(y_pred_test, y_test))
# %% [markdown]
# # Linear regression practice: scikit-learn diabetes dataset

# %%
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes

# %%
# return_X_y=True yields a (features, target) pair; as_frame=True makes the
# features a pandas DataFrame rather than a bare array.
features_and_target = load_diabetes(return_X_y=True, as_frame=True)
x, y = features_and_target

# %%
# Feature table (pandas truncates the middle rows when rendering).
x

# %%
x.shape

# %%
# Summary statistics for each feature column.
x.describe()
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agesexbmibps1s2s3s4s5s6
count4.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+02
mean-2.511817e-191.230790e-17-2.245564e-16-4.797570e-17-1.381499e-173.918434e-17-5.777179e-18-9.042540e-189.293722e-171.130318e-17
std4.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-02
min-1.072256e-01-4.464164e-02-9.027530e-02-1.123988e-01-1.267807e-01-1.156131e-01-1.023071e-01-7.639450e-02-1.260971e-01-1.377672e-01
25%-3.729927e-02-4.464164e-02-3.422907e-02-3.665608e-02-3.424784e-02-3.035840e-02-3.511716e-02-3.949338e-02-3.324559e-02-3.317903e-02
50%5.383060e-03-4.464164e-02-7.283766e-03-5.670422e-03-4.320866e-03-3.819065e-03-6.584468e-03-2.592262e-03-1.947171e-03-1.077698e-03
75%3.807591e-025.068012e-023.124802e-023.564379e-022.835801e-022.984439e-022.931150e-023.430886e-023.243232e-022.791705e-02
max1.107267e-015.068012e-021.705552e-011.320436e-011.539137e-011.987880e-011.811791e-011.852344e-011.335973e-011.356118e-01
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"x\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 156.26771856938913,\n \"min\": -0.1072256316073538,\n \"max\": 442.0,\n \"num_unique_values\": 8,\n \"samples\": [\n -2.511816797794472e-19,\n 0.005383060374248237,\n 442.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 156.26984438387296,\n \"min\": -0.044641636506989144,\n \"max\": 442.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 1.2307902309192911e-17,\n 0.05068011873981862,\n 0.0476190476190478\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bmi\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 156.26467538082832,\n \"min\": -0.09027529589850945,\n \"max\": 442.0,\n \"num_unique_values\": 8,\n \"samples\": [\n -2.2455642172282577e-16,\n -0.007283766209687899,\n 442.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 156.26755458292146,\n \"min\": -0.11239880254408448,\n \"max\": 442.0,\n \"num_unique_values\": 8,\n \"samples\": [\n -4.7975700837874414e-17,\n -0.00567042229275739,\n 442.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"s1\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 156.26735866123312,\n \"min\": -0.12678066991651324,\n \"max\": 442.0,\n \"num_unique_values\": 8,\n \"samples\": [\n -1.3814992387869595e-17,\n -0.004320865536613489,\n 442.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"s2\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 156.26423640225082,\n \"min\": -0.11561306597939897,\n \"max\": 442.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 
3.918434204559376e-17,\n -0.0038190651205350003,\n 442.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"s3\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 156.26485662659837,\n \"min\": -0.10230705051741597,\n \"max\": 442.0,\n \"num_unique_values\": 8,\n \"samples\": [\n -5.7771786349272854e-18,\n -0.006584467611155497,\n 442.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"s4\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 156.26310825587572,\n \"min\": -0.0763945037500033,\n \"max\": 442.0,\n \"num_unique_values\": 8,\n \"samples\": [\n -9.042540472060099e-18,\n -0.002592261998183278,\n 442.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"s5\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 156.26797139645322,\n \"min\": -0.12609712083330468,\n \"max\": 442.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 9.293722151839546e-17,\n -0.0019471710869220743,\n 442.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"s6\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 156.2686414608801,\n \"min\": -0.13776722569000302,\n \"max\": 442.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 1.1303175590075123e-17,\n -0.0010776975004659671,\n 442.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 79 + } + ] + }, + { + "cell_type": "code", + "source": [ + "y\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 458 + }, + "id": "EGn4-8AGQKWf", + "outputId": "2207e6e9-0296-424f-ea51-3774692d23b9" + }, + "execution_count": 80, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 151.0\n", + "1 75.0\n", + "2 141.0\n", + "3 206.0\n", + "4 135.0\n", + " ... 
\n", + "437 178.0\n", + "438 104.0\n", + "439 132.0\n", + "440 220.0\n", + "441 57.0\n", + "Name: target, Length: 442, dtype: float64" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
target
0151.0
175.0
2141.0
3206.0
4135.0
......
437178.0
438104.0
439132.0
440220.0
44157.0
\n", + "

442 rows × 1 columns

\n", + "

" + ] + }, + "metadata": {}, + "execution_count": 80 + } + ] + }, + { + "cell_type": "code", + "source": [ + "y.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mDmXiPLoQVBG", + "outputId": "cb536104-016a-45ff-a582-8f6cfa81772a" + }, + "execution_count": 81, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(442,)" + ] + }, + "metadata": {}, + "execution_count": 81 + } + ] + }, + { + "cell_type": "code", + "source": [ + "y.describe()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 335 + }, + "id": "tdO6tevTQWHN", + "outputId": "2f4ed6c2-0fe2-4492-c74c-0bc434275963" + }, + "execution_count": 82, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "count 442.000000\n", + "mean 152.133484\n", + "std 77.093005\n", + "min 25.000000\n", + "25% 87.000000\n", + "50% 140.500000\n", + "75% 211.500000\n", + "max 346.000000\n", + "Name: target, dtype: float64" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
target
count442.000000
mean152.133484
std77.093005
min25.000000
25%87.000000
50%140.500000
75%211.500000
max346.000000
\n", + "

" + ] + }, + "metadata": {}, + "execution_count": 82 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Now doing Linear Regression on this dataset\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=2)" + ], + "metadata": { + "id": "szXNVMfnQXQu" + }, + "execution_count": 83, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(x_train)\n", + "print(\"-----------------------------------\")\n", + "print(y_train)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aKE9Vi1eQiBX", + "outputId": "993cd70a-3f10-44a0-e171-403d16a8f0dd" + }, + "execution_count": 84, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " age sex bmi bp s1 s2 s3 \\\n", + "70 -0.001882 -0.044642 -0.069797 -0.012556 -0.000193 -0.009143 0.070730 \n", + "37 -0.009147 -0.044642 0.011039 -0.057313 -0.024960 -0.042963 0.030232 \n", + "170 0.023546 0.050680 -0.020218 -0.036656 -0.013953 -0.015092 0.059685 \n", + "400 -0.023677 -0.044642 0.045529 0.090729 -0.018080 -0.035447 0.070730 \n", + "286 -0.038207 -0.044642 -0.054707 -0.077970 -0.033216 -0.086490 0.140681 \n", + ".. ... ... ... ... ... ... ... \n", + "299 0.038076 0.050680 -0.013751 -0.015999 -0.035968 -0.021982 -0.013948 \n", + "22 -0.085430 -0.044642 -0.004050 -0.009113 -0.002945 0.007767 0.022869 \n", + "72 0.063504 0.050680 -0.004050 -0.012556 0.103003 0.048790 0.056003 \n", + "15 -0.052738 0.050680 -0.018062 0.080401 0.089244 0.107662 -0.039719 \n", + "168 0.001751 0.050680 0.059541 -0.002228 0.061725 0.063195 -0.058127 \n", + "\n", + " s4 s5 s6 \n", + "70 -0.039493 -0.062917 0.040343 \n", + "37 -0.039493 0.017036 -0.005220 \n", + "170 -0.039493 -0.096435 -0.017646 \n", + "400 -0.039493 -0.034522 -0.009362 \n", + "286 -0.076395 -0.019198 -0.005220 \n", + ".. ... ... ... 
\n", + "299 -0.002592 -0.025953 -0.001078 \n", + "22 -0.039493 -0.061176 -0.013504 \n", + "72 -0.002592 0.084492 -0.017646 \n", + "15 0.108111 0.036060 -0.042499 \n", + "168 0.108111 0.068986 0.127328 \n", + "\n", + "[353 rows x 10 columns]\n", + "-----------------------------------\n", + "70 48.0\n", + "37 276.0\n", + "170 47.0\n", + "400 175.0\n", + "286 60.0\n", + " ... \n", + "299 83.0\n", + "22 68.0\n", + "72 202.0\n", + "15 171.0\n", + "168 268.0\n", + "Name: target, Length: 353, dtype: float64\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.linear_model import LinearRegression" + ], + "metadata": { + "id": "VYF1BilUQqmf" + }, + "execution_count": 85, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "lr = LinearRegression()\n", + "lr.fit(x_train,y_train)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 80 + }, + "id": "ckhiB_PjQ4eh", + "outputId": "3ba6f798-891a-44cd-f856-e851c4c02ebf" + }, + "execution_count": 86, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "LinearRegression()" + ], + "text/html": [ + "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 86 + } + ] + }, + { + "cell_type": "code", + "source": [ + "y_pred= lr.predict(x_test)" + ], + "metadata": { + "id": "_9fx87T1Q7fy" + }, + "execution_count": 87, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.metrics import r2_score\n", + "r2_score(y_test,y_pred)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RBohs0xdQ_3P", + "outputId": "50b46d0d-fe01-4726-f753-95833c49bb38" + }, + "execution_count": 88, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.439933866156897" + ] + }, + "metadata": {}, + "execution_count": 88 + } + ] + }, + { + "cell_type": "code", + "source": [ + "m =lr.coef_\n", + "m" + ], + "metadata": { + "id": "p65TLRciRG0q", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "08e6f625-b5f9-4f7f-b8ce-93d1042f2ca0" + }, + "execution_count": 89, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ -9.15865318, -205.45432163, 516.69374454, 340.61999905,\n", + " -895.5520019 , 561.22067904, 153.89310954, 126.73139688,\n", + " 861.12700152, 52.42112238])" + ] + }, + "metadata": {}, + "execution_count": 89 + } + ] + }, + { + "cell_type": "code", + "source": [ + "c =lr.intercept_\n", + "c" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KTs8Gc5AAGgB", + "outputId": "f3ecf74b-452e-460e-c89a-3253b5cff34b" + }, + "execution_count": 90, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "151.88331005254167" + ] + }, + "metadata": {}, + "execution_count": 90 + } + ] + }, + { + "cell_type": "code", + "source": [ + "### Making a Regression based class" + ], + "metadata": { + "id": "VtrUQhQqAmuz" + }, + "execution_count": 91, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "class algo:\n", + " def __init__(self):\n", + " self.m=None\n", + " 
self.c=None\n", + "\n", + " def fit(self,x_train,y_train):\n", + " x_train = np.insert(x_train,0,1,axis =1 )\n", + "\n", + " b = np.linalg.inv(np.dot(x_train.T,x_train)).dot(x_train.T).dot(y_train)\n", + "\n", + " self.m = b[1:]\n", + " self.c = b[0]\n", + "\n", + " print(self.m)\n", + " print(self.c)\n", + "\n", + "\n", + "\n", + " def predict(self,x_test):\n", + " y_pred = np.dot(x_test,self.m) + self.c\n", + " return y_pred\n", + "\n", + " def score(self,y_test,y_pred):\n", + " u = ((y_test - y_pred)**2).sum()\n", + " v = ((y_test - y_test.mean())**2).sum()\n", + " return 1 - u/v\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "21tmFLWEAqiQ" + }, + "execution_count": 92, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "al = algo()\n", + "al.fit(x_train,y_train)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Kb876IrJA-nP", + "outputId": "a3f93dcc-633a-4e65-e803-e9848049fab5" + }, + "execution_count": 93, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[ -9.15865318 -205.45432163 516.69374454 340.61999905 -895.5520019\n", + " 561.22067904 153.89310954 126.73139688 861.12700152 52.42112238]\n", + "151.88331005254165\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "x_train.shape\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "iya2qAq0BAi3", + "outputId": "6a3c1301-d497-484a-fc15-293f3f66a140" + }, + "execution_count": 94, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(353, 10)" + ] + }, + "metadata": {}, + "execution_count": 94 + } + ] + }, + { + "cell_type": "code", + "source": [ + "np.insert(x_train,0,1,axis =1 ).shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2DpvFfRJBB5m", + "outputId": "b13bc80c-4850-4d32-bdff-4a637b0c1ff3" + }, + "execution_count": 95, + "outputs": [ + { + "output_type": "execute_result", + 
"data": { + "text/plain": [ + "(353, 11)" + ] + }, + "metadata": {}, + "execution_count": 95 + } + ] + }, + { + "cell_type": "code", + "source": [ + "y_pred = al.predict(x_test)" + ], + "metadata": { + "id": "ZWc8dIGaGW3f" + }, + "execution_count": 96, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "r2_score(y_test,y_pred)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "atLg3GHwGbSS", + "outputId": "1ee42711-dff9-41c6-c4d8-a9095e1de64e" + }, + "execution_count": 97, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.43993386615689634" + ] + }, + "metadata": {}, + "execution_count": 97 + } + ] + }, + { + "cell_type": "code", + "source": [ + "al.m\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5FiOXS1lGged", + "outputId": "b6a4c77b-f57a-4e9f-e07c-fcc44a785674" + }, + "execution_count": 98, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ -9.15865318, -205.45432163, 516.69374454, 340.61999905,\n", + " -895.5520019 , 561.22067904, 153.89310954, 126.73139688,\n", + " 861.12700152, 52.42112238])" + ] + }, + "metadata": {}, + "execution_count": 98 + } + ] + }, + { + "cell_type": "code", + "source": [ + "al.c" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vK2YtIf8GmRd", + "outputId": "f95b43c2-4787-4120-9829-809729c488e5" + }, + "execution_count": 99, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "151.88331005254165" + ] + }, + "metadata": {}, + "execution_count": 99 + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install plotly\n", + "import plotly.express as px" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "22O74l7PGmwl", + "outputId": "994c575a-cbac-4e30-ff0d-af814c7a6125" + }, + "execution_count": 100, + "outputs": [ + { + "output_type": "stream", + 
"name": "stdout", + "text": [ + "Requirement already satisfied: plotly in /usr/local/lib/python3.10/dist-packages (5.24.1)\n", + "Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from plotly) (9.0.0)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from plotly) (24.1)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "utEBLywnGyfD" + }, + "execution_count": 100, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "fig = px.scatter_3d(\n", + " x=x_train.iloc[:, 0],\n", + " y=x_train.iloc[:, 1],\n", + " z=y_train,\n", + " color=x_train.iloc[:, 0]\n", + ")\n", + "\n", + "fig.show()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 542 + }, + "id": "RhxvaWgEt9qp", + "outputId": "32069063-88e1-4384-c793-827207abb802" + }, + "execution_count": 102, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "FKP1W00o79XM" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file