diff --git a/README.md b/README.md index ca63d26..a76fdec 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,35 @@ -# neuralnet -Neural Network Implementation in NumPy +# Neural Network Implementation in NumPy + +A "from scratch" implementation of classic feed-forward neural networks for +binary/multi-class classification using ReLU activations, cross-entropy loss and +sigmoid/softmax output. + +Read through the documentation in `neuralnet.py` for a description of the +implementation. + +Example usage of `neuralnet.py` is given in the `Usage.ipynb` notebook. + +Alternatively, you can open the whole code in Google Colab [here](https://colab.research.google.com/github/michabirklbauer/neuralnet/blob/master/neuralnet-colab.ipynb). + +## Requirements + +`neuralnet.py` is implemented purely in NumPy: +- [NumPy](https://numpy.org/): `pip install numpy` + +To run the examples in the `Usage.ipynb` notebook locally, please install the +requirements noted in `requirements.txt`: +- [Requirements](https://github.com/michabirklbauer/neuralnet/blob/master/requirements.txt): `pip install -r requirements.txt` + +## Data + +The following datasets are used in the examples: +- Multi-class classification: [MNIST](http://yann.lecun.com/exdb/mnist/index.html) +- Binary-class classification: [Breast Cancer Wisconsin (Diagnostic) Data Set](https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Diagnostic%29) + +## License + +- [MIT](https://github.com/michabirklbauer/neuralnet/blob/master/LICENSE) + +## Contact + +- [micha.birklbauer@gmail.com](mailto:micha.birklbauer@gmail.com) diff --git a/Usage.ipynb b/Usage.ipynb new file mode 100644 index 0000000..f49c779 --- /dev/null +++ b/Usage.ipynb @@ -0,0 +1,1262 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a60f041d-d688-4b00-8bc1-3e01da0d947f", + "metadata": {}, + "source": [ + "# **Example Usage of `neuralnet.py`**\n", + "\n", + "### **Multi-Class Classification**\n", + "\n", + "### **Dataset: 
[MNIST](http://yann.lecun.com/exdb/mnist/index.html)**" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "c7a8280c-d0b9-41d5-88e1-993db76a73b4", + "metadata": {}, + "outputs": [], + "source": [ + "from zipfile import ZipFile as zip\n", + "\n", + "with zip(\"data.zip\") as f:\n", + " f.extractall()\n", + " f.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "579b7aa9-24c5-4dd1-b8b7-719cbb1f7b09", + "metadata": {}, + "outputs": [], + "source": [ + "from neuralnet import NeuralNetwork\n", + "import numpy as np\n", + "import pandas as pd\n", + "from matplotlib import pyplot as plt\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a8e4f9b1-140c-42ac-9b04-11e23b27d1eb", + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.read_csv(\"multiclass_train.csv\")\n", + "train, test = train_test_split(data, test_size = 0.3)\n", + "train_data = train.loc[:, train.columns != \"label\"].to_numpy() / 255\n", + "train_target = train[\"label\"].to_numpy()\n", + "test_data = test.loc[:, test.columns != \"label\"].to_numpy() / 255\n", + "test_target = test[\"label\"].to_numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "f9a8ba9c-7255-40b3-9e5f-f999e89eb257", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
labelpixel0pixel1pixel2pixel3pixel4pixel5pixel6pixel7pixel8...pixel774pixel775pixel776pixel777pixel778pixel779pixel780pixel781pixel782pixel783
179294000000000...0000000000
26309000000000...0000000000
315846000000000...0000000000
126680000000000...0000000000
14070000000000...0000000000
..................................................................
323701000000000...0000000000
174615000000000...0000000000
50791000000000...0000000000
294135000000000...0000000000
333857000000000...0000000000
\n", + "

29400 rows × 785 columns

\n", + "
" + ], + "text/plain": [ + " label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 \\\n", + "17929 4 0 0 0 0 0 0 0 0 \n", + "2630 9 0 0 0 0 0 0 0 0 \n", + "31584 6 0 0 0 0 0 0 0 0 \n", + "12668 0 0 0 0 0 0 0 0 0 \n", + "1407 0 0 0 0 0 0 0 0 0 \n", + "... ... ... ... ... ... ... ... ... ... \n", + "32370 1 0 0 0 0 0 0 0 0 \n", + "17461 5 0 0 0 0 0 0 0 0 \n", + "5079 1 0 0 0 0 0 0 0 0 \n", + "29413 5 0 0 0 0 0 0 0 0 \n", + "33385 7 0 0 0 0 0 0 0 0 \n", + "\n", + " pixel8 ... pixel774 pixel775 pixel776 pixel777 pixel778 \\\n", + "17929 0 ... 0 0 0 0 0 \n", + "2630 0 ... 0 0 0 0 0 \n", + "31584 0 ... 0 0 0 0 0 \n", + "12668 0 ... 0 0 0 0 0 \n", + "1407 0 ... 0 0 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "32370 0 ... 0 0 0 0 0 \n", + "17461 0 ... 0 0 0 0 0 \n", + "5079 0 ... 0 0 0 0 0 \n", + "29413 0 ... 0 0 0 0 0 \n", + "33385 0 ... 0 0 0 0 0 \n", + "\n", + " pixel779 pixel780 pixel781 pixel782 pixel783 \n", + "17929 0 0 0 0 0 \n", + "2630 0 0 0 0 0 \n", + "31584 0 0 0 0 0 \n", + "12668 0 0 0 0 0 \n", + "1407 0 0 0 0 0 \n", + "... ... ... ... ... ... 
\n", + "32370 0 0 0 0 0 \n", + "17461 0 0 0 0 0 \n", + "5079 0 0 0 0 0 \n", + "29413 0 0 0 0 0 \n", + "33385 0 0 0 0 0 \n", + "\n", + "[29400 rows x 785 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6d2c5098-ba6b-4533-be14-a7e1395c944b", + "metadata": {}, + "outputs": [], + "source": [ + "one_hot = OneHotEncoder(sparse = False, categories = \"auto\")\n", + "train_target = one_hot.fit_transform(train_target.reshape(-1, 1))\n", + "test_target = one_hot.transform(test_target.reshape(-1, 1))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "7417c6e4-fd30-498c-a4de-5657ffb0e5f1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---- Model Summary ----\n", + "Layer 1: relu\n", + "W: (32, 784) b: (32, 1)\n", + "Trainable parameters: 25120\n", + "Layer 2: relu\n", + "W: (16, 32) b: (16, 1)\n", + "Trainable parameters: 528\n", + "Layer 3: softmax\n", + "W: (10, 16) b: (10, 1)\n", + "Trainable parameters: 170\n" + ] + } + ], + "source": [ + "NN = NeuralNetwork(input_size = train_data.shape[1])\n", + "NN.add_layer(32, \"relu\")\n", + "NN.add_layer(16, \"relu\")\n", + "NN.add_layer(10, \"softmax\")\n", + "NN.compile(loss = \"categorical crossentropy\")\n", + "NN.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "bc46f780-ff80-43ec-8eae-0e31ecd39a30", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training epoch 1...\n", + "Current loss: 0.4636323587703678\n", + "Epoch 1 done!\n", + "Training epoch 2...\n", + "Current loss: 0.22331880492244685\n", + "Epoch 2 done!\n", + "Training epoch 3...\n", + "Current loss: 0.1736999159275795\n", + "Epoch 3 done!\n", + "Training epoch 4...\n", + "Current loss: 0.14369509648982923\n", + "Epoch 4 done!\n", + "Training epoch 5...\n", + "Current 
loss: 0.12427214647108864\n", + "Epoch 5 done!\n", + "Training epoch 6...\n", + "Current loss: 0.1101834383565226\n", + "Epoch 6 done!\n", + "Training epoch 7...\n", + "Current loss: 0.10044103041530172\n", + "Epoch 7 done!\n", + "Training epoch 8...\n", + "Current loss: 0.09091286128970821\n", + "Epoch 8 done!\n", + "Training epoch 9...\n", + "Current loss: 0.08300819622254964\n", + "Epoch 9 done!\n", + "Training epoch 10...\n", + "Current loss: 0.07745555155379909\n", + "Epoch 10 done!\n", + "Training epoch 11...\n", + "Current loss: 0.07170223282036263\n", + "Epoch 11 done!\n", + "Training epoch 12...\n", + "Current loss: 0.068338226505863\n", + "Epoch 12 done!\n", + "Training epoch 13...\n", + "Current loss: 0.06136732501577605\n", + "Epoch 13 done!\n", + "Training epoch 14...\n", + "Current loss: 0.0559277977809122\n", + "Epoch 14 done!\n", + "Training epoch 15...\n", + "Current loss: 0.05419667267944242\n", + "Epoch 15 done!\n", + "Training epoch 16...\n", + "Current loss: 0.050900625678517726\n", + "Epoch 16 done!\n", + "Training epoch 17...\n", + "Current loss: 0.04833006784938205\n", + "Epoch 17 done!\n", + "Training epoch 18...\n", + "Current loss: 0.04177950013769969\n", + "Epoch 18 done!\n", + "Training epoch 19...\n", + "Current loss: 0.040692531474785014\n", + "Epoch 19 done!\n", + "Training epoch 20...\n", + "Current loss: 0.039062151996810276\n", + "Epoch 20 done!\n", + "Training epoch 21...\n", + "Current loss: 0.040631697529318576\n", + "Epoch 21 done!\n", + "Training epoch 22...\n", + "Current loss: 0.03587460961150384\n", + "Epoch 22 done!\n", + "Training epoch 23...\n", + "Current loss: 0.03348255795122354\n", + "Epoch 23 done!\n", + "Training epoch 24...\n", + "Current loss: 0.031529388385383085\n", + "Epoch 24 done!\n", + "Training epoch 25...\n", + "Current loss: 0.029643544721363053\n", + "Epoch 25 done!\n", + "Training epoch 26...\n", + "Current loss: 0.028773139206400806\n", + "Epoch 26 done!\n", + "Training epoch 27...\n", + "Current 
loss: 0.022705054266604976\n", + "Epoch 27 done!\n", + "Training epoch 28...\n", + "Current loss: 0.02103327505716646\n", + "Epoch 28 done!\n", + "Training epoch 29...\n", + "Current loss: 0.027954782898974195\n", + "Epoch 29 done!\n", + "Training epoch 30...\n", + "Current loss: 0.026080962157392643\n", + "Epoch 30 done!\n", + "Training finished after epoch 30 with a loss of 0.026080962157392643.\n" + ] + } + ], + "source": [ + "hist = NN.fit(train_data, train_target, epochs = 30, batch_size = 16, learning_rate = 0.05)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "5d833848-9d24-47b3-b690-d736a50ebe4c", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def plot_history(hist):\n", + " plt.plot(hist)\n", + " plt.title(\"Model Loss\")\n", + " plt.xlabel(\"Epochs\")\n", + " plt.ylabel(\"Loss\")\n", + " plt.show()\n", + " \n", + "plot_history(hist);" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "7f30a5bf-caca-44fc-8d5a-8ed8863600e6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training accuracy: 0.9939455782312925\n", + "Test accuracy: 0.9577777777777777\n" + ] + } + ], + "source": [ + "train_predictions = np.argmax(NN.predict(train_data), axis = 1)\n", + "print(\"Training accuracy: \", accuracy_score(train[\"label\"].to_numpy(), train_predictions))\n", + "test_predictions = np.argmax(NN.predict(test_data), axis = 1)\n", + "print(\"Test accuracy: \", accuracy_score(test[\"label\"].to_numpy(), test_predictions))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "5d3ea10a-9a1a-4f7b-8dad-3a112b3e5add", + "metadata": {}, + "outputs": [], + "source": [ + "def predict_image(index):\n", + " current_image = test_data[index, :]\n", + " prediction = np.argmax(NN.predict(current_image), axis = 1)\n", + " label = test[\"label\"].to_numpy()[index]\n", + " print(\"Prediction: \", prediction)\n", + " print(\"Label: \", label)\n", + " \n", + " current_image = current_image.reshape((28, 28)) * 255\n", + " plt.gray()\n", + " plt.imshow(current_image, interpolation = \"nearest\")\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "e143b0a5-0cf2-43b7-894c-8497e17b4461", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prediction: [9]\n", + "Label: 9\n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAaAAAAGdCAYAAABU0qcqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAa80lEQVR4nO3df2xV9f3H8ddtaa+I7WWltLd3FCiosMiPRQZdozIcDdAlCEoWAZOBYRBZMQPGdCwKui3pZItjLgyTxcDM5MfIBCLZWKDQEl0BQQm6zYayOiDQIiy9F4otSD/fP4j364UWPJd7efdeno/kJNx7z6f37fGEJ7f39tTnnHMCAOAWy7AeAABweyJAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADARA/rAa7W0dGhkydPKicnRz6fz3ocAIBHzjmdO3dOoVBIGRldv87pdgE6efKkiouLrccAANyk48ePq1+/fl0+3u2+BZeTk2M9AgAgAW7093nSArRq1SoNHDhQd9xxh0pLS7V///4vtY5vuwFAerjR3+dJCdDGjRu1ePFiLV++XO+9955GjhypiRMn6vTp08l4OgBAKnJJMGbMGFdZWRm9ffnyZRcKhVxVVdUN14bDYSeJjY2NjS3Ft3A4fN2/7xP+CujixYs6ePCgysvLo/dlZGSovLxcdXV11+zf3t6uSCQSswEA0l/CA3TmzBldvnxZhYWFMfcXFhaqqanpmv2rqqoUCASiG5+AA4Dbg/mn4JYuXapwOBzdjh8/bj0SAOAWSPjPAeXn5yszM1PNzc0x9zc3NysYDF6zv9/vl9/vT/QYAIBuLuGvgLKzszVq1ChVV1dH7+vo6FB1dbXKysoS/XQAgBSVlCshLF68WLNmzdI3vvENjRkzRitXrlRra6uefPLJZDwdACAFJSVAjz/+uD755BMtW7ZMTU1N+vrXv67t27df88EEAMDty+ecc9ZDfFEkElEgELAeAwBwk8LhsHJzc7t83PxTcACA2xMBAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwAQBAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwAQBAgCYIEAAABMECABgggABAEwQIACACQIEADCR8AC98MIL8vl8MdvQoUMT/TQAgBTXIxlf9L777tPOnTv//0l6JOVpAAApLCll6NGjh4LBYDK+NAAgTSTlPaAjR44oFApp0KBBeuKJJ3Ts2LEu921vb1ckEonZAADpL+EBKi0t1dq1a7V9+3atXr1ajY2Neuihh3Tu3LlO96+qqlIgEIhuxcXFiR4JANAN+ZxzLplP0NLSogEDBujll1/WnDlzrnm8vb1d7e3t0duRSIQIAUAaCIfDys3N7fLxpH86oHfv3rr33nvV0NDQ6eN+v19+vz/ZYwAAupmk/xzQ+fPndfToURUVFSX7qQAAKSThAVqyZIlqa2v18ccf6x//+IceffRRZWZmasaMGYl+KgBACkv4t+BOnDihGTNm6OzZs+rbt68efPBB7d27V3379k30UwEAUljSP4TgVSQSUSAQsB4DKe7hhx+Oa933v/99z2t69erlec2UKVM8r9m9e7fnNe+++67nNZK0atUqz2uu9+MWuD3d6EMIXAsOAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwAQBAgCYIEAAABMECABgggABAEwQIACACQIEADDBxUhxS1VUVHheU1VV5XnN0KFDPa+RpOzs7LjWefXBBx94XjN8+PAkTNK5trY2z2sqKys9r1m
zZo3nNUgdXIwUANAtESAAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwARXw4aysrLiWvfb3/7W85pZs2Z5XtOzZ0/Pa6qrqz2vkaSNGzd6XvPXv/7V85r//e9/ntfk5eV5XjNjxgzPayTppZde8rzms88+87wmnquWf/zxx57XwAZXwwYAdEsECABgggABAEwQIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkuRgo9+eSTca177bXXPK/55z//6XnNqlWrPK/5wx/+4HmNJF2+fDmudelm3759nteMHj3a85r777/f85pDhw55XgMbXIwUANAtESAAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmelgPgMTq0cP7/9KZM2fG9VyfffaZ5zVPP/205zU1NTWe1+DmXLx48ZY8z+TJkz2v4WKk6YNXQAAAEwQIAGDCc4D27NmjyZMnKxQKyefzacuWLTGPO+e0bNkyFRUVqWfPniovL9eRI0cSNS8AIE14DlBra6tGjhzZ5S8JW7FihV555RW9+uqr2rdvn3r16qWJEyeqra3tpocFAKQPz+9YV1RUqKKiotPHnHNauXKlnnvuOU2ZMkWS9Prrr6uwsFBbtmzR9OnTb25aAEDaSOh7QI2NjWpqalJ5eXn0vkAgoNLSUtXV1XW6pr29XZFIJGYDAKS/hAaoqalJklRYWBhzf2FhYfSxq1VVVSkQCES34uLiRI4EAOimzD8Ft3TpUoXD4eh2/Phx65EAALdAQgMUDAYlSc3NzTH3Nzc3Rx+7mt/vV25ubswGAEh/CQ1QSUmJgsGgqquro/dFIhHt27dPZWVliXwqAECK8/wpuPPnz6uhoSF6u7GxUYcOHVJeXp769++vhQsX6he/+IXuuecelZSU6Pnnn1coFNLUqVMTOTcAIMV5DtCBAwf08MMPR28vXrxYkjRr1iytXbtWzzzzjFpbWzVv3jy1tLTowQcf1Pbt23XHHXckbmoAQMrzOeec9RBfFIlEFAgErMdIWfPmzfO85tVXX43rubZt2+Z5zSOPPBLXc0Hy+Xye19xzzz1xPdcHH3zgeU1WVpbnNQ899JDnNe+8847nNbARDoev+76++afgAAC3JwIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJjw/OsY0L119Ztnk+Hq33yL5Bo0aJDnNR999FESJkmcpqYm6xFgiFdAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJLkYKGBg7dqznNX/605+SMEnivPvuu57X/Oc//0nCJEgVvAICAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwAQBAgCYIEAAABMECABgggABAExwMdI0E88FIeM1ffp0z2s2bNjgeU11dbXnNfHq1auX5zVz5871vGbFihWe12RmZnpec+bMGc9rJCk/P9/zGufcLVmD9MErIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABBcjTTM7d+70vOb111+P67m+973veV6zZcsWz2vivaBmPLKzsz2viecioVu3bvW85te//rXnNYsXL/a8RpK++93vxrUO8IJXQAAAEwQIAGDCc4D27NmjyZMnKxQKyefzXfMtldmzZ8vn88VskyZNStS8AIA04TlAra2tGjlypFatWtXlPpMmTdKpU6ei2/r1629qSABA+vH8IYSKigpVVFRcdx+/369gMBj3UACA9JeU94BqampUUFCgIUOGaP78+Tp79myX+7a3tysSicRsAID0l/AATZo0Sa+//rqqq6v10ksvqba2VhU
VFbp8+XKn+1dVVSkQCES34uLiRI8EAOiGEv5zQNOnT4/+efjw4RoxYoQGDx6smpoajR8//pr9ly5dGvOzCpFIhAgBwG0g6R/DHjRokPLz89XQ0NDp436/X7m5uTEbACD9JT1AJ06c0NmzZ1VUVJTspwIApBDP34I7f/58zKuZxsZGHTp0SHl5ecrLy9OLL76oadOmKRgM6ujRo3rmmWd09913a+LEiQkdHACQ2jwH6MCBA3r44Yejtz9//2bWrFlavXq1Dh8+rD/+8Y9qaWlRKBTShAkT9POf/1x+vz9xUwMAUp7nAI0bN07OuS4f//vf/35TA+HmXLp0yfOaJUuWxPVcLS0tntc88sgjntf06tXL85rW1lbPayRp//79ntf85Cc/8bymvr7e8xog3XAtOACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJhI+K/kRuo5c+ZMXOsWLlx4S9b07dvX85pPPvnE8xrcnIsXL1qPgBTDKyAAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwAQXI0W3x4VFr8jI8P7vxYEDByZ+kC785S9/uWXPhfTAKyAAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwAQXIwVSRGZmpuc1o0ePTsIkQGLwCggAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwAQBAgCY8BSgqqoqjR49Wjk5OSooKNDUqVNVX18fs09bW5sqKyvVp08f3XXXXZo2bZqam5sTOjQAIPV5ClBtba0qKyu1d+9e7dixQ5cuXdKECRPU2toa3WfRokV66623tGnTJtXW1urkyZN67LHHEj44ACC1efqNqNu3b4+5vXbtWhUUFOjgwYMaO3aswuGwXnvtNa1bt07f/va3JUlr1qzR1772Ne3du1ff/OY3Ezc5ACCl3dR7QOFwWJKUl5cnSTp48KAuXbqk8vLy6D5Dhw5V//79VVdX1+nXaG9vVyQSidkAAOkv7gB1dHRo4cKFeuCBBzRs2DBJUlNTk7Kzs9W7d++YfQsLC9XU1NTp16mqqlIgEIhuxcXF8Y4EAEghcQeosrJSH374oTZs2HBTAyxdulThcDi6HT9+/Ka+HgAgNXh6D+hzCxYs0LZt27Rnzx7169cven8wGNTFixfV0tIS8yqoublZwWCw06/l9/vl9/vjGQMAkMI8vQJyzmnBggXavHmzdu3apZKSkpjHR40apaysLFVXV0fvq6+v17Fjx1RWVpaYiQEAacHTK6DKykqtW7dOW7duVU5OTvR9nUAgoJ49eyoQCGjOnDlavHix8vLylJubq6efflplZWV8Ag4AEMNTgFavXi1JGjduXMz9a9as0ezZsyVJv/nNb5SRkaFp06apvb1dEydO1O9///uEDAsASB8+55yzHuKLIpGIAoGA9RhAt5OVleV5TXt7exIm6dzAgQM9rzl27FjiB0G3EQ6HlZub2+XjXAsOAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwAQBAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJuL6jagAcLW2tjbrEZBieAUEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwAQBAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCih/UAAL6cjo4Oz2v27NkT13ONHTs2rnWAF7wCAgCYIEA
AABOeAlRVVaXRo0crJydHBQUFmjp1qurr62P2GTdunHw+X8z21FNPJXRoAEDq8xSg2tpaVVZWau/evdqxY4cuXbqkCRMmqLW1NWa/uXPn6tSpU9FtxYoVCR0aAJD6PH0IYfv27TG3165dq4KCAh08eDDmTcs777xTwWAwMRMCANLSTb0HFA6HJUl5eXkx97/xxhvKz8/XsGHDtHTpUl24cKHLr9He3q5IJBKzAQDSX9wfw+7o6NDChQv1wAMPaNiwYdH7Z86cqQEDBigUCunw4cN69tlnVV9frzfffLPTr1NVVaUXX3wx3jEAACnK55xz8SycP3++/va3v+ntt99Wv379utxv165dGj9+vBoaGjR48OBrHm9vb1d7e3v0diQSUXFxcTwjAWktMzPT85rq6uq4niuenwOK59vup0+f9rwGqSMcDis3N7fLx+N6BbRgwQJt27ZNe/bsuW58JKm0tFSSugyQ3++X3++PZwwAQArzFCDnnJ5++mlt3rxZNTU1KikpueGaQ4cOSZKKioriGhAAkJ48BaiyslLr1q3T1q1blZOTo6amJklSIBBQz549dfToUa1bt07f+c531KdPHx0+fFiLFi3S2LFjNWLEiKT8BwAAUpOnAK1evVrSlR82/aI1a9Zo9uzZys7O1s6dO7Vy5Uq1traquLhY06ZN03PPPZewgQEA6cHzt+Cup7i4WLW1tTc1EADg9hD3p+CSJRKJKBAIWI8BALhJN/oUHBcjBQCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwES3C5BzznoEAEAC3Ojv824XoHPnzlmPAABIgBv9fe5z3ewlR0dHh06ePKmcnBz5fL6YxyKRiIqLi3X8+HHl5uYaTWiP43AFx+EKjsMVHIcrusNxcM7p3LlzCoVCysjo+nVOj1s405eSkZGhfv36XXef3Nzc2/oE+xzH4QqOwxUchys4DldYH4dAIHDDfbrdt+AAALcHAgQAMJFSAfL7/Vq+fLn8fr/1KKY4DldwHK7gOFzBcbgilY5Dt/sQAgDg9pBSr4AAAOmDAAEATBAgAIAJAgQAMJEyAVq1apUGDhyoO+64Q6Wlpdq/f7/1SLfcCy+8IJ/PF7MNHTrUeqyk27NnjyZPnqxQKCSfz6ctW7bEPO6c07Jly1RUVKSePXuqvLxcR44csRk2iW50HGbPnn3N+TFp0iSbYZOkqqpKo0ePVk5OjgoKCjR16lTV19fH7NPW1qbKykr16dNHd911l6ZNm6bm5majiZPjyxyHcePGXXM+PPXUU0YTdy4lArRx40YtXrxYy5cv13vvvaeRI0dq4sSJOn36tPVot9x9992nU6dORbe3337beqSka21t1ciRI7Vq1apOH1+xYoVeeeUVvfrqq9q3b5969eqliRMnqq2t7RZPmlw3Og6SNGnSpJjzY/369bdwwuSrra1VZWWl9u7dqx07dujSpUuaMGGCWltbo/ssWrRIb731ljZt2qTa2lqdPHlSjz32mOHUifdljoMkzZ07N+Z8WLFihdHEXXApYMyYMa6ysjJ6+/Llyy4UCrmqqirDqW695cuXu5EjR1qPYUqS27x5c/R2R0eHCwaD7le/+lX0vpaWFuf3+9369esNJrw1rj4Ozjk3a9YsN2XKFJN5rJw+fdpJcrW1tc65K//vs7Ky3KZNm6L7/Pvf/3aSXF1dndWYSXf1cXDOuW9961vuhz/8od1QX0K3fwV08eJFHTx4UOXl5dH7MjIyVF5errq6OsPJbBw5ckShUEiDBg3SE088oWPHjlmPZKqxsVFNTU0x50cgEFBpaelteX7U1NSooKBAQ4YM0fz583X27FnrkZIqHA5LkvLy8iRJBw8e1KVLl2LOh6FDh6p///5pfT5cfRw+98Y
bbyg/P1/Dhg3T0qVLdeHCBYvxutTtLkZ6tTNnzujy5csqLCyMub+wsFAfffSR0VQ2SktLtXbtWg0ZMkSnTp3Siy++qIceekgffvihcnJyrMcz0dTUJEmdnh+fP3a7mDRpkh577DGVlJTo6NGj+ulPf6qKigrV1dUpMzPTeryE6+jo0MKFC/XAAw9o2LBhkq6cD9nZ2erdu3fMvul8PnR2HCRp5syZGjBggEKhkA4fPqxnn31W9fX1evPNNw2njdXtA4T/V1FREf3ziBEjVFpaqgEDBujPf/6z5syZYzgZuoPp06dH/zx8+HCNGDFCgwcPVk1NjcaPH284WXJUVlbqww8/vC3eB72ero7DvHnzon8ePny4ioqKNH78eB09elSDBw++1WN2qtt/Cy4/P1+ZmZnXfIqlublZwWDQaKruoXfv3rr33nvV0NBgPYqZz88Bzo9rDRo0SPn5+Wl5fixYsEDbtm3T7t27Y359SzAY1MWLF9XS0hKzf7qeD10dh86UlpZKUrc6H7p9gLKzszVq1ChVV1dH7+vo6FB1dbXKysoMJ7N3/vx5HT16VEVFRdajmCkpKVEwGIw5PyKRiPbt23fbnx8nTpzQ2bNn0+r8cM5pwYIF2rx5s3bt2qWSkpKYx0eNGqWsrKyY86G+vl7Hjh1Lq/PhRsehM4cOHZKk7nU+WH8K4svYsGGD8/v9bu3ate5f//qXmzdvnuvdu7dramqyHu2W+tGPfuRqampcY2Oje+edd1x5ebnLz893p0+fth4tqc6dO+fef/999/777ztJ7uWXX3bvv/++++9//+ucc+6Xv/yl6927t9u6das7fPiwmzJliispKXGffvqp8eSJdb3jcO7cObdkyRJXV1fnGhsb3c6dO93999/v7rnnHtfW1mY9esLMnz/fBQIBV1NT406dOhXdLly4EN3nqaeecv3793e7du1yBw4ccGVlZa6srMxw6sS70XFoaGhwP/vZz9yBAwdcY2Oj27p1qxs0aJAbO3as8eSxUiJAzjn3u9/9zvXv399lZ2e7MWPGuL1791qPdMs9/vjjrqioyGVnZ7uvfvWr7vHHH3cNDQ3WYyXd7t27naRrtlmzZjnnrnwU+/nnn3eFhYXO7/e78ePHu/r6etuhk+B6x+HChQtuwoQJrm/fvi4rK8sNGDDAzZ07N+3+kdbZf78kt2bNmug+n376qfvBD37gvvKVr7g777zTPfroo+7UqVN2QyfBjY7DsWPH3NixY11eXp7z+/3u7rvvdj/+8Y9dOBy2Hfwq/DoGAICJbv8eEAAgPREgAIAJAgQAMEGAAAAmCBAAwAQBAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJv4P8+G5Jf8zj5cAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "predict_image(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "7b1dd60a-05eb-4c3a-9209-ba598be45bb9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prediction: [1]\n", + "Label: 1\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "predict_image(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "5d69171e-e864-44a6-9e51-183002a47c90", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prediction: [1]\n", + "Label: 1\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "predict_image(3)" + ] + }, + { + "cell_type": "markdown", + "id": "747cd3d0-29d2-444b-83dc-1c4b57687704", + "metadata": {}, + "source": [ + "### **Binary-Class Classification**\n", + "\n", + "### **Dataset: [Breast Cancer Wisconsin (Diagnostic) Data Set](https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Diagnostic%29)**" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "7711d2b6-5aab-471c-aa45-06e0c5ddcb2c", + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.read_csv(\"binaryclass_train.csv\", header = None)\n", + "data[\"label\"] = data[1].apply(lambda x: 1 if x == \"M\" else 0)\n", + "train, test = train_test_split(data, test_size = 0.3)\n", + "train_data = train.loc[:, ~train.columns.isin([0, 1, \"label\"])].to_numpy()\n", + "train_target = train[\"label\"].to_numpy()\n", + "test_data = test.loc[:, ~test.columns.isin([0, 1, \"label\"])].to_numpy()\n", + "test_target = test[\"label\"].to_numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "eaa8f0cc-78f0-4984-b701-437195559a4a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...232425262728293031label
421906564B14.69013.9898.22656.10.103100.183600.1450000.063000...18.34114.10809.20.131200.363500.321900.110800.28270.092080
30689344B13.20015.8284.07537.30.085110.052510.0014610.003261...20.4592.00636.90.112800.134600.011200.025000.26510.083850
542921644B14.74025.4294.70668.60.082750.072140.0410500.030270...32.29107.40826.40.106000.137600.161100.109500.27220.069560
492914062M18.01020.56118.401007.00.100100.128900.1170000.077620...26.06143.401426.00.130900.232700.254400.148900.32510.076251
56892751B7.76024.5447.92181.00.052630.043620.0000000.000000...30.3759.16268.60.089960.064440.000000.000000.28710.070390
..................................................................
30489296B11.46018.1673.59403.10.088530.076940.0334400.015020...21.6182.69489.80.114400.178900.122600.055090.22080.076380
17087139402B12.32012.3978.85464.10.102800.069810.0398700.037000...15.6486.97549.10.138500.126600.124200.093910.28270.067710
56857637M19.21018.57125.501152.00.105300.126700.1323000.089940...28.14170.102145.00.162400.351100.387900.209100.35370.082941
439909410B14.02015.6689.59606.50.079660.055810.0208700.026520...19.3196.53688.90.103400.101700.062600.082160.21360.067100
424907145B9.74219.1261.93289.70.107500.083330.0089340.019670...23.1771.79380.90.139800.135200.020850.045890.31960.080090
\n", + "

398 rows × 33 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 \\\n", + "421 906564 B 14.690 13.98 98.22 656.1 0.10310 0.18360 0.145000 \n", + "306 89344 B 13.200 15.82 84.07 537.3 0.08511 0.05251 0.001461 \n", + "542 921644 B 14.740 25.42 94.70 668.6 0.08275 0.07214 0.041050 \n", + "492 914062 M 18.010 20.56 118.40 1007.0 0.10010 0.12890 0.117000 \n", + "568 92751 B 7.760 24.54 47.92 181.0 0.05263 0.04362 0.000000 \n", + ".. ... .. ... ... ... ... ... ... ... \n", + "304 89296 B 11.460 18.16 73.59 403.1 0.08853 0.07694 0.033440 \n", + "170 87139402 B 12.320 12.39 78.85 464.1 0.10280 0.06981 0.039870 \n", + "56 857637 M 19.210 18.57 125.50 1152.0 0.10530 0.12670 0.132300 \n", + "439 909410 B 14.020 15.66 89.59 606.5 0.07966 0.05581 0.020870 \n", + "424 907145 B 9.742 19.12 61.93 289.7 0.10750 0.08333 0.008934 \n", + "\n", + " 9 ... 23 24 25 26 27 28 29 \\\n", + "421 0.063000 ... 18.34 114.10 809.2 0.13120 0.36350 0.32190 0.11080 \n", + "306 0.003261 ... 20.45 92.00 636.9 0.11280 0.13460 0.01120 0.02500 \n", + "542 0.030270 ... 32.29 107.40 826.4 0.10600 0.13760 0.16110 0.10950 \n", + "492 0.077620 ... 26.06 143.40 1426.0 0.13090 0.23270 0.25440 0.14890 \n", + "568 0.000000 ... 30.37 59.16 268.6 0.08996 0.06444 0.00000 0.00000 \n", + ".. ... ... ... ... ... ... ... ... ... \n", + "304 0.015020 ... 21.61 82.69 489.8 0.11440 0.17890 0.12260 0.05509 \n", + "170 0.037000 ... 15.64 86.97 549.1 0.13850 0.12660 0.12420 0.09391 \n", + "56 0.089940 ... 28.14 170.10 2145.0 0.16240 0.35110 0.38790 0.20910 \n", + "439 0.026520 ... 19.31 96.53 688.9 0.10340 0.10170 0.06260 0.08216 \n", + "424 0.019670 ... 23.17 71.79 380.9 0.13980 0.13520 0.02085 0.04589 \n", + "\n", + " 30 31 label \n", + "421 0.2827 0.09208 0 \n", + "306 0.2651 0.08385 0 \n", + "542 0.2722 0.06956 0 \n", + "492 0.3251 0.07625 1 \n", + "568 0.2871 0.07039 0 \n", + ".. ... ... ... 
\n", + "304 0.2208 0.07638 0 \n", + "170 0.2827 0.06771 0 \n", + "56 0.3537 0.08294 1 \n", + "439 0.2136 0.06710 0 \n", + "424 0.3196 0.08009 0 \n", + "\n", + "[398 rows x 33 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "751772e4-e90b-4c11-ae63-cdb9602529e7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---- Model Summary ----\n", + "Layer 1: relu\n", + "W: (16, 30) b: (16, 1)\n", + "Trainable parameters: 496\n", + "Layer 2: relu\n", + "W: (16, 16) b: (16, 1)\n", + "Trainable parameters: 272\n", + "Layer 3: sigmoid\n", + "W: (1, 16) b: (1, 1)\n", + "Trainable parameters: 17\n" + ] + } + ], + "source": [ + "NN = NeuralNetwork(input_size = train_data.shape[1])\n", + "NN.add_layer(16, \"relu\")\n", + "NN.add_layer(16, \"relu\")\n", + "NN.add_layer(1, \"sigmoid\")\n", + "NN.compile(loss = \"binary crossentropy\")\n", + "NN.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "0f010d0a-ceef-4824-b36b-9752547248f1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training finished after epoch 1000 with a loss of 0.17439436582044646.\n" + ] + } + ], + "source": [ + "hist = NN.fit(train_data, train_target, epochs = 1000, batch_size = 32, learning_rate = 0.01, verbose = 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "f4a324e2-2070-43d5-8cb6-8b07cbb69078", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_history(hist);" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "8b581b00-8d8e-4ea6-80c1-8f11dfbad692", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training accuracy: 0.9346733668341709\n", + "Test accuracy: 0.9298245614035088\n" + ] + } + ], + "source": [ + "train_predictions = np.round(NN.predict(train_data))\n", + "print(\"Training accuracy: \", accuracy_score(train[\"label\"].to_numpy(), train_predictions))\n", + "test_predictions = np.round(NN.predict(test_data))\n", + "print(\"Test accuracy: \", accuracy_score(test[\"label\"].to_numpy(), test_predictions))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b49a45e0-9906-4a34-912c-f3ec10ea2fa2", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/data.zip b/data.zip new file mode 100644 index 0000000..52bfd04 Binary files /dev/null and b/data.zip differ diff --git a/neuralnet-colab.ipynb b/neuralnet-colab.ipynb new file mode 100644 index 0000000..bd1f3b0 --- /dev/null +++ b/neuralnet-colab.ipynb @@ -0,0 +1,1923 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c84d618c-446a-47ea-ac6d-133eb91f9411", + "metadata": { + "tags": [] + }, + "source": [ + "# **Implementation of a Neural Network *\"from scratch\"* with NumPy**" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "2dc17e4f-82ae-4023-84aa-7b3a7f36a6ac", + "metadata": {}, + "outputs": [], + "source": [ + 
"import math\n", + "import numpy as np\n", + "from typing import Tuple\n", + "from typing import List" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1c6ec59d-b246-4e8e-8488-f2281ec42d1d", + "metadata": {}, + "outputs": [], + "source": [ + "class LayerInitializer:\n", + " \"\"\"\n", + " Functions for layer weight initialization.\n", + " \"\"\"\n", + "\n", + " # He normal initialization\n", + " @staticmethod\n", + " def he_normal(size: Tuple[int], fan_in: int) -> np.array:\n", + " \"\"\"\n", + " HE NORMAL INITIALIZATION\n", + " Draws samples from a normal distribution centered at 0 mean\n", + " with stddev = sqrt(2 / fan_in) where fan_in is the number of input\n", + " units per unit in the layer.\n", + " Parameters:\n", + " - size: Tuple[int] (rows, columns)\n", + " shape of the initialized weight matrix\n", + " - fan_in: int\n", + " number of input units per unit in the layer\n", + " Returns:\n", + " - np.array (rows, columns)\n", + " He normal initialized weight matrix\n", + " Ref:\n", + " https://arxiv.org/abs/1502.01852\n", + " \"\"\"\n", + " return np.random.normal(0, math.sqrt(2 / fan_in), size = size)\n", + "\n", + " # Glorot / Xavier normal initialization\n", + " @staticmethod\n", + " def glorot_normal(size: Tuple[int], fan_in: int, fan_out: int) -> np.array:\n", + " \"\"\"\n", + " GLOROT / XAVIER NORMAL INITIALIZATION\n", + " Draws samples from a normal distribution centered at 0 mean\n", + " with stddev = sqrt(2 / (fan_in + fan_out)) where fan_in is the number of\n", + " input units per unit in the layer and fan_out is the number of output\n", + " units per unit in the layer.\n", + " Parameters:\n", + " - size: Tuple[int] (rows, columns)\n", + " shape of the initialized weight matrix\n", + " - fan_in: int\n", + " number of input units per unit in the layer\n", + " - fan_out: int\n", + " number of output units per unit in the layer\n", + " Returns:\n", + " - np.array (rows, columns)\n", + " Glorot normal initialized 
weight matrix\n", + " Ref:\n", + " http://proceedings.mlr.press/v9/glorot10a.html\n", + " \"\"\"\n", + " return np.random.normal(0, math.sqrt(2 / (fan_in + fan_out)), size = size)\n", + "\n", + " # Bias initialization\n", + " @staticmethod\n", + " def bias(size: Tuple[int]):\n", + " \"\"\"\n", + " BIAS INITIALIZATION\n", + " Initializes the bias vector / matrix with zeros.\n", + " Parameters:\n", + " - size: Tuple[int] (rows, columns)\n", + " shape of the initialized bias vector / matrix\n", + " Returns:\n", + " - np.array (rows, columns)\n", + " Zero initialized bias vector / matrix\n", + " Ref:\n", + " https://cs231n.github.io/neural-networks-2/\n", + " \"\"\"\n", + " return np.zeros(shape = size)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "88017f0b-882c-4195-9e20-564626be8284", + "metadata": {}, + "outputs": [], + "source": [ + "class ActivationFunctions:\n", + " \"\"\"\n", + " Layer activation functions.\n", + " \"\"\"\n", + "\n", + " # Rectified Linear Units\n", + " @staticmethod\n", + " def relu(x: np.array, derivative: bool = False) -> np.array:\n", + " \"\"\"\n", + " RECTIFIED LINEAR UNITS\n", + " ReLU activation function.\n", + " Parameters:\n", + " - x: np.array\n", + " input matrix to apply activation function to\n", + " - derivative: bool\n", + " if set to 'True' returns the derivative instead\n", + " DEFAULT: False\n", + " Returns:\n", + " - np.array (same shape as x)\n", + " activated x / derivative of x\n", + " Ref:\n", + " https://en.wikipedia.org/wiki/Rectifier_(neural_networks)\n", + " \"\"\"\n", + " if not derivative:\n", + " return np.maximum(x, 0)\n", + " else:\n", + " return np.where(x > 0, 1, 0)\n", + "\n", + " # Sigmoid activation function\n", + " @staticmethod\n", + " def sigmoid(x: np.array, derivative: bool = False) -> np.array:\n", + " \"\"\"\n", + " SIGMOID / LOGISTIC FUNCTION\n", + " Sigmoid activation function.\n", + " Parameters:\n", + " - x: np.array\n", + " input matrix to apply activation function to\n", 
+ " - derivative: bool\n", + " if set to 'True' returns the derivative instead\n", + " DEFAULT: False\n", + " Returns:\n", + " - np.array (same shape as x)\n", + " activated x / derivative of x\n", + " Refs:\n", + " https://en.wikipedia.org/wiki/Sigmoid_function\n", + " https://en.wikipedia.org/wiki/Activation_function\n", + " \"\"\"\n", + " def f_sigmoid(x: np.array) -> np.array:\n", + " return 1 / (1 + np.exp(-x))\n", + "\n", + " if not derivative:\n", + " return f_sigmoid(x)\n", + " else:\n", + " return f_sigmoid(x) * (1 - f_sigmoid(x))\n", + "\n", + " # Softmax activation function\n", + " @staticmethod\n", + " def softmax(x: np.array, derivative: bool = False) -> np.array:\n", + " \"\"\"\n", + " SOFTMAX FUNCTION\n", + " Stable softmax activation function.\n", + " Parameters:\n", + " - x: np.array\n", + " input matrix to apply activation function to\n", + " Returns:\n", + " - np.array (same shape as x)\n", + " activated x\n", + " Refs:\n", + " https://en.wikipedia.org/wiki/Softmax_function\n", + " https://eli.thegreenplace.net/2016/the-softmax-function-and-its-derivative/\n", + " \"\"\"\n", + " if not derivative:\n", + " n = np.exp(x - np.max(x)) # stable softmax\n", + " d = np.sum(n, axis = 0)\n", + " return n / d\n", + " else:\n", + " raise NotImplementedError(\"Softmax derivative not implemented!\")\n", + " # https://stackoverflow.com/questions/54976533/derivative-of-softmax-function-in-python\n", + " # xr = x.reshape((-1, 1))\n", + " # return np.diagflat(x) - np.dot(xr, xr.T)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "654dc815-7e9e-46de-9f01-7d124736fbf5", + "metadata": {}, + "outputs": [], + "source": [ + "class LossFunctions:\n", + " \"\"\"\n", + " Loss functions for neural net fitting.\n", + " \"\"\"\n", + "\n", + " # binary cross entropy loss\n", + " @staticmethod\n", + " def binary_cross_entropy(y_true: np.array, y_predicted: np.array) -> np.array:\n", + " \"\"\"\n", + " BINARY CROSS ENTROPY LOSS\n", + " Cross entropy loss for 
binary-class classification.\n", + " L[BCE] = - p(i) * log(q(i)) - (1 - p(i)) * log(1 - q(i))\n", + " where\n", + " - p(i) is the true label\n", + " - q(i) is the predicted sigmoid probability\n", + " Parameters:\n", + " - y_true: np.array (1, sample_size)\n", + " true label vector\n", + " - y_predicted: np.array (1, sample_size)\n", + " the sigmoid probability\n", + " Returns:\n", + " - np.array (sample_size,)\n", + " loss for every given sample\n", + " Ref:\n", + " https://en.wikipedia.org/wiki/Cross_entropy\n", + " \"\"\"\n", + " losses = []\n", + " for i in range(y_true.shape[1]):\n", + " ## stable BCE\n", + " losses.append(float(-1 * (y_true[:, i] * np.log(y_predicted[:, i] + 1e-7) + (1 - y_true[:, i]) * np.log(1 - y_predicted[:, i] + 1e-7))))\n", + " ## unstable BCE\n", + " # losses.append(float(-1 * (y_true[:, i] * np.log(y_predicted[:, i]) + (1 - y_true[:, i]) * np.log(1 - y_predicted[:, i]))))\n", + " return np.array(losses)\n", + "\n", + " # categorical cross entropy loss\n", + " @staticmethod\n", + " def categorical_cross_entropy(y_true: np.array, y_predicted: np.array) -> np.array:\n", + " \"\"\"\n", + " CATEGORICAL CROSS ENTROPY LOSS\n", + " Cross entropy loss for binary- and multi-class classification.\n", + " L[CCE] = - sum[from i = 0 to n]( p(i) * log(q(i)) )\n", + " where\n", + " - p(i) is the true label\n", + " - q(i) is the predicted softmax probability\n", + " - n is the number of classes\n", + " Parameters:\n", + " - y_true: np.array (n_classes, sample_size)\n", + " one-hot encoded true label vector\n", + " - y_predicted: np.array (n_classes, sample_size)\n", + " the softmax probabilities\n", + " Returns:\n", + " - np.array (sample_size,)\n", + " loss for every given sample\n", + " Ref:\n", + " https://en.wikipedia.org/wiki/Cross_entropy\n", + " \"\"\"\n", + " losses = []\n", + " for i in range(y_true.shape[1]):\n", + " ## stable CCE\n", + " # losses.append(float(-1 * np.sum(y_true[:, i] * np.log(y_predicted[:, i] + 1e-7))))\n", + " ## 
unstable CCE\n", + " losses.append(float(-1 * np.sum(y_true[:, i] * np.log(y_predicted[:, i]))))\n", + "\n", + " return np.array(losses)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6c6c347f-a0d8-4494-b8cf-f29295f42877", + "metadata": {}, + "outputs": [], + "source": [ + "class NeuralNetwork:\n", + " \"\"\"\n", + " Implementation of a classic feed-forward neural network that is trained via\n", + " backpropagation. Adopts a Keras-like interface for convenient usage (see\n", + " https://michabirklbauer.github.io/neuralnet for examples).\n", + " \"\"\"\n", + "\n", + " # constructor\n", + " def __init__(self, input_size: int):\n", + " \"\"\"\n", + " CONSTRUCTOR\n", + " Initializes the neural network model.\n", + " Parameters:\n", + " - input_size: int\n", + " nr. of features in the training data\n", + " Returns:\n", + " - None\n", + " Example usage:\n", + " NN = NeuralNetwork(data.shape[1])\n", + " \"\"\"\n", + " self.input_size = input_size\n", + " self.architecture = []\n", + " self.layers = []\n", + "\n", + " # adding layers\n", + " def add_layer(self, units: int, activation: str = \"relu\", initialization: str = None) -> None:\n", + " \"\"\"\n", + " LAYER MANAGEMENT\n", + " Construct the neural network architecture by adding different layers.\n", + " Parameters:\n", + " - units: int\n", + " nr. 
of units in the layer\n", + " - activation: str, one of (\"relu\", \"sigmoid\", \"softmax\")\n", + " activation function of the layer\n", + " DEFAULT: \"relu\"\n", + " - initialization: str, one of (\"he\", \"glorot\")\n", + " weight initialization to use\n", + " DEFAULT: None, \"relu\" layers are 'he normal' initialized,\n", + " all other layers are 'glorot normal'\n", + " initialized\n", + " Returns:\n", + " - None\n", + " Example usage:\n", + " NN = NeuralNetwork(data.shape[1])\n", + " NN.add_layer(16, \"relu\", \"glorot\")\n", + " NN.add_layer(8)\n", + " NN.add_layer(1, \"sigmoid\")\n", + " \"\"\"\n", + " if initialization == None:\n", + " if activation == \"relu\":\n", + " layer_init = \"he\"\n", + " else:\n", + " layer_init = \"glorot\"\n", + " else:\n", + " layer_init = initialization\n", + "\n", + " self.architecture.append({\"units\": units, \"activation\": activation, \"init\": layer_init})\n", + "\n", + " # compiling model\n", + " def compile(self, loss: str = \"categorical crossentropy\") -> None:\n", + " \"\"\"\n", + " MODEL INITIALIZATION\n", + " Initializes all parameters of the neural network architecture and\n", + " prepares the model for training.\n", + " Parameters:\n", + " - loss: str, one of (\"binary crossentropy\", \"categorical crossentropy\")\n", + " the loss function that should be used for training\n", + " DEFAULT: \"categorical crossentropy\"\n", + " Returns:\n", + " - None\n", + " Example usage:\n", + " NN = NeuralNetwork(data.shape[1])\n", + " NN.add_layer(16, \"relu\", \"glorot\")\n", + " NN.add_layer(8)\n", + " NN.add_layer(1, \"sigmoid\")\n", + " NN.compile(\"binary crossentropy\")\n", + " \"\"\"\n", + " self.loss = loss\n", + "\n", + " # initialize all layer weights and biases\n", + " for i in range(len(self.architecture)):\n", + " units = self.architecture[i][\"units\"]\n", + " activation = self.architecture[i][\"activation\"]\n", + " init = self.architecture[i][\"init\"]\n", + "\n", + " units_previous_layer = self.input_size\n", 
+ " if i > 0:\n", + " units_previous_layer = self.architecture[i - 1][\"units\"]\n", + " units_next_layer = 0\n", + " if i < len(self.architecture) - 1:\n", + " units_next_layer = self.architecture[i + 1][\"units\"]\n", + "\n", + " if init == \"he\":\n", + " W = LayerInitializer.he_normal((units, units_previous_layer), fan_in = units_previous_layer)\n", + " b = LayerInitializer.bias((units, 1))\n", + " elif init == \"glorot\":\n", + " W = LayerInitializer.glorot_normal((units, units_previous_layer), fan_in = units_previous_layer, fan_out = units_next_layer)\n", + " b = LayerInitializer.bias((units, 1))\n", + " else:\n", + " raise NotImplementedError(\"Layer initialization '\" + init + \"' not implemented!\")\n", + "\n", + " self.layers.append({\"W\": W, \"b\": b, \"activation\": activation})\n", + "\n", + " # forward propagation\n", + " def __forward_propagation(self, data: np.array) -> None:\n", + " \"\"\"\n", + " FORWARD PROPAGATION (INTERNAL)\n", + " Internal function calculating the forward pass of A(Wx + b).\n", + " - The result of 'Wx + b' (L) is stored in self.layers[layer][\"L\"]\n", + " - The result of 'Activation(L)' (A) is stored in self.layers[layer][\"A\"]\n", + " Parameters:\n", + " - data: np.array\n", + " input data for the forward pass\n", + " Returns:\n", + " - None, \"L\" and \"A\" are set in the layer dictionary, to retrieve the\n", + " last layer output call 'self.layers[-1][\"A\"]'\n", + " \"\"\"\n", + "\n", + " for i in range(len(self.layers)):\n", + "\n", + " if i == 0:\n", + " A = data\n", + " else:\n", + " A = self.layers[i - 1][\"A\"]\n", + "\n", + " # Wx + b where x is the input data for the first layer and otherwise\n", + " # the output (A) of the previous layer\n", + " self.layers[i][\"L\"] = self.layers[i][\"W\"].dot(A) + self.layers[i][\"b\"]\n", + " if self.layers[i][\"activation\"] == \"relu\":\n", + " self.layers[i][\"A\"] = ActivationFunctions.relu(self.layers[i][\"L\"])\n", + " elif self.layers[i][\"activation\"] == 
\"sigmoid\":\n", + " self.layers[i][\"A\"] = ActivationFunctions.sigmoid(self.layers[i][\"L\"])\n", + " elif self.layers[i][\"activation\"] == \"softmax\":\n", + " self.layers[i][\"A\"] = ActivationFunctions.softmax(self.layers[i][\"L\"])\n", + " else:\n", + " raise NotImplementedError(\"Activation function '\" + layer[\"activation\"] + \"' not implemented!\")\n", + "\n", + " # back propagation\n", + " def __back_propagation(self, data: np.array, target: np.array, learning_rate: float = 0.1) -> float:\n", + " \"\"\"\n", + " BACK PROPAGATION (INTERNAL)\n", + " Internal function for learning layer weights and biases using gradient\n", + " descent and back propagation.\n", + " Parameters:\n", + " - data: np.array\n", + " input data\n", + " - target: np.array\n", + " class labels of the input data\n", + " - learning_rate: float\n", + " learning rate / how far in the direction of the gradient to\n", + " go\n", + " DEFAULT: 0.1\n", + " Returns:\n", + " - float\n", + " loss of the current forward pass\n", + " \"\"\"\n", + " # forward pass\n", + " self.__forward_propagation(data)\n", + "\n", + " output = self.layers[-1][\"A\"]\n", + " batch_size = data.shape[1]\n", + " loss = 0\n", + "\n", + " # calculate loss of the current forward pass\n", + " if self.loss == \"categorical crossentropy\":\n", + " losses = LossFunctions.categorical_cross_entropy(y_true = target, y_predicted = output)\n", + " # reduction by sum over batch size\n", + " loss = float(np.sum(losses) / batch_size)\n", + " elif self.loss == \"binary crossentropy\":\n", + " losses = LossFunctions.binary_cross_entropy(y_true = target, y_predicted = output)\n", + " # reduction by sum over batch size\n", + " loss = float(np.sum(losses) / batch_size)\n", + " else:\n", + " raise NotImplementedError(\"Loss function '\" + self.loss + \"' not implemented!\")\n", + "\n", + " # calculate and back pass the derivative of the loss w.r.t the output\n", + " # activation function\n", + " # this implementation supports CCE + 
Softmax and BCE + Sigmoid in the\n", + " # output layer\n", + " if self.loss == \"categorical crossentropy\" and self.layers[-1][\"activation\"] == \"softmax\":\n", + " # for categorical cross entropy loss the derivative of softmax simplifies to\n", + " # P(i) - Y(i)\n", + " # where P(i) is the softmax output and Y(i) is the true label\n", + " # https://www.ics.uci.edu/~pjsadows/notes.pdf\n", + " # https://math.stackexchange.com/questions/945871/derivative-of-softmax-loss-function\n", + " previous_layer_activation = data.T if len(self.layers) == 1 else self.layers[len(self.layers) - 2][\"A\"].T\n", + " dL = self.layers[-1][\"A\"] - target\n", + " dW = dL.dot(previous_layer_activation) / batch_size\n", + " db = np.reshape(np.sum(dL, axis = 1), (-1, 1)) / batch_size\n", + "\n", + " # parameter tracking\n", + " previous_dL = np.copy(dL)\n", + " previous_W = np.copy(self.layers[-1][\"W\"])\n", + "\n", + " # update\n", + " self.layers[-1][\"W\"] -= learning_rate * dW\n", + " self.layers[-1][\"b\"] -= learning_rate * db\n", + " elif self.loss == \"binary crossentropy\" and self.layers[-1][\"activation\"] == \"sigmoid\":\n", + " # for binary cross entropy loss the derivative of the loss function is\n", + " # L' = -1 * (Y(i) / P(i) - (1 - Y(i)) / (1 - P(i)))\n", + " # where P(i) is the sigmoid output and Y(i) is the true label\n", + " # and we multiply that with the derivative of the sigmoid function [1]\n", + " # https://math.stackexchange.com/questions/2503428/derivative-of-binary-cross-entropy-why-are-my-signs-not-right\n", + " previous_layer_activation = data.T if len(self.layers) == 1 else self.layers[len(self.layers) - 2][\"A\"].T\n", + " # [1]\n", + " # A = np.clip(self.layers[-1][\"A\"], 1e-7, 1 - 1e-7)\n", + " # derivative_loss = -1 * np.divide(target, A) + np.divide(1 - target, 1 - A)\n", + " # dL = derivative_loss * ActivationFunctions.sigmoid(self.layers[-1][\"L\"], derivative = True)\n", + " # alternatively we can directly simplify the derivative of the binary 
cross entropy loss\n", + " # with sigmoid activation function to\n", + " # P(i) - Y(i)\n", + " # where P(i) is the sigmoid output and Y(i) is the true label\n", + " # done in [2]\n", + " # https://math.stackexchange.com/questions/4227931/what-is-the-derivative-of-binary-cross-entropy-loss-w-r-t-to-input-of-sigmoid-fu\n", + " # [2]\n", + " dL = (self.layers[-1][\"A\"] - target) / batch_size\n", + " dW = dL.dot(previous_layer_activation) / batch_size\n", + " db = np.reshape(np.sum(dL, axis = 1), (-1, 1)) / batch_size\n", + "\n", + " # parameter tracking\n", + " previous_dL = np.copy(dL)\n", + " previous_W = np.copy(self.layers[-1][\"W\"])\n", + "\n", + " # update\n", + " self.layers[-1][\"W\"] -= learning_rate * dW\n", + " self.layers[-1][\"b\"] -= learning_rate * db\n", + " else:\n", + " raise NotImplementedError(\"The combination of '\" + self.loss + \" loss' and '\" + self.layers[i][\"activation\"] + \" activation' is not implemented!\")\n", + "\n", + " # back propagation through the remaining hidden layers\n", + " for i in reversed(range(len(self.layers) - 1)):\n", + "\n", + " if i == 0:\n", + " if self.layers[i][\"activation\"] == \"relu\":\n", + " dL = previous_W.T.dot(previous_dL) * ActivationFunctions.relu(self.layers[i][\"L\"], derivative = True)\n", + " dW = dL.dot(data.T) / batch_size\n", + " db = np.reshape(np.sum(dL, axis = 1), (-1, 1)) / batch_size\n", + " elif self.layers[i][\"activation\"] == \"sigmoid\":\n", + " dL = previous_W.T.dot(previous_dL) * ActivationFunctions.sigmoid(self.layers[i][\"L\"], derivative = True)\n", + " dW = dL.dot(data.T) / batch_size\n", + " db = np.reshape(np.sum(dL, axis = 1), (-1, 1)) / batch_size\n", + " else:\n", + " raise NotImplementedError(\"Activation function '\" + self.layers[i][\"activation\"] + \"' not implemented for hidden layers!\")\n", + "\n", + " # parameter tracking\n", + " previous_dL = np.copy(dL)\n", + " previous_W = np.copy(self.layers[i][\"W\"])\n", + "\n", + " #update\n", + " self.layers[i][\"W\"] -= 
learning_rate * dW\n", + " self.layers[i][\"b\"] -= learning_rate * db\n", + " else:\n", + " if self.layers[i][\"activation\"] == \"relu\":\n", + " dL = previous_W.T.dot(previous_dL) * ActivationFunctions.relu(self.layers[i][\"L\"], derivative = True)\n", + " dW = dL.dot(self.layers[i - 1][\"A\"].T) / batch_size\n", + " db = np.reshape(np.sum(dL, axis = 1), (-1, 1)) / batch_size\n", + " elif self.layers[i][\"activation\"] == \"sigmoid\":\n", + " dL = previous_W.T.dot(previous_dL) * ActivationFunctions.sigmoid(self.layers[i][\"L\"], derivative = True)\n", + " dW = dL.dot(self.layers[i - 1][\"A\"].T) / batch_size\n", + " db = np.reshape(np.sum(dL, axis = 1), (-1, 1)) / batch_size\n", + " else:\n", + " raise NotImplementedError(\"Activation function '\" + self.layers[i][\"activation\"] + \"' not implemented for hidden layers!\")\n", + "\n", + " # parameter tracking\n", + " previous_dL = np.copy(dL)\n", + " previous_W = np.copy(self.layers[i][\"W\"])\n", + "\n", + " #update\n", + " self.layers[i][\"W\"] -= learning_rate * dW\n", + " self.layers[i][\"b\"] -= learning_rate * db\n", + "\n", + " return loss\n", + "\n", + " # neural network architecture summary\n", + " def summary(self) -> None:\n", + " \"\"\"\n", + " MODEL SUMMARY\n", + " Print a summary of the neural network architecture.\n", + " Parameters:\n", + " - None\n", + " Returns:\n", + " - None, prints a summary of the neural network architecture to\n", + " stdout\n", + " Example usage:\n", + " NN.summary()\n", + " \"\"\"\n", + " print(\"---- Model Summary ----\")\n", + " for i, layer in enumerate(self.layers):\n", + " print(\"Layer \" + str(i + 1) + \": \" + layer[\"activation\"])\n", + " if \"L\" in layer:\n", + " print(\"W: \" + str(layer[\"W\"].shape) + \" \" +\n", + " \"b: \" + str(layer[\"b\"].shape) + \" \" +\n", + " \"L: \" + str(layer[\"L\"].shape) + \" \" +\n", + " \"A: \" + str(layer[\"A\"].shape))\n", + " else:\n", + " print(\"W: \" + str(layer[\"W\"].shape) + \" \" +\n", + " \"b: \" + 
str(layer[\"b\"].shape))\n", + " print(\"Trainable parameters: \" + str(\n", + " layer[\"W\"].shape[0] * layer[\"W\"].shape[1] +\n", + " layer[\"b\"].shape[0] * layer[\"b\"].shape[1]))\n", + "\n", + " # train neural network on data\n", + " def fit(self, X: np.array, y: np.array, epochs: int = 100, batch_size: int = 32, learning_rate: float = 0.1, verbose: int = 1) -> List[float]:\n", + " \"\"\"\n", + " TRAIN MODEL\n", + " Train the neural network.\n", + " Parameters:\n", + " - X: np.array (samples, features)\n", + " input data to train on\n", + " - y: np.array (samples, labels) or (labels,)\n", + " labels of the input data\n", + " - epochs: int\n", + " how many iterations to train\n", + " DEFAULT: 100\n", + " - batch_size: int\n", + " how many samples to use per backward pass\n", + " DEFAULT: 32\n", + " - learning_rate: float\n", + " learning rate / how far in the direction of the gradient to\n", + " go\n", + " DEFAULT: 0.1\n", + " - verbose: int, one of (0, 1) / bool\n", + " print information for every epoch\n", + " DEFAULT: 1 (True)\n", + " Returns:\n", + " - List[float]\n", + " loss history over all epochs\n", + " Example usage:\n", + " NN.fit(data_train, labels_train)\n", + " \"\"\"\n", + " # reshaping inputs\n", + " if y.ndim == 1:\n", + " y = np.reshape(y, (-1, 1))\n", + "\n", + " data = X.T\n", + " target = y.T\n", + " sample_size = data.shape[1]\n", + "\n", + " history = []\n", + "\n", + " # train network\n", + " for i in range(epochs):\n", + " if verbose:\n", + " print(\"Training epoch \" + str(i + 1) + \"...\")\n", + " # generate random batches of size batch_size\n", + " idx = np.random.choice(sample_size, sample_size, replace = False)\n", + " batches = np.array_split(idx, math.ceil(sample_size / batch_size))\n", + " batch_losses = []\n", + " for batch in batches:\n", + " current_data = data[:, batch]\n", + " current_target = target[:, batch]\n", + " batch_loss = self.__back_propagation(current_data, current_target, learning_rate = learning_rate)\n", + " 
batch_losses.append(batch_loss)\n", + " history.append(np.mean(batch_losses))\n", + " if verbose:\n", + " print(\"Current loss: \", np.mean(batch_losses))\n", + " print(\"Epoch \" + str(i + 1) + \" done!\")\n", + "\n", + " print(\"Training finished after epoch \" + str(epochs) + \" with a loss of \" + str(history[-1]) + \".\")\n", + "\n", + " return history\n", + "\n", + " # predict data with fitted neural network\n", + " def predict(self, X: np.array) -> np.array:\n", + " \"\"\"\n", + " GENERATE PREDICTIONS\n", + " Predict labels for the given input data.\n", + " Parameters:\n", + " - X: np.array (samples, features) or (features,)\n", + " input data to predict\n", + " Returns:\n", + " - np.array\n", + " predictions\n", + " Example usage:\n", + " NN.predict(data_test)\n", + " \"\"\"\n", + " if X.ndim == 1:\n", + " X = np.reshape(X, (1, -1))\n", + "\n", + " self.__forward_propagation(X.T)\n", + "\n", + " return self.layers[-1][\"A\"].T" + ] + }, + { + "cell_type": "markdown", + "id": "a60f041d-d688-4b00-8bc1-3e01da0d947f", + "metadata": { + "tags": [] + }, + "source": [ + "# **Example Usage of `neuralnet.py / class NeuralNetwork`**\n", + "\n", + "### **Multi-Class Classification**\n", + "\n", + "### **Dataset: [MNIST](http://yann.lecun.com/exdb/mnist/index.html)**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb8e53f6-c5bf-4221-8d49-f3bc804b438d", + "metadata": {}, + "outputs": [], + "source": [ + "!wget https://raw.githubusercontent.com/michabirklbauer/neuralnet/master/data.zip" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "c7a8280c-d0b9-41d5-88e1-993db76a73b4", + "metadata": {}, + "outputs": [], + "source": [ + "from zipfile import ZipFile as zip\n", + "\n", + "with zip(\"data.zip\") as f:\n", + " f.extractall()\n", + " f.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "579b7aa9-24c5-4dd1-b8b7-719cbb1f7b09", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + 
"from matplotlib import pyplot as plt\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a8e4f9b1-140c-42ac-9b04-11e23b27d1eb", + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.read_csv(\"multiclass_train.csv\")\n", + "train, test = train_test_split(data, test_size = 0.3)\n", + "train_data = train.loc[:, train.columns != \"label\"].to_numpy() / 255\n", + "train_target = train[\"label\"].to_numpy()\n", + "test_data = test.loc[:, test.columns != \"label\"].to_numpy() / 255\n", + "test_target = test[\"label\"].to_numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f9a8ba9c-7255-40b3-9e5f-f999e89eb257", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
labelpixel0pixel1pixel2pixel3pixel4pixel5pixel6pixel7pixel8...pixel774pixel775pixel776pixel777pixel778pixel779pixel780pixel781pixel782pixel783
251647000000000...0000000000
119049000000000...0000000000
378331000000000...0000000000
61015000000000...0000000000
250193000000000...0000000000
..................................................................
213907000000000...0000000000
76013000000000...0000000000
2241000000000...0000000000
375824000000000...0000000000
129262000000000...0000000000
\n", + "

29400 rows × 785 columns

\n", + "
" + ], + "text/plain": [ + " label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 \\\n", + "25164 7 0 0 0 0 0 0 0 0 \n", + "11904 9 0 0 0 0 0 0 0 0 \n", + "37833 1 0 0 0 0 0 0 0 0 \n", + "6101 5 0 0 0 0 0 0 0 0 \n", + "25019 3 0 0 0 0 0 0 0 0 \n", + "... ... ... ... ... ... ... ... ... ... \n", + "21390 7 0 0 0 0 0 0 0 0 \n", + "7601 3 0 0 0 0 0 0 0 0 \n", + "224 1 0 0 0 0 0 0 0 0 \n", + "37582 4 0 0 0 0 0 0 0 0 \n", + "12926 2 0 0 0 0 0 0 0 0 \n", + "\n", + " pixel8 ... pixel774 pixel775 pixel776 pixel777 pixel778 \\\n", + "25164 0 ... 0 0 0 0 0 \n", + "11904 0 ... 0 0 0 0 0 \n", + "37833 0 ... 0 0 0 0 0 \n", + "6101 0 ... 0 0 0 0 0 \n", + "25019 0 ... 0 0 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "21390 0 ... 0 0 0 0 0 \n", + "7601 0 ... 0 0 0 0 0 \n", + "224 0 ... 0 0 0 0 0 \n", + "37582 0 ... 0 0 0 0 0 \n", + "12926 0 ... 0 0 0 0 0 \n", + "\n", + " pixel779 pixel780 pixel781 pixel782 pixel783 \n", + "25164 0 0 0 0 0 \n", + "11904 0 0 0 0 0 \n", + "37833 0 0 0 0 0 \n", + "6101 0 0 0 0 0 \n", + "25019 0 0 0 0 0 \n", + "... ... ... ... ... ... 
\n", + "21390 0 0 0 0 0 \n", + "7601 0 0 0 0 0 \n", + "224 0 0 0 0 0 \n", + "37582 0 0 0 0 0 \n", + "12926 0 0 0 0 0 \n", + "\n", + "[29400 rows x 785 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "6d2c5098-ba6b-4533-be14-a7e1395c944b", + "metadata": {}, + "outputs": [], + "source": [ + "one_hot = OneHotEncoder(sparse = False, categories = \"auto\")\n", + "train_target = one_hot.fit_transform(train_target.reshape(-1, 1))\n", + "test_target = one_hot.transform(test_target.reshape(-1, 1))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "7417c6e4-fd30-498c-a4de-5657ffb0e5f1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---- Model Summary ----\n", + "Layer 1: relu\n", + "W: (32, 784) b: (32, 1)\n", + "Trainable parameters: 25120\n", + "Layer 2: relu\n", + "W: (16, 32) b: (16, 1)\n", + "Trainable parameters: 528\n", + "Layer 3: softmax\n", + "W: (10, 16) b: (10, 1)\n", + "Trainable parameters: 170\n" + ] + } + ], + "source": [ + "NN = NeuralNetwork(input_size = train_data.shape[1])\n", + "NN.add_layer(32, \"relu\")\n", + "NN.add_layer(16, \"relu\")\n", + "NN.add_layer(10, \"softmax\")\n", + "NN.compile(loss = \"categorical crossentropy\")\n", + "NN.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "bc46f780-ff80-43ec-8eae-0e31ecd39a30", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training epoch 1...\n", + "Current loss: 0.43747370824596604\n", + "Epoch 1 done!\n", + "Training epoch 2...\n", + "Current loss: 0.21528007156258966\n", + "Epoch 2 done!\n", + "Training epoch 3...\n", + "Current loss: 0.16742503623911392\n", + "Epoch 3 done!\n", + "Training epoch 4...\n", + "Current loss: 0.13877936553368508\n", + "Epoch 4 done!\n", + "Training epoch 5...\n", + "Current 
loss: 0.12099309045421619\n", + "Epoch 5 done!\n", + "Training epoch 6...\n", + "Current loss: 0.1072971880634624\n", + "Epoch 6 done!\n", + "Training epoch 7...\n", + "Current loss: 0.09396355017990504\n", + "Epoch 7 done!\n", + "Training epoch 8...\n", + "Current loss: 0.08720308198194518\n", + "Epoch 8 done!\n", + "Training epoch 9...\n", + "Current loss: 0.07927159779935378\n", + "Epoch 9 done!\n", + "Training epoch 10...\n", + "Current loss: 0.07284107143112058\n", + "Epoch 10 done!\n", + "Training epoch 11...\n", + "Current loss: 0.06600162705624461\n", + "Epoch 11 done!\n", + "Training epoch 12...\n", + "Current loss: 0.06342602649693302\n", + "Epoch 12 done!\n", + "Training epoch 13...\n", + "Current loss: 0.05783998850656874\n", + "Epoch 13 done!\n", + "Training epoch 14...\n", + "Current loss: 0.05052314129523882\n", + "Epoch 14 done!\n", + "Training epoch 15...\n", + "Current loss: 0.04563600268741524\n", + "Epoch 15 done!\n", + "Training epoch 16...\n", + "Current loss: 0.04470639462592896\n", + "Epoch 16 done!\n", + "Training epoch 17...\n", + "Current loss: 0.043506537043299306\n", + "Epoch 17 done!\n", + "Training epoch 18...\n", + "Current loss: 0.03815045738567615\n", + "Epoch 18 done!\n", + "Training epoch 19...\n", + "Current loss: 0.038454017529732515\n", + "Epoch 19 done!\n", + "Training epoch 20...\n", + "Current loss: 0.034033571538281876\n", + "Epoch 20 done!\n", + "Training epoch 21...\n", + "Current loss: 0.03033063122611392\n", + "Epoch 21 done!\n", + "Training epoch 22...\n", + "Current loss: 0.02789381646483783\n", + "Epoch 22 done!\n", + "Training epoch 23...\n", + "Current loss: 0.02688368926764838\n", + "Epoch 23 done!\n", + "Training epoch 24...\n", + "Current loss: 0.02944480698302673\n", + "Epoch 24 done!\n", + "Training epoch 25...\n", + "Current loss: 0.02519994251217897\n", + "Epoch 25 done!\n", + "Training epoch 26...\n", + "Current loss: 0.02679484096626338\n", + "Epoch 26 done!\n", + "Training epoch 27...\n", + "Current 
loss: 0.01805071452172742\n", + "Epoch 27 done!\n", + "Training epoch 28...\n", + "Current loss: 0.021675299545706767\n", + "Epoch 28 done!\n", + "Training epoch 29...\n", + "Current loss: 0.027434799817775905\n", + "Epoch 29 done!\n", + "Training epoch 30...\n", + "Current loss: 0.024449728356841036\n", + "Epoch 30 done!\n", + "Training finished after epoch 30 with a loss of 0.024449728356841036.\n" + ] + } + ], + "source": [ + "hist = NN.fit(train_data, train_target, epochs = 30, batch_size = 16, learning_rate = 0.05)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "5d833848-9d24-47b3-b690-d736a50ebe4c", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def plot_history(hist):\n", + " plt.plot(hist)\n", + " plt.title(\"Model Loss\")\n", + " plt.xlabel(\"Epochs\")\n", + " plt.ylabel(\"Loss\")\n", + " plt.show()\n", + " \n", + "plot_history(hist);" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "7f30a5bf-caca-44fc-8d5a-8ed8863600e6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training accuracy: 0.9894897959183674\n", + "Test accuracy: 0.9495238095238095\n" + ] + } + ], + "source": [ + "train_predictions = np.argmax(NN.predict(train_data), axis = 1)\n", + "print(\"Training accuracy: \", accuracy_score(train[\"label\"].to_numpy(), train_predictions))\n", + "test_predictions = np.argmax(NN.predict(test_data), axis = 1)\n", + "print(\"Test accuracy: \", accuracy_score(test[\"label\"].to_numpy(), test_predictions))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "5d3ea10a-9a1a-4f7b-8dad-3a112b3e5add", + "metadata": {}, + "outputs": [], + "source": [ + "def predict_image(index):\n", + " current_image = test_data[index, :]\n", + " prediction = np.argmax(NN.predict(current_image), axis = 1)\n", + " label = test[\"label\"].to_numpy()[index]\n", + " print(\"Prediction: \", prediction)\n", + " print(\"Label: \", label)\n", + " \n", + " current_image = current_image.reshape((28, 28)) * 255\n", + " plt.gray()\n", + " plt.imshow(current_image, interpolation = \"nearest\")\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "e143b0a5-0cf2-43b7-894c-8497e17b4461", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prediction: [5]\n", + "Label: 5\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "predict_image(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "7b1dd60a-05eb-4c3a-9209-ba598be45bb9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prediction: [4]\n", + "Label: 4\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "predict_image(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "5d69171e-e864-44a6-9e51-183002a47c90", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prediction: [2]\n", + "Label: 2\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "predict_image(3)" + ] + }, + { + "cell_type": "markdown", + "id": "747cd3d0-29d2-444b-83dc-1c4b57687704", + "metadata": {}, + "source": [ + "### **Binary-Class Classification**\n", + "\n", + "### **Dataset: [Breast Cancer Wisconsin (Diagnostic) Data Set](https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Diagnostic%29)**" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "7711d2b6-5aab-471c-aa45-06e0c5ddcb2c", + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.read_csv(\"binaryclass_train.csv\", header = None)\n", + "data[\"label\"] = data[1].apply(lambda x: 1 if x == \"M\" else 0)\n", + "train, test = train_test_split(data, test_size = 0.3)\n", + "train_data = train.loc[:, ~train.columns.isin([0, 1, \"label\"])].to_numpy()\n", + "train_target = train[\"label\"].to_numpy()\n", + "test_data = test.loc[:, ~test.columns.isin([0, 1, \"label\"])].to_numpy()\n", + "test_target = test[\"label\"].to_numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "eaa8f0cc-78f0-4984-b701-437195559a4a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...232425262728293031label
361901041B13.3021.5785.24546.10.085820.063730.033440.02424...29.2092.94621.20.11400.166700.121200.056140.26370.066580
186874217M18.3118.58118.601041.00.085880.084680.081690.05814...26.36139.201410.00.12340.244500.353800.157100.32060.069381
199877500M14.4520.2294.49642.70.098720.120600.118000.05980...30.12117.901044.00.15520.405600.496700.183800.47530.101301
38990312M19.5523.21128.901174.00.101000.131800.185600.10210...30.44142.001313.00.12510.241400.382900.182500.25760.076021
388903011B11.2715.5073.38392.00.083650.111400.100700.02757...18.9379.73450.00.11020.280900.302100.082720.21570.104300
..................................................................
430907914M14.9022.53102.10685.00.099470.222500.273300.09711...27.57125.40832.70.14190.709000.901900.247500.28660.115501
3719012568B15.1913.2197.65711.80.079630.069340.033930.02657...15.73104.50819.10.11260.173700.136200.081780.24870.067660
4659113239B13.2420.1386.87542.90.082840.122300.101000.02833...25.50115.00733.50.12010.564600.655600.135700.28450.124900
60858970B10.1714.8864.55311.90.113400.080610.010840.01290...17.4569.86368.60.12750.098660.021680.025790.35570.080200
426907409B10.4814.9867.49333.60.098160.101300.063350.02218...21.5781.41440.40.13270.299600.293900.093100.30200.096460
\n", + "

398 rows × 33 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 \\\n", + "361 901041 B 13.30 21.57 85.24 546.1 0.08582 0.06373 0.03344 \n", + "186 874217 M 18.31 18.58 118.60 1041.0 0.08588 0.08468 0.08169 \n", + "199 877500 M 14.45 20.22 94.49 642.7 0.09872 0.12060 0.11800 \n", + "389 90312 M 19.55 23.21 128.90 1174.0 0.10100 0.13180 0.18560 \n", + "388 903011 B 11.27 15.50 73.38 392.0 0.08365 0.11140 0.10070 \n", + ".. ... .. ... ... ... ... ... ... ... \n", + "430 907914 M 14.90 22.53 102.10 685.0 0.09947 0.22250 0.27330 \n", + "371 9012568 B 15.19 13.21 97.65 711.8 0.07963 0.06934 0.03393 \n", + "465 9113239 B 13.24 20.13 86.87 542.9 0.08284 0.12230 0.10100 \n", + "60 858970 B 10.17 14.88 64.55 311.9 0.11340 0.08061 0.01084 \n", + "426 907409 B 10.48 14.98 67.49 333.6 0.09816 0.10130 0.06335 \n", + "\n", + " 9 ... 23 24 25 26 27 28 29 \\\n", + "361 0.02424 ... 29.20 92.94 621.2 0.1140 0.16670 0.12120 0.05614 \n", + "186 0.05814 ... 26.36 139.20 1410.0 0.1234 0.24450 0.35380 0.15710 \n", + "199 0.05980 ... 30.12 117.90 1044.0 0.1552 0.40560 0.49670 0.18380 \n", + "389 0.10210 ... 30.44 142.00 1313.0 0.1251 0.24140 0.38290 0.18250 \n", + "388 0.02757 ... 18.93 79.73 450.0 0.1102 0.28090 0.30210 0.08272 \n", + ".. ... ... ... ... ... ... ... ... ... \n", + "430 0.09711 ... 27.57 125.40 832.7 0.1419 0.70900 0.90190 0.24750 \n", + "371 0.02657 ... 15.73 104.50 819.1 0.1126 0.17370 0.13620 0.08178 \n", + "465 0.02833 ... 25.50 115.00 733.5 0.1201 0.56460 0.65560 0.13570 \n", + "60 0.01290 ... 17.45 69.86 368.6 0.1275 0.09866 0.02168 0.02579 \n", + "426 0.02218 ... 21.57 81.41 440.4 0.1327 0.29960 0.29390 0.09310 \n", + "\n", + " 30 31 label \n", + "361 0.2637 0.06658 0 \n", + "186 0.3206 0.06938 1 \n", + "199 0.4753 0.10130 1 \n", + "389 0.2576 0.07602 1 \n", + "388 0.2157 0.10430 0 \n", + ".. ... ... ... 
\n", + "430 0.2866 0.11550 1 \n", + "371 0.2487 0.06766 0 \n", + "465 0.2845 0.12490 0 \n", + "60 0.3557 0.08020 0 \n", + "426 0.3020 0.09646 0 \n", + "\n", + "[398 rows x 33 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "751772e4-e90b-4c11-ae63-cdb9602529e7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---- Model Summary ----\n", + "Layer 1: relu\n", + "W: (16, 30) b: (16, 1)\n", + "Trainable parameters: 496\n", + "Layer 2: relu\n", + "W: (16, 16) b: (16, 1)\n", + "Trainable parameters: 272\n", + "Layer 3: sigmoid\n", + "W: (1, 16) b: (1, 1)\n", + "Trainable parameters: 17\n" + ] + } + ], + "source": [ + "NN = NeuralNetwork(input_size = train_data.shape[1])\n", + "NN.add_layer(16, \"relu\")\n", + "NN.add_layer(16, \"relu\")\n", + "NN.add_layer(1, \"sigmoid\")\n", + "NN.compile(loss = \"binary crossentropy\")\n", + "NN.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "0f010d0a-ceef-4824-b36b-9752547248f1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training finished after epoch 1000 with a loss of 0.166389194481977.\n" + ] + } + ], + "source": [ + "hist = NN.fit(train_data, train_target, epochs = 1000, batch_size = 32, learning_rate = 0.01, verbose = 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "f4a324e2-2070-43d5-8cb6-8b07cbb69078", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_history(hist);" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "8b581b00-8d8e-4ea6-80c1-8f11dfbad692", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training accuracy: 0.8869346733668342\n", + "Test accuracy: 0.8888888888888888\n" + ] + } + ], + "source": [ + "train_predictions = np.round(NN.predict(train_data))\n", + "print(\"Training accuracy: \", accuracy_score(train[\"label\"].to_numpy(), train_predictions))\n", + "test_predictions = np.round(NN.predict(test_data))\n", + "print(\"Test accuracy: \", accuracy_score(test[\"label\"].to_numpy(), test_predictions))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b49a45e0-9906-4a34-912c-f3ec10ea2fa2", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/neuralnet.py b/neuralnet.py new file mode 100644 index 0000000..c842c31 --- /dev/null +++ b/neuralnet.py @@ -0,0 +1,674 @@ +#!/usr/bin/env python3 + +# NEURAL NETWORK IMPLEMENTATION +# 2022 (c) Micha Johannes Birklbauer +# https://github.com/michabirklbauer/ +# micha.birklbauer@gmail.com + +import math +import numpy as np +from typing import Tuple +from typing import List + +class LayerInitializer: + """ + Functions for layer weight initialization. 
class LayerInitializer:
    """
    Functions for layer weight initialization.
    """

    # He normal initialization
    @staticmethod
    def he_normal(size: Tuple[int, int], fan_in: int) -> np.ndarray:
        """
        HE NORMAL INITIALIZATION
        Draws samples from a normal distribution (not truncated) centered at 0
        mean with stddev = sqrt(2 / fan_in) where fan_in is the number of input
        units per unit in the layer.
        Parameters:
            - size: Tuple[int, int] (rows, columns)
                shape of the initialized weight matrix
            - fan_in: int
                number of input units per unit in the layer, must be > 0
        Returns:
            - np.ndarray (rows, columns)
                He normal initialized weight matrix
        Raises:
            - ValueError
                if fan_in is not positive
        Ref:
            https://arxiv.org/abs/1502.01852
        """
        if fan_in <= 0:
            raise ValueError("fan_in must be positive, got " + str(fan_in) + "!")
        return np.random.normal(0, math.sqrt(2 / fan_in), size = size)

    # Glorot / Xavier normal initialization
    @staticmethod
    def glorot_normal(size: Tuple[int, int], fan_in: int, fan_out: int) -> np.ndarray:
        """
        GLOROT / XAVIER NORMAL INITIALIZATION
        Draws samples from a normal distribution (not truncated) centered at 0
        mean with stddev = sqrt(2 / (fan_in + fan_out)) where fan_in is the
        number of input units per unit in the layer and fan_out is the number
        of output units of the layer.
        Parameters:
            - size: Tuple[int, int] (rows, columns)
                shape of the initialized weight matrix
            - fan_in: int
                number of input units per unit in the layer
            - fan_out: int
                number of output units of the layer
        Returns:
            - np.ndarray (rows, columns)
                Glorot normal initialized weight matrix
        Raises:
            - ValueError
                if fan_in + fan_out is not positive
        Ref:
            http://proceedings.mlr.press/v9/glorot10a.html
        """
        # only the sum is validated: a single zero fan still yields a finite stddev
        if fan_in + fan_out <= 0:
            raise ValueError("fan_in + fan_out must be positive, got " + str(fan_in + fan_out) + "!")
        return np.random.normal(0, math.sqrt(2 / (fan_in + fan_out)), size = size)

    # Bias initialization
    @staticmethod
    def bias(size: Tuple[int, int]) -> np.ndarray:
        """
        BIAS INITIALIZATION
        Initializes the bias vector / matrix with zeros.
        Parameters:
            - size: Tuple[int, int] (rows, columns)
                shape of the initialized bias vector / matrix
        Returns:
            - np.ndarray (rows, columns)
                Zero initialized bias vector / matrix
        Ref:
            https://cs231n.github.io/neural-networks-2/
        """
        return np.zeros(shape = size)

class ActivationFunctions:
    """
    Layer activation functions.
    """

    # Rectified Linear Units
    @staticmethod
    def relu(x: np.ndarray, derivative: bool = False) -> np.ndarray:
        """
        RECTIFIED LINEAR UNITS
        ReLU activation function.
        Parameters:
            - x: np.ndarray
                input matrix to apply activation function to
            - derivative: bool
                if set to 'True' returns the derivative instead
                DEFAULT: False
        Returns:
            - np.ndarray (same shape as x)
                activated x / derivative of x (1 where x > 0, else 0)
        Ref:
            https://en.wikipedia.org/wiki/Rectifier_(neural_networks)
        """
        if derivative:
            return np.where(x > 0, 1, 0)
        return np.maximum(x, 0)

    # Sigmoid activation function
    @staticmethod
    def sigmoid(x: np.ndarray, derivative: bool = False) -> np.ndarray:
        """
        SIGMOID / LOGISTIC FUNCTION
        Sigmoid activation function.
        Parameters:
            - x: np.ndarray
                input matrix to apply activation function to
            - derivative: bool
                if set to 'True' returns the derivative instead
                DEFAULT: False
        Returns:
            - np.ndarray (same shape as x)
                activated x / derivative of x
        Refs:
            https://en.wikipedia.org/wiki/Sigmoid_function
            https://en.wikipedia.org/wiki/Activation_function
        """
        # 1 / (1 + exp(-x)) == exp(-log(1 + exp(-x))); logaddexp evaluates the
        # log term without overflowing exp() for large negative x
        s = np.exp(-np.logaddexp(0, -x))
        if derivative:
            return s * (1 - s)
        return s

    # Softmax activation function
    @staticmethod
    def softmax(x: np.ndarray, derivative: bool = False) -> np.ndarray:
        """
        SOFTMAX FUNCTION
        Stable softmax activation function, normalized along axis 0 (for a
        2-D input every column is normalized independently).
        Parameters:
            - x: np.ndarray
                input matrix to apply activation function to
            - derivative: bool
                not supported, setting it to 'True' raises an error
                DEFAULT: False
        Returns:
            - np.ndarray (same shape as x)
                activated x
        Raises:
            - NotImplementedError
                if derivative is 'True'
        Refs:
            https://en.wikipedia.org/wiki/Softmax_function
            https://eli.thegreenplace.net/2016/the-softmax-function-and-its-derivative/
        """
        if derivative:
            raise NotImplementedError("Softmax derivative not implemented!")
        # shift by the maximum along the normalization axis: shifting by the
        # global maximum lets whole columns underflow to 0 and end up as 0 / 0
        shifted = x - np.max(x, axis = 0, keepdims = True)
        n = np.exp(shifted)
        d = np.sum(n, axis = 0, keepdims = True)
        return n / d

class LossFunctions:
    """
    Loss functions for neural net fitting.
    """

    # lower bound / offset that keeps log() away from 0
    _EPSILON = 1e-7

    # binary cross entropy loss
    @staticmethod
    def binary_cross_entropy(y_true: np.ndarray, y_predicted: np.ndarray) -> np.ndarray:
        """
        BINARY CROSS ENTROPY LOSS
        Cross entropy loss for binary-class classification.
            L[BCE] = - p(i) * log(q(i)) - (1 - p(i)) * log(1 - q(i))
            where
                - p(i) is the true label
                - q(i) is the predicted sigmoid probability
        A small constant (1e-7) is added inside both logarithms for numerical
        stability.
        Parameters:
            - y_true: np.ndarray (1, sample_size)
                true label vector
            - y_predicted: np.ndarray (1, sample_size)
                the sigmoid probability
        Returns:
            - np.ndarray (sample_size,)
                loss for every given sample
        Ref:
            https://en.wikipedia.org/wiki/Cross_entropy
        """
        eps = LossFunctions._EPSILON
        y_true = np.asarray(y_true, dtype = float)
        y_predicted = np.asarray(y_predicted, dtype = float)
        losses = -1 * (y_true * np.log(y_predicted + eps) + (1 - y_true) * np.log(1 - y_predicted + eps))
        # collapse the (single) label row so the result has one entry per sample
        return np.sum(losses, axis = 0)

    # categorical cross entropy loss
    @staticmethod
    def categorical_cross_entropy(y_true: np.ndarray, y_predicted: np.ndarray) -> np.ndarray:
        """
        CATEGORICAL CROSS ENTROPY LOSS
        Cross entropy loss for binary- and multi-class classification.
            L[CCE] = - sum[from i = 0 to n]( p(i) * log(q(i)) )
            where
                - p(i) is the true label
                - q(i) is the predicted softmax probability
                - n is the number of classes
        Probabilities below 1e-7 are clipped to 1e-7 before the logarithm so
        that an exact 0 cannot produce 0 * -inf = NaN.
        Parameters:
            - y_true: np.ndarray (n_classes, sample_size)
                one-hot encoded true label vector
            - y_predicted: np.ndarray (n_classes, sample_size)
                the softmax probabilities
        Returns:
            - np.ndarray (sample_size,)
                loss for every given sample
        Ref:
            https://en.wikipedia.org/wiki/Cross_entropy
        """
        y_true = np.asarray(y_true, dtype = float)
        y_predicted = np.asarray(y_predicted, dtype = float)
        log_q = np.log(np.clip(y_predicted, LossFunctions._EPSILON, None))
        return -1 * np.sum(y_true * log_q, axis = 0)
+ L[CCE] = - sum[from i = 0 to n]( p(i) * log(q(i)) ) + where + - p(i) is the true label + - q(i) is the predicted softmax probability + - n is the number of classes + Parameters: + - y_true: np.array (n_classes, sample_size) + one-hot encoded true label vector + - y_predicted: np.array (n_classes, sample_size) + the softmax probabilities + Returns: + - np.array (sample_size,) + loss for every given sample + Ref: + https://en.wikipedia.org/wiki/Cross_entropy + """ + losses = [] + for i in range(y_true.shape[1]): + ## stable CCE + # losses.append(float(-1 * np.sum(y_true[:, i] * np.log(y_predicted[:, i] + 1e-7)))) + ## unstable CCE + losses.append(float(-1 * np.sum(y_true[:, i] * np.log(y_predicted[:, i])))) + + return np.array(losses) + +class NeuralNetwork: + """ + Implementation of a classic feed-forward neural network that is trained via + backpropagation. Adopts a Keras-like interface for convenient usage (see + https://michabirklbauer.github.io/neuralnet for examples). + """ + + # constructor + def __init__(self, input_size: int): + """ + CONSTRUCTOR + Initializes the neural network model. + Parameters: + - input_size: int + nr. of features in the training data + Returns: + - None + Example usage: + NN = NeuralNetwork(data.shape[1]) + """ + self.input_size = input_size + self.architecture = [] + self.layers = [] + + # adding layers + def add_layer(self, units: int, activation: str = "relu", initialization: str = None) -> None: + """ + LAYER MANAGEMENT + Construct the neural network architecture by adding different layers. + Parameters: + - units: int + nr. 
of units in the layer + - activation: str, one of ("relu", "sigmoid", "softmax") + activation function of the layer + DEFAULT: "relu" + - initialization: str, one of ("he", "glorot") + weight initialization to use + DEFAULT: None, "relu" layers are 'he normal' initialized, + all other layers are 'glorot normal' + initialized + Returns: + - None + Example usage: + NN = NeuralNetwork(data.shape[1]) + NN.add_layer(16, "relu", "glorot") + NN.add_layer(8) + NN.add_layer(1, "sigmoid") + """ + if initialization == None: + if activation == "relu": + layer_init = "he" + else: + layer_init = "glorot" + else: + layer_init = initialization + + self.architecture.append({"units": units, "activation": activation, "init": layer_init}) + + # compiling model + def compile(self, loss: str = "categorical crossentropy") -> None: + """ + MODEL INITIALIZATION + Initializes all parameters of the neural network architecture and + prepares the model for training. + Parameters: + - loss: str, one of ("binary crossentropy", "categorical crossentropy") + the loss function that should be used for training + DEFAULT: "categorical crossentropy" + Returns: + - None + Example usage: + NN = NeuralNetwork(data.shape[1]) + NN.add_layer(16, "relu", "glorot") + NN.add_layer(8) + NN.add_layer(1, "sigmoid") + NN.compile("binary crossentropy") + """ + self.loss = loss + + # initialize all layer weights and biases + for i in range(len(self.architecture)): + units = self.architecture[i]["units"] + activation = self.architecture[i]["activation"] + init = self.architecture[i]["init"] + + units_previous_layer = self.input_size + if i > 0: + units_previous_layer = self.architecture[i - 1]["units"] + units_next_layer = 0 + if i < len(self.architecture) - 1: + units_next_layer = self.architecture[i + 1]["units"] + + if init == "he": + W = LayerInitializer.he_normal((units, units_previous_layer), fan_in = units_previous_layer) + b = LayerInitializer.bias((units, 1)) + elif init == "glorot": + W = 
LayerInitializer.glorot_normal((units, units_previous_layer), fan_in = units_previous_layer, fan_out = units_next_layer) + b = LayerInitializer.bias((units, 1)) + else: + raise NotImplementedError("Layer initialization '" + init + "' not implemented!") + + self.layers.append({"W": W, "b": b, "activation": activation}) + + # forward propagation + def __forward_propagation(self, data: np.array) -> None: + """ + FORWARD PROPAGATION (INTERNAL) + Internal function calculating the forward pass of A(Wx + b). + - The result of 'Wx + b' (L) is stored in self.layers[layer]["L"] + - The result of 'Activation(L)' (A) is stored in self.layers[layer]["A"] + Parameters: + - data: np.array + input data for the forward pass + Returns: + - None, "L" and "A" are set in the layer dictionary, to retrieve the + last layer output call 'self.layers[-1]["A"]' + """ + + for i in range(len(self.layers)): + + if i == 0: + A = data + else: + A = self.layers[i - 1]["A"] + + # Wx + b where x is the input data for the first layer and otherwise + # the output (A) of the previous layer + self.layers[i]["L"] = self.layers[i]["W"].dot(A) + self.layers[i]["b"] + if self.layers[i]["activation"] == "relu": + self.layers[i]["A"] = ActivationFunctions.relu(self.layers[i]["L"]) + elif self.layers[i]["activation"] == "sigmoid": + self.layers[i]["A"] = ActivationFunctions.sigmoid(self.layers[i]["L"]) + elif self.layers[i]["activation"] == "softmax": + self.layers[i]["A"] = ActivationFunctions.softmax(self.layers[i]["L"]) + else: + raise NotImplementedError("Activation function '" + layer["activation"] + "' not implemented!") + + # back propagation + def __back_propagation(self, data: np.array, target: np.array, learning_rate: float = 0.1) -> float: + """ + BACK PROPAGATION (INTERNAL) + Internal function for learning layer weights and biases using gradient + descent and back propagation. 
+ Parameters: + - data: np.array + input data + - target: np.array + class labels of the input data + - learning_rate: float + learning rate / how far in the direction of the gradient to + go + DEFAULT: 0.1 + Returns: + - float + loss of the current forward pass + """ + # forward pass + self.__forward_propagation(data) + + output = self.layers[-1]["A"] + batch_size = data.shape[1] + loss = 0 + + # calculate loss of the current forward pass + if self.loss == "categorical crossentropy": + losses = LossFunctions.categorical_cross_entropy(y_true = target, y_predicted = output) + # reduction by sum over batch size + loss = float(np.sum(losses) / batch_size) + elif self.loss == "binary crossentropy": + losses = LossFunctions.binary_cross_entropy(y_true = target, y_predicted = output) + # reduction by sum over batch size + loss = float(np.sum(losses) / batch_size) + else: + raise NotImplementedError("Loss function '" + self.loss + "' not implemented!") + + # calculate and back pass the derivate of the loss w.r.t the output + # activation function + # this implementation suppports CCE + Softmax and BCE + Sigmoid in the + # output layer + if self.loss == "categorical crossentropy" and self.layers[-1]["activation"] == "softmax": + # for categorical cross entropy loss the derivative of softmax simplifies to + # P(i) - Y(i) + # where P(i) is the softmax output and Y(i) is the true label + # https://www.ics.uci.edu/~pjsadows/notes.pdf + # https://math.stackexchange.com/questions/945871/derivative-of-softmax-loss-function + previous_layer_activation = data.T if len(self.layers) == 1 else self.layers[len(self.layers) - 2]["A"].T + dL = self.layers[-1]["A"] - target + dW = dL.dot(previous_layer_activation) / batch_size + db = np.reshape(np.sum(dL, axis = 1), (-1, 1)) / batch_size + + # parameter tracking + previous_dL = np.copy(dL) + previous_W = np.copy(self.layers[-1]["W"]) + + # update + self.layers[-1]["W"] -= learning_rate * dW + self.layers[-1]["b"] -= learning_rate * db + 
elif self.loss == "binary crossentropy" and self.layers[-1]["activation"] == "sigmoid": + # for binary cross entropy loss the derivative of the loss function is + # L' = -1 * (Y(i) / P(i) - (1 - Y(i)) / (1 - P(i))) + # where P(i) is the sigmoid output and Y(i) is the true label + # and we multiply that with the derivative of the sigmoid function [1] + # https://math.stackexchange.com/questions/2503428/derivative-of-binary-cross-entropy-why-are-my-signs-not-right + previous_layer_activation = data.T if len(self.layers) == 1 else self.layers[len(self.layers) - 2]["A"].T + # [1] + # A = np.clip(self.layers[-1]["A"], 1e-7, 1 - 1e-7) + # derivative_loss = -1 * np.divide(target, A) + np.divide(1 - target, 1 - A) + # dL = derivative_loss * ActivationFunctions.sigmoid(self.layers[-1]["L"], derivative = True) + # alternatively we can directly simplify the derivative of the binary cross entropy loss + # with sigmoid activation function to + # P(i) - Y(i) + # where P(i) is the sigmoid output and Y(i) is the true label + # done in [2] + # https://math.stackexchange.com/questions/4227931/what-is-the-derivative-of-binary-cross-entropy-loss-w-r-t-to-input-of-sigmoid-fu + # [2] + dL = (self.layers[-1]["A"] - target) / batch_size + dW = dL.dot(previous_layer_activation) / batch_size + db = np.reshape(np.sum(dL, axis = 1), (-1, 1)) / batch_size + + # parameter tracking + previous_dL = np.copy(dL) + previous_W = np.copy(self.layers[-1]["W"]) + + # update + self.layers[-1]["W"] -= learning_rate * dW + self.layers[-1]["b"] -= learning_rate * db + else: + raise NotImplementedError("The combination of '" + self.loss + " loss' and '" + self.layers[i]["activation"] + " activation' is not implemented!") + + # back propagation through the remaining hidden layers + for i in reversed(range(len(self.layers) - 1)): + + if i == 0: + if self.layers[i]["activation"] == "relu": + dL = previous_W.T.dot(previous_dL) * ActivationFunctions.relu(self.layers[i]["L"], derivative = True) + dW = 
# neural network architecture summary
def summary(self) -> None:
    """
    MODEL SUMMARY
    Print a summary of the neural network architecture.
    For every layer the activation function, the shapes of the weight
    matrix W and the bias b and the number of trainable parameters are
    printed. The shapes of the cached values L and A are printed as
    well if the layer already stores them.
    Parameters:
        - None
    Returns:
        - None, prints a summary of the neural network architecture to
                stdout
    Example usage:
        NN.summary()
    """
    print("---- Model Summary ----")
    for number, layer in enumerate(self.layers, start = 1):
        print("Layer " + str(number) + ": " + layer["activation"])
        # W and b are always reported, L and A only if the layer holds them
        parts = ["W: " + str(layer["W"].shape),
                 "b: " + str(layer["b"].shape)]
        if "L" in layer:
            parts.append("L: " + str(layer["L"].shape))
            parts.append("A: " + str(layer["A"].shape))
        print(" ".join(parts))
        print("Trainable parameters: " + str(layer["W"].size + layer["b"].size))


# train neural network on data
def fit(self, X: np.ndarray, y: np.ndarray, epochs: int = 100, batch_size: int = 32, learning_rate: float = 0.1, verbose: int = 1) -> List[float]:
    """
    TRAIN MODEL
    Train the neural network with mini-batch gradient descent. In every
    epoch the samples are shuffled, split into batches and each batch is
    passed to the back propagation step once.
    Parameters:
        - X: np.array (samples, features)
            input data to train on
        - y: np.array (samples, labels) or (labels,)
            labels of the input data
        - epochs: int
            how many iterations to train, values <= 0 train nothing
            DEFAULT: 100
        - batch_size: int
            how many samples to use per backward pass
            DEFAULT: 32
        - learning_rate: float
            learning rate / how far in the direction of the gradient to
            go
            DEFAULT: 0.1
        - verbose: int, one of (0, 1) / bool
            print information for every epoch
            DEFAULT: 1 (True)
    Returns:
        - List[float]
            loss history over all epochs (mean batch loss per epoch),
            empty if no epoch was trained
    Raises:
        - ValueError
            if batch_size is smaller than 1, if X and y do not contain
            the same number of samples or if X contains no samples
    Example usage:
        NN.fit(data_train, labels_train)
    """
    if batch_size < 1:
        raise ValueError("batch_size has to be at least 1 but is " + str(batch_size) + "!")

    # reshaping inputs
    if y.ndim == 1:
        y = np.reshape(y, (-1, 1))

    # without this check surplus labels would be ignored silently
    if X.shape[0] != y.shape[0]:
        raise ValueError("X and y have to contain the same number of samples but contain " +
                         str(X.shape[0]) + " and " + str(y.shape[0]) + "!")

    # samples are stored column-wise from here on
    data = X.T
    target = y.T
    sample_size = data.shape[1]

    if sample_size == 0:
        raise ValueError("X has to contain at least one sample!")

    number_of_batches = math.ceil(sample_size / batch_size)
    history = []

    # train network
    for epoch in range(1, epochs + 1):
        if verbose:
            print("Training epoch " + str(epoch) + "...")
        # generate random batches of size batch_size
        idx = np.random.choice(sample_size, sample_size, replace = False)
        batch_losses = []
        for batch in np.array_split(idx, number_of_batches):
            batch_loss = self.__back_propagation(data[:, batch], target[:, batch], learning_rate = learning_rate)
            batch_losses.append(batch_loss)
        # stored as built-in float so the history matches the annotated type
        epoch_loss = float(np.mean(batch_losses))
        history.append(epoch_loss)
        if verbose:
            print("Current loss: ", epoch_loss)
            print("Epoch " + str(epoch) + " done!")

    # there is no final loss to report if no epoch was trained
    if history:
        print("Training finished after epoch " + str(epochs) + " with a loss of " + str(history[-1]) + ".")

    return history


# predict data with fitted neural network
def predict(self, X: np.ndarray) -> np.ndarray:
    """
    GENERATE PREDICTIONS
    Predict labels for the given input data by running a forward pass
    and reading the activation of the last layer.
    Parameters:
        - X: np.array (samples, features) or (features,)
            input data to predict, a one-dimensional input is treated
            as a single sample
    Returns:
        - np.array
            predictions, the transposed activation of the last layer
    Example usage:
        NN.predict(data_test)
    """
    # a single sample becomes a matrix with one row
    samples = np.reshape(X, (1, -1)) if X.ndim == 1 else X

    # the forward pass receives the transposed input
    self.__forward_propagation(samples.T)

    output_activation = self.layers[-1]["A"]
    return output_activation.T
0.05) + + train_predictions = np.argmax(NN.predict(train_data), axis = 1) + print("Training accuracy: ", accuracy_score(train["label"].to_numpy(), train_predictions)) + test_predictions = np.argmax(NN.predict(test_data), axis = 1) + print("Test accuracy: ", accuracy_score(test["label"].to_numpy(), test_predictions)) + + #### Binary-class Classification #### + + import pandas as pd + from sklearn.metrics import accuracy_score + from sklearn.preprocessing import OneHotEncoder + from sklearn.model_selection import train_test_split + + data = pd.read_csv("binaryclass_train.csv", header = None) + data["label"] = data[1].apply(lambda x: 1 if x == "M" else 0) + train, test = train_test_split(data, test_size = 0.3) + train_data = train.loc[:, ~train.columns.isin([0, 1, "label"])].to_numpy() + train_target = train["label"].to_numpy() + test_data = test.loc[:, ~test.columns.isin([0, 1, "label"])].to_numpy() + test_target = test["label"].to_numpy() + + NN = NeuralNetwork(input_size = train_data.shape[1]) + NN.add_layer(16, "relu") + NN.add_layer(16, "relu") + NN.add_layer(1, "sigmoid") + NN.compile(loss = "binary crossentropy") + NN.summary() + + hist = NN.fit(train_data, train_target, epochs = 1000, batch_size = 32, learning_rate = 0.01) + + train_predictions = np.round(NN.predict(train_data)) + print("Training accuracy: ", accuracy_score(train["label"].to_numpy(), train_predictions)) + test_predictions = np.round(NN.predict(test_data)) + print("Test accuracy: ", accuracy_score(test["label"].to_numpy(), test_predictions)) + + """ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..bb082cf --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +numpy +pandas +matplotlib +scikit-learn \ No newline at end of file