Skip to content

Commit

Permalink
(#8) update preprocessing [deploy]
Browse files Browse the repository at this point in the history
  • Loading branch information
SaintAngeLs committed Jan 14, 2025
1 parent f825a13 commit 278a0fb
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 16 deletions.
1 change: 0 additions & 1 deletion src/FeatureFlex/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def transform(self, X):
X.drop(columns=[col], inplace=True)
return X


class DataPreprocessor:
"""
Handles preprocessing of data including missing values, scaling, encoding, and feature extraction.
Expand Down
69 changes: 54 additions & 15 deletions tutorial/tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -111,16 +111,45 @@
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": 73,
"metadata": {
"vscode": {
"languageId": "markdown"
}
},
"outputs": [
{
"ename": "SyntaxError",
"evalue": "invalid syntax (3268278751.py, line 1)",
"output_type": "error",
"traceback": [
"\u001b[0;36m Cell \u001b[0;32mIn[73], line 1\u001b[0;36m\u001b[0m\n\u001b[0;31m Pakiet instalujemy z https://pypi.org/project/FeatureFlex/#FeatureFlex-0.1.16-py3-none-any.whl\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
]
}
],
"source": [
"Pakiet instalujemy z https://pypi.org/project/FeatureFlex/#FeatureFlex-0.1.16-py3-none-any.whl"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"5585.74s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found existing installation: FeatureFlex 0.1.35\n",
"Uninstalling FeatureFlex-0.1.35:\n",
" Successfully uninstalled FeatureFlex-0.1.35\n",
"Found existing installation: FeatureFlex 0.1.44\n",
"Uninstalling FeatureFlex-0.1.44:\n",
" Successfully uninstalled FeatureFlex-0.1.44\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
Expand All @@ -131,15 +160,22 @@
},
{
"cell_type": "code",
"execution_count": 51,
"execution_count": 75,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"5593.71s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting FeatureFlex\n",
" Using cached FeatureFlex-0.1.35-py3-none-any.whl.metadata (8.4 kB)\n",
" Using cached FeatureFlex-0.1.44-py3-none-any.whl.metadata (8.4 kB)\n",
"Requirement already satisfied: alembic==1.14.0 in /home/kaliuser/Documents/portfolio/commercial_apps/CS-MINI-2024Z-AutoML_project_2/venv/lib/python3.12/site-packages (from FeatureFlex) (1.14.0)\n",
"Requirement already satisfied: autofeat==2.1.3 in /home/kaliuser/Documents/portfolio/commercial_apps/CS-MINI-2024Z-AutoML_project_2/venv/lib/python3.12/site-packages (from FeatureFlex) (2.1.3)\n",
"Requirement already satisfied: Boruta==0.4.3 in /home/kaliuser/Documents/portfolio/commercial_apps/CS-MINI-2024Z-AutoML_project_2/venv/lib/python3.12/site-packages (from FeatureFlex) (0.4.3)\n",
Expand Down Expand Up @@ -212,9 +248,9 @@
"Requirement already satisfied: tzdata==2024.2 in /home/kaliuser/Documents/portfolio/commercial_apps/CS-MINI-2024Z-AutoML_project_2/venv/lib/python3.12/site-packages (from FeatureFlex) (2024.2)\n",
"Requirement already satisfied: urllib3==2.3.0 in /home/kaliuser/Documents/portfolio/commercial_apps/CS-MINI-2024Z-AutoML_project_2/venv/lib/python3.12/site-packages (from FeatureFlex) (2.3.0)\n",
"Requirement already satisfied: xgboost==2.1.3 in /home/kaliuser/Documents/portfolio/commercial_apps/CS-MINI-2024Z-AutoML_project_2/venv/lib/python3.12/site-packages (from FeatureFlex) (2.1.3)\n",
"Using cached FeatureFlex-0.1.35-py3-none-any.whl (18 kB)\n",
"Using cached FeatureFlex-0.1.44-py3-none-any.whl (18 kB)\n",
"Installing collected packages: FeatureFlex\n",
"Successfully installed FeatureFlex-0.1.35\n",
"Successfully installed FeatureFlex-0.1.44\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
Expand All @@ -225,7 +261,7 @@
},
{
"cell_type": "code",
"execution_count": 52,
"execution_count": 76,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -254,14 +290,16 @@
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 77,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wczytano zbiór: world-happiness-report-2021.csv\n"
"Wczytano zbiór: world-happiness-report-2021.csv\n",
"Rozmiar zbioru treningowego: 119\n",
"Rozmiar zbioru testowego: 30\n"
]
}
],
Expand Down Expand Up @@ -314,7 +352,7 @@
},
{
"cell_type": "code",
"execution_count": 54,
"execution_count": 78,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -354,7 +392,7 @@
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[54], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m help(DataPreprocessor)\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Zaawansowane przetwarzanie danych\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m preprocessor_advanced \u001b[38;5;241m=\u001b[39m \u001b[43mDataPreprocessor\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mscale_method\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrobust\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Skalowanie za pomocą RobustScaler\u001b[39;49;00m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mhandle_outliers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Obsługa wartości odstających\u001b[39;49;00m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43minclude_interactions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Dodawanie interakcji między cechami\u001b[39;49;00m\n\u001b[1;32m 8\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 10\u001b[0m X_advanced, y_advanced, _ \u001b[38;5;241m=\u001b[39m preprocessor_advanced\u001b[38;5;241m.\u001b[39mpreprocess(\n\u001b[1;32m 11\u001b[0m data, \u001b[38;5;66;03m# Dane wejściowe\u001b[39;00m\n\u001b[1;32m 12\u001b[0m target_column \u001b[38;5;66;03m# Kolumna docelowa\u001b[39;00m\n\u001b[1;32m 13\u001b[0m )\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# Podział na zbiór treningowy i testowy\u001b[39;00m\n",
"Cell \u001b[0;32mIn[78], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m help(DataPreprocessor)\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Zaawansowane przetwarzanie danych\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m preprocessor_advanced \u001b[38;5;241m=\u001b[39m \u001b[43mDataPreprocessor\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mscale_method\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrobust\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Skalowanie za pomocą RobustScaler\u001b[39;49;00m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mhandle_outliers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Obsługa wartości odstających\u001b[39;49;00m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43minclude_interactions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Dodawanie interakcji między cechami\u001b[39;49;00m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43mdate_columns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\n\u001b[1;32m 9\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 11\u001b[0m X_advanced, y_advanced, _ \u001b[38;5;241m=\u001b[39m preprocessor_advanced\u001b[38;5;241m.\u001b[39mpreprocess(\n\u001b[1;32m 12\u001b[0m data, \u001b[38;5;66;03m# Dane wejściowe\u001b[39;00m\n\u001b[1;32m 13\u001b[0m target_column \u001b[38;5;66;03m# Kolumna docelowa\u001b[39;00m\n\u001b[1;32m 14\u001b[0m )\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# Podział na zbiór treningowy i testowy\u001b[39;00m\n",
"\u001b[0;31mTypeError\u001b[0m: DataPreprocessor() takes no arguments"
]
}
Expand All @@ -366,7 +404,8 @@
"preprocessor_advanced = DataPreprocessor(\n",
" scale_method=\"robust\", # Skalowanie za pomocą RobustScaler\n",
" handle_outliers=True, # Obsługa wartości odstających\n",
" include_interactions=True # Dodawanie interakcji między cechami\n",
" include_interactions=True, # Dodawanie interakcji między cechami\n",
" date_columns=None\n",
")\n",
"\n",
"X_advanced, y_advanced, _ = preprocessor_advanced.preprocess(\n",
Expand Down

0 comments on commit 278a0fb

Please sign in to comment.