import numpy as np
from sklearn.metrics import (
    mean_squared_error, mean_absolute_error, r2_score,
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
)
import tensorflow as tf
# ---- Introduction ----
# This script explains various loss functions and evaluation metrics, their use cases,
# and why and when to use them. Includes both basic and advanced methods for regression
# and classification problems.
# ========================================================
# 1. LOSS FUNCTIONS FOR REGRESSION
# ========================================================
print("\n--- Regression Loss Functions ---\n")
# 1. Regression: Common Loss Functions
# - Mean Squared Error (MSE)
# - Mean Absolute Error (MAE)
# - Huber Loss
# - Log-Cosh Loss
# Simulated Regression Data
y_true_reg = np.array([3.0, -0.5, 2.0, 7.0])
y_pred_reg = np.array([2.5, 0.0, 2.0, 8.0])
# 1.1 Mean Squared Error (MSE)
# Purpose: Penalizes large errors more heavily than small ones (squared penalty).
# Sensitive to outliers; use when large errors must be strongly discouraged.
mse = mean_squared_error(y_true_reg, y_pred_reg)
print(f"MSE (Mean Squared Error): {mse:.2f}")
# 1.2 Mean Absolute Error (MAE)
# Purpose: Penalizes errors linearly, weighting all errors equally.
# Less sensitive to outliers than MSE.
mae = mean_absolute_error(y_true_reg, y_pred_reg)
print(f"MAE (Mean Absolute Error): {mae:.2f}")
# 1.3 Huber Loss
# Quadratic for small errors (like MSE) and linear for large ones (like MAE),
# making it robust to outliers. Use when the data contains noise or outliers.
delta = 1.0
huber_loss = np.mean([
0.5 * (y - y_pred)**2 if abs(y - y_pred) <= delta else delta * abs(y - y_pred) - 0.5 * delta**2
for y, y_pred in zip(y_true_reg, y_pred_reg)
])
print(f"Huber Loss: {huber_loss:.2f}")
# 1.4 Log-Cosh Loss
# Behaves like MSE for small errors and like MAE for large ones, but is smooth
# (differentiable) everywhere. A good choice for noisy data with some outliers.
log_cosh_loss = np.mean(np.log(np.cosh(y_pred_reg - y_true_reg)))
print(f"Log-Cosh Loss: {log_cosh_loss:.2f}")
# 1.5 R^2 Score (Coefficient of Determination)
# Strictly an evaluation metric rather than a loss: measures goodness of fit.
# Values closer to 1 indicate a better fit; it can be negative for poor models.
r2 = r2_score(y_true_reg, y_pred_reg)
print(f"R² Score: {r2:.2f}")
# ========================================================
# 2. LOSS FUNCTIONS FOR CLASSIFICATION
# ========================================================
print("\n--- Classification Loss Functions ---\n")
# 2. Classification: Common Loss Functions
# - Binary Cross-Entropy (BCE)
# - Categorical Cross-Entropy (CCE)
# - Sparse Categorical Cross-Entropy (SCCE)
# Simulated Classification Data
y_true_bin = np.array([1, 0, 1, 1]) # Binary labels
y_pred_bin = np.array([0.9, 0.2, 0.8, 0.7]) # Predicted probabilities
y_true_cat = np.array([2, 0, 1]) # Labels for 3 classes
y_pred_cat = np.array([
[0.1, 0.7, 0.2],
[0.8, 0.1, 0.1],
[0.2, 0.5, 0.3]
]) # Predicted probabilities for 3 classes
# 2.1 Binary Cross-Entropy (BCE)
# Suitable for binary classification tasks (e.g., spam detection).
bce = tf.keras.losses.BinaryCrossentropy()(y_true_bin, y_pred_bin).numpy()
print(f"Binary Cross-Entropy (BCE): {bce:.2f}")
# 2.2 Categorical Cross-Entropy (CCE)
# Used for multi-class classification tasks.
cce = tf.keras.losses.CategoricalCrossentropy()(tf.one_hot(y_true_cat, depth=3), y_pred_cat).numpy()
print(f"Categorical Cross-Entropy (CCE): {cce:.2f}")
# 2.3 Sparse Categorical Cross-Entropy (SCCE)
# Similar to CCE but works with integer labels instead of one-hot encoding.
scce = tf.keras.losses.SparseCategoricalCrossentropy()(y_true_cat, y_pred_cat).numpy()
print(f"Sparse Categorical Cross-Entropy (SCCE): {scce:.2f}")
# ========================================================
# 3. EVALUATION METRICS FOR CLASSIFICATION
# ========================================================
print("\n--- Classification Evaluation Metrics ---\n")
# 3. Evaluation Metrics: Classification
# - Accuracy
# - Precision and Recall
# - F1 Score
# - ROC-AUC Score
# - Confusion Matrix
# Convert predicted probabilities to class labels
y_pred_classes = np.argmax(y_pred_cat, axis=1)
# 3.1 Accuracy
# Purpose: Measures the proportion of correctly classified instances.
# Simple and intuitive, but only reliable for balanced datasets.
accuracy = accuracy_score(y_true_cat, y_pred_classes)
print(f"Accuracy: {accuracy:.2f}")
# 3.2 Precision
# Measures true positives out of predicted positives. Use when false positives are costly.
# zero_division=1 avoids a warning here because class 2 is never predicted;
# in practice, zero_division=0 is the more conservative choice.
precision = precision_score(y_true_cat, y_pred_classes, average='weighted', zero_division=1)
print(f"Precision: {precision:.2f}")
# 3.3 Recall
# Measures true positives out of actual positives. Use when false negatives are costly.
recall = recall_score(y_true_cat, y_pred_classes, average='weighted', zero_division=1)
print(f"Recall: {recall:.2f}")
# 3.4 F1 Score
# Harmonic mean of precision and recall. Useful for imbalanced datasets.
# Purpose: Balances precision and recall. Used for imbalanced datasets.
f1 = f1_score(y_true_cat, y_pred_classes, average='weighted')
print(f"F1 Score: {f1:.2f}")
# 3.5 ROC-AUC Score
# Evaluates binary classifiers over various threshold settings.
roc_auc = roc_auc_score(y_true_bin, y_pred_bin)
print(f"ROC-AUC Score: {roc_auc:.2f}")
# 3.6 Confusion Matrix
# Purpose: Shows the counts of true positives, false positives, true negatives,
# and false negatives, giving a detailed per-class breakdown.
conf_matrix = confusion_matrix(y_true_cat, y_pred_classes)
print(f"Confusion Matrix:\n{conf_matrix}")
# Per-class precision and recall can also be derived from the confusion matrix:
# - Precision (per class): diagonal / column sum (how many predicted positives are correct).
# - Recall (per class): diagonal / row sum (how many actual positives are recovered).
# np.divide with `where` avoids a divide-by-zero warning for classes that are never predicted.
col_sums = conf_matrix.sum(axis=0).astype(float)
row_sums = conf_matrix.sum(axis=1).astype(float)
precision_per_class = np.divide(np.diag(conf_matrix), col_sums,
                                out=np.zeros_like(col_sums), where=col_sums > 0)
recall_per_class = np.divide(np.diag(conf_matrix), row_sums,
                             out=np.zeros_like(row_sums), where=row_sums > 0)
print(f"Per-class Precision: {precision_per_class}")
print(f"Per-class Recall: {recall_per_class}")
# ========================================================
# 4. WHEN TO USE WHICH LOSS FUNCTION OR METRIC?
# ========================================================
print("\n--- Summary of Use Cases ---\n")
print("""
1. Regression:
- Use MSE when large errors need heavy penalization.
- Use MAE when outliers should have less impact.
- Use Huber or Log-Cosh Loss for noisy datasets with some outliers.
- Huber Loss: Combines MSE and MAE for robustness to outliers.
- Log-Cosh Loss: Smooth everywhere; acts like MSE for small errors and MAE for large ones.
2. Binary Classification:
- Use Binary Cross-Entropy (BCE) as the loss function.
- Use ROC-AUC Score to measure performance across thresholds.
3. Multi-Class Classification:
- Use Categorical Cross-Entropy for one-hot encoded labels.
- Use Sparse Categorical Cross-Entropy for integer labels.
- Use F1 Score for imbalanced datasets.
4. Evaluation Metrics:
- Accuracy: Use for balanced datasets.
- Precision: Use when false positives are costly.
- Recall: Use when false negatives are costly.
- F1 Score: Use for imbalanced datasets.
- ROC-AUC: Use for evaluating binary classifiers.
""")