Compare commits
3 Commits
7af1d66a8f
...
2c287d819d
| Author | SHA1 | Date | |
|---|---|---|---|
| 2c287d819d | |||
| 12163c14a0 | |||
| 7acc63bbe9 |
1
output/Team44-AC.ipynb
Normal file
1
output/Team44-D.ipynb
Normal file
BIN
output/Team44.pdf
Normal file
BIN
output/Team44.zip
Normal file
BIN
output/labelsX.npy
Normal file
BIN
presentation/Team44.pdf
Normal file
|
After Width: | Height: | Size: 6.0 MiB |
BIN
presentation/partA/EstGaussianDensity.png
Normal file
|
After Width: | Height: | Size: 55 KiB |
BIN
presentation/partA/MLE_Estimated_2d_gaussians.png
Normal file
|
After Width: | Height: | Size: 217 KiB |
BIN
presentation/partB/Dataset2_hist_vs_true.png
Normal file
|
After Width: | Height: | Size: 33 KiB |
BIN
presentation/partB/Gaussian_kernel_h_vs_MSE.png
Normal file
|
After Width: | Height: | Size: 22 KiB |
BIN
presentation/partB/Uniform_kernel_h_vs_MSE.png
Normal file
|
After Width: | Height: | Size: 21 KiB |
BIN
presentation/partC/knn_accuracy_over_k.png
Normal file
|
After Width: | Height: | Size: 46 KiB |
BIN
presentation/partC/knn_decision_boundaries.png
Normal file
|
After Width: | Height: | Size: 45 KiB |
BIN
presentation/partD/RawData.png
Normal file
|
After Width: | Height: | Size: 28 KiB |
BIN
presentation/partD/feature_dist_overlap_2_vs_5.png
Normal file
|
After Width: | Height: | Size: 73 KiB |
BIN
src/figures/Confusion matrix (tuned) – scale + mlp.png
Normal file
|
After Width: | Height: | Size: 108 KiB |
BIN
src/figures/Confusion matrix (tuned) – scale + rf.png
Normal file
|
After Width: | Height: | Size: 105 KiB |
BIN
src/figures/Confusion matrix (tuned) – scale + svm.png
Normal file
|
After Width: | Height: | Size: 105 KiB |
BIN
src/figures/Confusion matrix (tuned) – scale_pca_85 + knn.png
Normal file
|
After Width: | Height: | Size: 110 KiB |
2
src/figures/tuning_results.csv
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
config,preprocess,model,params,mean_acc,std_acc
|
||||||
|
scale + svm,scale,svm,"{'kernel': 'rbf', 'C': 4, 'gamma': 'scale', 'class_weight': None}",0.8581719138625145,0.013348223889216927
|
||||||
|
BIN
src/labelsX.npy
Normal file
BIN
src/labelsX_scale_mlp.npy
Normal file
BIN
src/labelsX_scale_pca_85_knn.npy
Normal file
BIN
src/labelsX_scale_rf.npy
Normal file
BIN
src/labelsX_scale_svm.npy
Normal file
185
src/output.txt
Normal file
@ -0,0 +1,185 @@
|
|||||||
|
/home/hoo2/Work/AUTh/PatternRecognition/Assignment_2025-26/.venv/bin/python /home/hoo2/Work/AUTh/PatternRecognition/Assignment_2025-26/src/partD.py all
|
||||||
|
[ scale] [ gnb] val_acc=0.7095
|
||||||
|
[ scale] [ rf] val_acc=0.8205
|
||||||
|
[ scale] [ logreg] val_acc=0.7730
|
||||||
|
[ scale] [linear_svm] val_acc=0.7707
|
||||||
|
[ scale] [ svm] val_acc=0.8593
|
||||||
|
[ scale] [ mlp] val_acc=0.8382
|
||||||
|
[ scale] [ knn] val_acc=0.8342
|
||||||
|
[ scale] [ adaboost] val_acc=0.6832
|
||||||
|
[scale_pca_66] [ gnb] val_acc=0.7524
|
||||||
|
[scale_pca_66] [ rf] val_acc=0.8096
|
||||||
|
[scale_pca_66] [ logreg] val_acc=0.7862
|
||||||
|
[scale_pca_66] [linear_svm] val_acc=0.7736
|
||||||
|
[scale_pca_66] [ svm] val_acc=0.8582
|
||||||
|
[scale_pca_66] [ mlp] val_acc=0.8359
|
||||||
|
[scale_pca_66] [ knn] val_acc=0.8370
|
||||||
|
[scale_pca_66] [ adaboost] val_acc=0.6878
|
||||||
|
[scale_pca_75] [ gnb] val_acc=0.7547
|
||||||
|
[scale_pca_75] [ rf] val_acc=0.8130
|
||||||
|
[scale_pca_75] [ logreg] val_acc=0.7839
|
||||||
|
[scale_pca_75] [linear_svm] val_acc=0.7696
|
||||||
|
[scale_pca_75] [ svm] val_acc=0.8565
|
||||||
|
[scale_pca_75] [ mlp] val_acc=0.8216
|
||||||
|
[scale_pca_75] [ knn] val_acc=0.8370
|
||||||
|
[scale_pca_75] [ adaboost] val_acc=0.6878
|
||||||
|
[scale_pca_85] [ gnb] val_acc=0.7501
|
||||||
|
[scale_pca_85] [ rf] val_acc=0.8033
|
||||||
|
[scale_pca_85] [ logreg] val_acc=0.7810
|
||||||
|
[scale_pca_85] [linear_svm] val_acc=0.7662
|
||||||
|
[scale_pca_85] [ svm] val_acc=0.8588
|
||||||
|
[scale_pca_85] [ mlp] val_acc=0.8188
|
||||||
|
[scale_pca_85] [ knn] val_acc=0.8388
|
||||||
|
[scale_pca_85] [ adaboost] val_acc=0.6998
|
||||||
|
|
||||||
|
=== Investigation summary ===
|
||||||
|
model
|
||||||
|
svm 0.859348
|
||||||
|
knn 0.838765
|
||||||
|
mlp 0.838193
|
||||||
|
rf 0.820469
|
||||||
|
logreg 0.786164
|
||||||
|
linear_svm 0.773585
|
||||||
|
gnb 0.754717
|
||||||
|
adaboost 0.699828
|
||||||
|
|
||||||
|
Selected top-3 models for further analysis: ['svm', 'knn', 'mlp']
|
||||||
|
|
||||||
|
Best configuration overall: preprocess=scale, model=svm, val_acc=0.8593
|
||||||
|
|
||||||
|
Classification report (best config):
|
||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
1 0.94 0.96 0.95 354
|
||||||
|
2 0.76 0.73 0.75 344
|
||||||
|
3 0.92 0.93 0.93 351
|
||||||
|
4 0.91 0.91 0.91 343
|
||||||
|
5 0.75 0.77 0.76 357
|
||||||
|
|
||||||
|
accuracy 0.86 1749
|
||||||
|
macro avg 0.86 0.86 0.86 1749
|
||||||
|
weighted avg 0.86 0.86 0.86 1749
|
||||||
|
|
||||||
|
|
||||||
|
[TUNING] scale + rf (cv=5) ...
|
||||||
|
[scale | rf] combo 1/1 mean=0.8228 params={'n_estimators': 400, 'max_depth': None, 'max_features': 'sqrt', 'min_samples_split': 4, 'min_samples_leaf': 1}
|
||||||
|
best mean_acc=0.8228 (std=0.0121) params={'n_estimators': 400, 'max_depth': None, 'max_features': 'sqrt', 'min_samples_split': 4, 'min_samples_leaf': 1}
|
||||||
|
|
||||||
|
[TUNING] scale + mlp (cv=5) ...
|
||||||
|
[scale | mlp] combo 1/1 mean=0.8407 params={'hidden_layer_sizes': (128,), 'alpha': 0.001, 'learning_rate_init': 0.01, 'activation': 'relu', 'solver': 'adam'}
|
||||||
|
best mean_acc=0.8407 (std=0.0098) params={'hidden_layer_sizes': (128,), 'alpha': 0.001, 'learning_rate_init': 0.01, 'activation': 'relu', 'solver': 'adam'}
|
||||||
|
|
||||||
|
[TUNING] scale_pca_85 + knn (cv=5) ...
|
||||||
|
[scale_pca_85 | knn] combo 1/1 mean=0.8313 params={'n_neighbors': 9, 'weights': 'distance', 'p': 2}
|
||||||
|
best mean_acc=0.8313 (std=0.0117) params={'n_neighbors': 9, 'weights': 'distance', 'p': 2}
|
||||||
|
|
||||||
|
[TUNING] scale + svm (cv=5) ...
|
||||||
|
[scale | svm] combo 1/1 mean=0.8582 params={'kernel': 'rbf', 'C': 4, 'gamma': 'scale', 'class_weight': None}
|
||||||
|
best mean_acc=0.8582 (std=0.0133) params={'kernel': 'rbf', 'C': 4, 'gamma': 'scale', 'class_weight': None}
|
||||||
|
|
||||||
|
=== Tuning summary (best overall) ===
|
||||||
|
{'name': 'scale + svm', 'preprocess_spec': {'type': 'pipeline', 'steps': [{'type': 'scaler', 'params': {}}]}, 'preprocess_name': 'scale', 'model': 'svm', 'params': {'kernel': 'rbf', 'C': 4, 'gamma': 'scale', 'class_weight': None}, 'mean_acc': 0.8581719138625145, 'std_acc': 0.013348223889216927}
|
||||||
|
|
||||||
|
============================================================
|
||||||
|
[FINAL - VALIDATION] scale + rf
|
||||||
|
Confusion matrix:
|
||||||
|
[[338 6 5 3 2]
|
||||||
|
[ 4 239 11 12 78]
|
||||||
|
[ 11 2 316 21 1]
|
||||||
|
[ 3 12 17 299 12]
|
||||||
|
[ 13 71 4 9 260]]
|
||||||
|
|
||||||
|
Classification report:
|
||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
1 0.92 0.95 0.93 354
|
||||||
|
2 0.72 0.69 0.71 344
|
||||||
|
3 0.90 0.90 0.90 351
|
||||||
|
4 0.87 0.87 0.87 343
|
||||||
|
5 0.74 0.73 0.73 357
|
||||||
|
|
||||||
|
accuracy 0.83 1749
|
||||||
|
macro avg 0.83 0.83 0.83 1749
|
||||||
|
weighted avg 0.83 0.83 0.83 1749
|
||||||
|
|
||||||
|
============================================================
|
||||||
|
[FINAL] scale_rf: saved labelsX_scale_rf.npy shape=(6955,)
|
||||||
|
|
||||||
|
============================================================
|
||||||
|
[FINAL - VALIDATION] scale + mlp
|
||||||
|
Confusion matrix:
|
||||||
|
[[338 1 9 2 4]
|
||||||
|
[ 5 244 13 7 75]
|
||||||
|
[ 10 3 320 16 2]
|
||||||
|
[ 0 14 16 302 11]
|
||||||
|
[ 8 74 1 16 258]]
|
||||||
|
|
||||||
|
Classification report:
|
||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
1 0.94 0.95 0.95 354
|
||||||
|
2 0.73 0.71 0.72 344
|
||||||
|
3 0.89 0.91 0.90 351
|
||||||
|
4 0.88 0.88 0.88 343
|
||||||
|
5 0.74 0.72 0.73 357
|
||||||
|
|
||||||
|
accuracy 0.84 1749
|
||||||
|
macro avg 0.83 0.84 0.83 1749
|
||||||
|
weighted avg 0.83 0.84 0.84 1749
|
||||||
|
|
||||||
|
============================================================
|
||||||
|
[FINAL] scale_mlp: saved labelsX_scale_mlp.npy shape=(6955,)
|
||||||
|
|
||||||
|
============================================================
|
||||||
|
[FINAL - VALIDATION] scale_pca_85 + knn
|
||||||
|
Confusion matrix:
|
||||||
|
[[346 2 5 0 1]
|
||||||
|
[ 5 193 9 7 130]
|
||||||
|
[ 19 1 319 11 1]
|
||||||
|
[ 4 9 17 301 12]
|
||||||
|
[ 8 33 1 6 309]]
|
||||||
|
|
||||||
|
Classification report:
|
||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
1 0.91 0.98 0.94 354
|
||||||
|
2 0.81 0.56 0.66 344
|
||||||
|
3 0.91 0.91 0.91 351
|
||||||
|
4 0.93 0.88 0.90 343
|
||||||
|
5 0.68 0.87 0.76 357
|
||||||
|
|
||||||
|
accuracy 0.84 1749
|
||||||
|
macro avg 0.85 0.84 0.84 1749
|
||||||
|
weighted avg 0.85 0.84 0.84 1749
|
||||||
|
|
||||||
|
============================================================
|
||||||
|
[FINAL] scale_pca_85_knn: saved labelsX_scale_pca_85_knn.npy shape=(6955,)
|
||||||
|
|
||||||
|
============================================================
|
||||||
|
[FINAL - VALIDATION] scale + svm
|
||||||
|
Confusion matrix:
|
||||||
|
[[340 2 8 1 3]
|
||||||
|
[ 3 251 9 6 75]
|
||||||
|
[ 7 1 327 14 2]
|
||||||
|
[ 0 12 9 311 11]
|
||||||
|
[ 11 63 1 8 274]]
|
||||||
|
|
||||||
|
Classification report:
|
||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
1 0.94 0.96 0.95 354
|
||||||
|
2 0.76 0.73 0.75 344
|
||||||
|
3 0.92 0.93 0.93 351
|
||||||
|
4 0.91 0.91 0.91 343
|
||||||
|
5 0.75 0.77 0.76 357
|
||||||
|
|
||||||
|
accuracy 0.86 1749
|
||||||
|
macro avg 0.86 0.86 0.86 1749
|
||||||
|
weighted avg 0.86 0.86 0.86 1749
|
||||||
|
|
||||||
|
============================================================
|
||||||
|
[FINAL] scale_svm: saved labelsX_scale_svm.npy shape=(6955,)
|
||||||
|
Saved labels to labelsX.npy with shape (6955,)
|
||||||
|
|
||||||
|
Process finished with exit code 0
|
||||||
|
|
||||||
11
src/partA.py
@ -197,6 +197,17 @@ def plot_gaussians_3d(
|
|||||||
ax.set_zlabel("pdf")
|
ax.set_zlabel("pdf")
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
|
# plt.figure(figsize=(6, 5))
|
||||||
|
# plt.scatter(X[:, 0], X[:, 1], s=10, alpha=0.35)
|
||||||
|
# plt.contour(Xgrid, Ygrid, Z, levels=8, linewidths=1.5)
|
||||||
|
#
|
||||||
|
# plt.title("Estimated Gaussian density (ML)")
|
||||||
|
# plt.xlabel("x₁")
|
||||||
|
# plt.ylabel("x₂")
|
||||||
|
#
|
||||||
|
# plt.tight_layout()
|
||||||
|
# plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------
|
# --------------------------------------------------
|
||||||
|
|||||||
@ -302,7 +302,7 @@ def plot_histogram_with_pdf(
|
|||||||
plt.plot(x_plot, pdf_true, label=f"True N({mu_true}, {var_true}) pdf")
|
plt.plot(x_plot, pdf_true, label=f"True N({mu_true}, {var_true}) pdf")
|
||||||
plt.xlabel("x")
|
plt.xlabel("x")
|
||||||
plt.ylabel("Density")
|
plt.ylabel("Density")
|
||||||
plt.title("Dataset2 histogram vs true N({mu_true}, {var_true}) pdf")
|
plt.title(f"Dataset2 histogram vs true N({mu_true}, {var_true}) pdf")
|
||||||
plt.legend()
|
plt.legend()
|
||||||
plt.grid(True)
|
plt.grid(True)
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|||||||
161
src/partD.py
@ -33,6 +33,8 @@ import matplotlib as mpl
|
|||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.model_selection import StratifiedKFold
|
||||||
|
|
||||||
from sklearn.pipeline import Pipeline
|
from sklearn.pipeline import Pipeline
|
||||||
from sklearn.preprocessing import StandardScaler
|
from sklearn.preprocessing import StandardScaler
|
||||||
from sklearn.decomposition import PCA
|
from sklearn.decomposition import PCA
|
||||||
@ -389,8 +391,9 @@ def plot_accuracy_bars(df: pd.DataFrame, title: str) -> None:
|
|||||||
plt.grid(True, axis="y", alpha=0.3)
|
plt.grid(True, axis="y", alpha=0.3)
|
||||||
plt.legend()
|
plt.legend()
|
||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
plt.show(block=False)
|
|
||||||
plt.savefig(f"figures/" + title + ".png", dpi=300)
|
plt.savefig(f"figures/" + title + ".png", dpi=300)
|
||||||
|
plt.show(block=False)
|
||||||
|
plt.pause(2)
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
|
|
||||||
@ -404,8 +407,9 @@ def plot_confusion(y_true: np.ndarray, y_pred: np.ndarray, title: str) -> None:
|
|||||||
disp.plot(ax=ax, cmap="Blues", colorbar=True)
|
disp.plot(ax=ax, cmap="Blues", colorbar=True)
|
||||||
ax.set_title(title)
|
ax.set_title(title)
|
||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
plt.show(block=False)
|
|
||||||
plt.savefig(f"figures/" + title + ".png", dpi=300)
|
plt.savefig(f"figures/" + title + ".png", dpi=300)
|
||||||
|
plt.show(block=False)
|
||||||
|
plt.pause(2)
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
|
|
||||||
@ -457,7 +461,7 @@ def plot_pca_scatter_2d(
|
|||||||
bbox_inches="tight",
|
bbox_inches="tight",
|
||||||
)
|
)
|
||||||
plt.show(block=False)
|
plt.show(block=False)
|
||||||
plt.pause(0.001)
|
plt.pause(2)
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
|
|
||||||
@ -497,7 +501,7 @@ def plot_feature_separability(
|
|||||||
bbox_inches="tight",
|
bbox_inches="tight",
|
||||||
)
|
)
|
||||||
plt.show(block=False)
|
plt.show(block=False)
|
||||||
plt.pause(0.001)
|
plt.pause(2)
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
# Plot worst
|
# Plot worst
|
||||||
@ -514,7 +518,7 @@ def plot_feature_separability(
|
|||||||
bbox_inches="tight",
|
bbox_inches="tight",
|
||||||
)
|
)
|
||||||
plt.show(block=False)
|
plt.show(block=False)
|
||||||
plt.pause(0.001)
|
plt.pause(2)
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
return best_idx, worst_idx
|
return best_idx, worst_idx
|
||||||
@ -554,7 +558,7 @@ def plot_feature_distributions_grid(
|
|||||||
bbox_inches="tight",
|
bbox_inches="tight",
|
||||||
)
|
)
|
||||||
plt.show(block=False)
|
plt.show(block=False)
|
||||||
plt.pause(0.001)
|
plt.pause(2)
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
|
|
||||||
@ -711,6 +715,16 @@ def final_training_for_all_best_configs(
|
|||||||
model = train_classifier(X_tr_p, y_tr, model_spec)
|
model = train_classifier(X_tr_p, y_tr, model_spec)
|
||||||
y_val_pred = model.predict(X_val_p).astype(int)
|
y_val_pred = model.predict(X_val_p).astype(int)
|
||||||
|
|
||||||
|
# --- console output: confusion matrix + report ---
|
||||||
|
cm = confusion_matrix(y_val, y_val_pred)
|
||||||
|
print("\n" + "=" * 60)
|
||||||
|
print(f"[FINAL - VALIDATION] {preprocess_name} + {model_key}")
|
||||||
|
print("Confusion matrix:")
|
||||||
|
print(cm)
|
||||||
|
print("\nClassification report:")
|
||||||
|
print(classification_report(y_val, y_val_pred))
|
||||||
|
print("=" * 60)
|
||||||
|
|
||||||
plot_confusion(
|
plot_confusion(
|
||||||
y_val,
|
y_val,
|
||||||
y_val_pred,
|
y_val_pred,
|
||||||
@ -776,12 +790,12 @@ def train_final_and_predict(
|
|||||||
# --------------------------------------------------
|
# --------------------------------------------------
|
||||||
# Helpers
|
# Helpers
|
||||||
# --------------------------------------------------
|
# --------------------------------------------------
|
||||||
def effect_size_per_feature(X2: np.ndarray, X5: np.ndarray, eps: float = 1e-12) -> np.ndarray:
|
def effect_size_per_feature(Xa: np.ndarray, Xb: np.ndarray, eps: float = 1e-12) -> np.ndarray:
|
||||||
"""
|
"""
|
||||||
Computes a simple per-feature separability score between two classes.
|
Computes a simple per-feature separability score between two classes.
|
||||||
|
|
||||||
Score (Cohen-like d):
|
Score (Cohen-like d):
|
||||||
d_j = |mu2 - mu5| / sqrt( (var2 + var5)/2 )
|
d_j = |mu_a - mu_b| / sqrt( (var_a + var_b)/2 )
|
||||||
|
|
||||||
Larger d => better separation (less overlap).
|
Larger d => better separation (less overlap).
|
||||||
Smaller d => stronger overlap.
|
Smaller d => stronger overlap.
|
||||||
@ -791,14 +805,14 @@ def effect_size_per_feature(X2: np.ndarray, X5: np.ndarray, eps: float = 1e-12)
|
|||||||
d : ndarray, shape (D,)
|
d : ndarray, shape (D,)
|
||||||
Per-feature separability scores.
|
Per-feature separability scores.
|
||||||
"""
|
"""
|
||||||
mu2 = np.mean(X2, axis=0)
|
mu_a = np.mean(Xa, axis=0)
|
||||||
mu5 = np.mean(X5, axis=0)
|
mu_b = np.mean(Xb, axis=0)
|
||||||
|
|
||||||
var2 = np.var(X2, axis=0)
|
var_a = np.var(Xa, axis=0)
|
||||||
var5 = np.var(X5, axis=0)
|
var_b = np.var(Xb, axis=0)
|
||||||
|
|
||||||
pooled = np.sqrt(0.5 * (var2 + var5) + eps)
|
pooled = np.sqrt(0.5 * (var_a + var_b) + eps)
|
||||||
d = np.abs(mu2 - mu5) / pooled
|
d = np.abs(mu_a - mu_b) / pooled
|
||||||
return d
|
return d
|
||||||
|
|
||||||
|
|
||||||
@ -814,8 +828,6 @@ def expand_param_grid(param_grid: Dict[str, List[Any]]) -> List[Dict[str, Any]]:
|
|||||||
return combos
|
return combos
|
||||||
|
|
||||||
|
|
||||||
from sklearn.model_selection import StratifiedKFold
|
|
||||||
|
|
||||||
def stratified_kfold_indices(y: np.ndarray, n_splits: int, seed: int = 0):
|
def stratified_kfold_indices(y: np.ndarray, n_splits: int, seed: int = 0):
|
||||||
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
|
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
|
||||||
return list(skf.split(np.zeros_like(y), y))
|
return list(skf.split(np.zeros_like(y), y))
|
||||||
@ -1184,52 +1196,93 @@ TUNING_SPECS = [
|
|||||||
# "preprocess": PREPROCESS_SPECS["scale"],
|
# "preprocess": PREPROCESS_SPECS["scale"],
|
||||||
# "model": "rf",
|
# "model": "rf",
|
||||||
# "param_grid": {
|
# "param_grid": {
|
||||||
# "n_estimators": [400, 800, 1200, 1400], #[200, 400, 800],
|
# # Tuned values
|
||||||
# "max_depth": [None], #[None, 20, 40, 80],
|
# "n_estimators": [400],
|
||||||
# "max_features": ["sqrt"], #["sqrt", "log2", 0.5],
|
# "max_depth": [None],
|
||||||
# "min_samples_split": [2, 4, 8, 10],#[2, 5, 10],
|
# "max_features": ["sqrt"],
|
||||||
# "min_samples_leaf": [1, 2, 4], #[1, 2, 4],
|
# "min_samples_split": [4],
|
||||||
|
# "min_samples_leaf": [1],
|
||||||
|
# #
|
||||||
|
# # Tuned with the values below
|
||||||
|
# # Note:
|
||||||
|
# # Uncomment the following if you want to run the entire tuning process again!
|
||||||
|
# # ** Takes a LOT of time **
|
||||||
|
# # "n_estimators": [200, 400, 800, 1200, 1400],
|
||||||
|
# # "max_depth": [None, 20, 40, 80],
|
||||||
|
# # "max_features": ["sqrt", "log2", 0.5],
|
||||||
|
# # "min_samples_split": [2, 4, 5, 8, 10],
|
||||||
|
# # "min_samples_leaf": [1, 2, 4],
|
||||||
|
# },
|
||||||
|
# "cv": 5,
|
||||||
|
# },
|
||||||
|
# {
|
||||||
|
# "name": "scale + mlp",
|
||||||
|
# "preprocess_name": "scale",
|
||||||
|
# "preprocess": PREPROCESS_SPECS["scale"],
|
||||||
|
# "model": "mlp",
|
||||||
|
# "param_grid": {
|
||||||
|
# # Tuned values
|
||||||
|
# "hidden_layer_sizes": [(128,)],
|
||||||
|
# "alpha": [0.001],
|
||||||
|
# "learning_rate_init": [0.01],
|
||||||
|
# "activation": ["relu"],
|
||||||
|
# "solver": ["adam"],
|
||||||
|
# #
|
||||||
|
# # Tuned with the values below
|
||||||
|
# # Note:
|
||||||
|
# # Uncomment the following if you want to run the entire tuning process again!
|
||||||
|
# # ** Takes a LOT of time **
|
||||||
|
# # "hidden_layer_sizes": [(128, ), (128, 64), (256, 128), (128, 64, 32)],
|
||||||
|
# # "alpha": [1e-5, 1e-4, 1e-3, 0.01],
|
||||||
|
# # "learning_rate_init": [1e-4, 1e-3, 0.01, 0.02],
|
||||||
|
# # "activation": ["relu", "tanh"],
|
||||||
|
# # # "max_iter": [2000],
|
||||||
|
# # "solver": ["adam", "sgd"],
|
||||||
|
# },
|
||||||
|
# "cv": 5,
|
||||||
|
# },
|
||||||
|
# {
|
||||||
|
# "name": "scale_pca_85 + knn",
|
||||||
|
# "preprocess_name": "scale_pca_85",
|
||||||
|
# "preprocess": PREPROCESS_SPECS["scale_pca_85"],
|
||||||
|
# "model": "knn",
|
||||||
|
# "param_grid": {
|
||||||
|
# # Tuned values
|
||||||
|
# "n_neighbors": [9],
|
||||||
|
# "weights": ["distance"],
|
||||||
|
# "p": [2],
|
||||||
|
# #
|
||||||
|
# # Tuned with the values below
|
||||||
|
# # Note:
|
||||||
|
# # Uncomment the following if you want to run the entire tuning process again!
|
||||||
|
# # ** Takes a LOT of time **
|
||||||
|
# # "n_neighbors": [5, 7, 8, 9, 10, 11, 15, 31, 42],
|
||||||
|
# # "weights": ["uniform", "distance"],
|
||||||
|
# # "p": [1, 2],
|
||||||
# },
|
# },
|
||||||
# "cv": 5,
|
# "cv": 5,
|
||||||
# },
|
# },
|
||||||
{
|
|
||||||
"name": "scale + mlp",
|
|
||||||
"preprocess_name": "scale",
|
|
||||||
"preprocess": PREPROCESS_SPECS["scale"],
|
|
||||||
"model": "mlp",
|
|
||||||
"param_grid": {
|
|
||||||
"hidden_layer_sizes": [(128, ), (128, 64), (256, 128), (128, 64, 32)],
|
|
||||||
"alpha": [1e-5, 1e-4, 1e-3],
|
|
||||||
"learning_rate_init": [1e-3, 0.01, 0.02],
|
|
||||||
"activation": ["relu"], #["relu", "tanh"],
|
|
||||||
# "max_iter": [2000],
|
|
||||||
"solver": ["adam"], #["adam", "sgd"],
|
|
||||||
},
|
|
||||||
"cv": 5,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "scale_pca_85 + knn",
|
|
||||||
"preprocess_name": "scale_pca_85",
|
|
||||||
"preprocess": PREPROCESS_SPECS["scale_pca_85"],
|
|
||||||
"model": "knn",
|
|
||||||
"param_grid": {
|
|
||||||
"n_neighbors": [7, 8, 9, 10, 11, 15, 31, 42],
|
|
||||||
"weights": ["uniform", "distance"],
|
|
||||||
"p": [1, 2],
|
|
||||||
},
|
|
||||||
"cv": 5,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"name": "scale + svm",
|
"name": "scale + svm",
|
||||||
"preprocess_name": "scale",
|
"preprocess_name": "scale",
|
||||||
"preprocess": PREPROCESS_SPECS["scale"],
|
"preprocess": PREPROCESS_SPECS["scale"],
|
||||||
"model": "svm",
|
"model": "svm",
|
||||||
"param_grid": {
|
"param_grid": {
|
||||||
"kernel": ["rbf", "poly"],
|
# Tuned values
|
||||||
"C": [3, 4, 5, 5.5, 6, 10],
|
"kernel": ["rbf"],
|
||||||
"degree": [2, 3, 5],
|
"C": [4],
|
||||||
"gamma": ["scale", "auto"],
|
"gamma": ["scale"],
|
||||||
"class_weight": [None],
|
"class_weight": [None],
|
||||||
|
#
|
||||||
|
# Tuned with the values below
|
||||||
|
# Note:
|
||||||
|
# Uncomment the following if you want to run the entire tuning process again!
|
||||||
|
# ** Takes a LOT of time **
|
||||||
|
# "kernel": ["rbf", "poly"],
|
||||||
|
# "C": [0.1, 0.3, 1, 3, 4, 5, 5.5, 6, 10, 30],
|
||||||
|
# # "degree": [2, 3, 5], (only for "poly")
|
||||||
|
# "gamma": ["scale", "auto", 0.1, 0.03, 0.01, 0.003, 0.001],
|
||||||
|
# "class_weight": [None, "balanced"],
|
||||||
},
|
},
|
||||||
"cv": 5,
|
"cv": 5,
|
||||||
},
|
},
|
||||||
@ -1262,7 +1315,7 @@ if __name__ == "__main__":
|
|||||||
# Phase 1.2: visualization
|
# Phase 1.2: visualization
|
||||||
visualization_phase(results, df)
|
visualization_phase(results, df)
|
||||||
|
|
||||||
# Phase 1,3: problem demo
|
# Phase 1.3: problem demo
|
||||||
problem_demonstration_phase(X_train_raw, y_train, class_a=2, class_b=5, top_k=9)
|
problem_demonstration_phase(X_train_raw, y_train, class_a=2, class_b=5, top_k=9)
|
||||||
|
|
||||||
if param == "phase2" or param == "all":
|
if param == "phase2" or param == "all":
|
||||||
@ -1284,7 +1337,7 @@ if __name__ == "__main__":
|
|||||||
seed=0,
|
seed=0,
|
||||||
)
|
)
|
||||||
|
|
||||||
# (Optional) also train/predict only for the best overall and save as the official submission file
|
# Also train/predict only for the best overall and save as the official submission file
|
||||||
y_test_pred = train_final_and_predict(
|
y_test_pred = train_final_and_predict(
|
||||||
X_train_raw, y_train, X_test_raw, best_overall, labels_path="labelsX.npy"
|
X_train_raw, y_train, X_test_raw, best_overall, labels_path="labelsX.npy"
|
||||||
)
|
)
|
||||||
|
|||||||