Why does the confusion matrix for soft voting change across repeated runs of the same code? Does setting probability=True on the SVMs influence this? How can I make the soft-voting confusion matrix reproducible?
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
import numpy as np
from sklearn.metrics import classification_report
# Split the preprocessed DataFrame into features (all but last column) and labels.
# NOTE(review): assumes `dataset_baru` is defined earlier in the notebook/script.
X = dataset_baru.iloc[:, :-1].values
y = dataset_baru.iloc[:, -1].values

# --- Base SVM models ---------------------------------------------------------
# Soft voting averages class probabilities, so the base SVCs need
# probability=True (this enables predict_proba via Platt scaling).
# Platt scaling runs an *internal* cross-validation whose shuffling is driven
# by the estimator's random_state; without fixing it, the calibrated
# probabilities -- and therefore the soft-voting predictions and confusion
# matrices -- differ between runs. Pinning random_state makes results
# reproducible (this answers the question above).
SEED = 42
svm_kernel1_soft = SVC(kernel='linear', probability=True, random_state=SEED)
svm_kernel2_soft = SVC(kernel='poly', probability=True, random_state=SEED)
svm_kernel3_soft = SVC(kernel='rbf', probability=True, random_state=SEED)

# Hard voting uses only predicted class labels (majority vote), so
# predict_proba is not needed and probability=True can be omitted here.
# (Removing probability=True from the *soft* estimators raises
# "AttributeError: predict_proba is not available when probability=False".)
svm_kernel1_hard = SVC(kernel='linear')
svm_kernel2_hard = SVC(kernel='poly')
svm_kernel3_hard = SVC(kernel='rbf')

# --- K-fold cross-validation -------------------------------------------------
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

confusion_matrices_hard = []
confusion_matrices_soft = []
# BUG FIX: capture each fold's classification report *inside* the loop.
# The original printed classification_report(y_test, y_pred_*) after the
# loop, which reused the LAST fold's arrays for every "Fold i" heading.
reports_hard = []
reports_soft = []

for train_index, test_index in kfold.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Hard-voting ensemble: majority vote over the three kernels.
    voting_clf_hard = VotingClassifier(
        estimators=[('svm1', svm_kernel1_hard), ('svm2', svm_kernel2_hard), ('svm3', svm_kernel3_hard)],
        voting='hard'
    )
    voting_clf_hard.fit(X_train, y_train)
    y_pred_hard = voting_clf_hard.predict(X_test)

    # Soft-voting ensemble: argmax of the averaged class probabilities.
    voting_clf_soft = VotingClassifier(
        estimators=[('svm1', svm_kernel1_soft), ('svm2', svm_kernel2_soft), ('svm3', svm_kernel3_soft)],
        voting='soft'
    )
    voting_clf_soft.fit(X_train, y_train)
    y_pred_soft = voting_clf_soft.predict(X_test)

    # Store this fold's confusion matrices and reports for later printing.
    confusion_matrices_hard.append(confusion_matrix(y_test, y_pred_hard))
    confusion_matrices_soft.append(confusion_matrix(y_test, y_pred_soft))
    reports_hard.append(classification_report(y_test, y_pred_hard))
    reports_soft.append(classification_report(y_test, y_pred_soft))

# --- Per-fold results --------------------------------------------------------
print("Classification Report - Hard Voting:")
for i, (cm, report) in enumerate(zip(confusion_matrices_hard, reports_hard), start=1):
    print(f"Fold {i}:")
    print(cm)
    print(report)

print("Classification Report - Soft Voting:")
for i, (cm, report) in enumerate(zip(confusion_matrices_soft, reports_soft), start=1):
    print(f"Fold {i}:")
    print(cm)
    print(report)
When I tried removing probability=True from the SVMs used for soft voting, the following error occurred:
AttributeError: predict_proba is not available when probability=False