Zeichnen der ROC-Kurve aus der Verwirrungsmatrix – Python

Python-Programme
Anonymous
 Zeichnen der ROC-Kurve aus der Verwirrungsmatrix

Post by Anonymous »

Ich muss ermitteln, wie gut verschiedene Klassifizierungsmodelle Werte vorhersagen. Dazu muss ich eine ROC-Kurve zeichnen, aber es fällt mir schwer, einen Ansatz zu entwickeln.

Ich habe meinen gesamten Python-Code sowie den Link zu dem von mir verwendeten Datensatz eingefügt. Es sieht nach viel Code aus, ist aber eigentlich recht einfach. Das Hauptproblem ist, dass ich eine 3x3-Konfusionsmatrix (drei Klassen) habe und nicht weiß, wie ich daraus eine ROC-Kurve ableiten soll.

Jede Hilfe wird sehr geschätzt.

Datensatz:

https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/

Code: Select all

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
import seaborn as sns
import numpy as np

# Load the white-wine table, skipping every column whose name contains
# 'Unnamed'. To analyse the red wines instead, point the call at
# 'wineQualityReds.csv' with the same column filter.
data = pd.read_csv(
    'wineQualityWhites.csv',
    usecols=lambda column_name: 'Unnamed' not in column_name,
)

# roc curve and auc score
from sklearn.datasets import make_classification
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

def plot_roc_curve(fpr, tpr, label='ROC'):
    """Plot one ROC curve plus the diagonal reference line and show it.

    Args:
        fpr: Sequence of false positive rates (x values of the curve).
        tpr: Sequence of true positive rates (y values of the curve).
        label: Legend entry for the curve. Defaults to 'ROC'.

    The curve is drawn on the current matplotlib axes and the figure is
    displayed with ``plt.show()``; nothing is returned.
    """
    plt.plot(fpr, tpr, color='orange', label=label)
    # Dashed diagonal from (0, 0) to (1, 1) as a chance-level reference.
    plt.plot([0, 1], [0, 1], color='darkblue', linestyle='--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve')
    plt.legend()
    plt.show()

# Bin edges for the quality score. With include_lowest=True the three
# intervals are [1, 4], (4, 6] and (6, 10].
bins = [1, 4, 6, 10]

# One integer class label per interval, in ascending order.
quality_labels = [0, 1, 2]

data['quality_categorial'] = pd.cut(
    data['quality'],
    bins=bins,
    labels=quality_labels,
    include_lowest=True,
)

# NOTE(review): `display` is not defined in this file; presumably the
# IPython/Jupyter builtin - confirm before running as a plain script.
display(data.head(n=2))

# Target: the binned quality class. Features: every remaining column.
quality_raw = data['quality_categorial']
features_raw = data.drop(columns=['quality', 'quality_categorial'])

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    features_raw,
    quality_raw,
    test_size=0.2,
    random_state=0,
)

from sklearn.metrics import fbeta_score
from sklearn.metrics import accuracy_score

def train_predict_evaluate(learner, sample_size, X_train, y_train, X_test, y_test):
    """Fit *learner* on a training subset and evaluate it on the test set.

    Args:
        learner: Classifier exposing ``fit`` and ``predict``. When it also
            exposes ``predict_proba`` the one-vs-rest ROC step is run.
        sample_size: Number of leading training rows used for fitting.
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels.

    Returns:
        dict with the keys 'acc_train', 'acc_test', 'f_train', 'f_test'
        and, when the ROC step ran, 'auc_test' (class label -> AUC of that
        class against all others).

    Side effects:
        Prints the confusion matrix, the test predictions and a summary
        line; draws the confusion-matrix heatmap; if the ROC step ran,
        draws the ROC figure and calls ``plt.show()``.
    """
    results = {}

    learner = learner.fit(X_train[:sample_size], y_train[:sample_size])

    # Training metrics are measured on the first 300 training rows only.
    predictions_train = learner.predict(X_train[:300])
    predictions_test = learner.predict(X_test)

    results['acc_train'] = accuracy_score(y_train[:300], predictions_train)
    results['acc_test'] = accuracy_score(y_test, predictions_test)

    results['f_train'] = fbeta_score(
        y_train[:300], predictions_train, beta=0.5, average='micro')
    results['f_test'] = fbeta_score(
        y_test, predictions_test, beta=0.5, average='micro')

    # Use the union of true and predicted labels for both the matrix and
    # its axis labels, so the DataFrame shape always matches the matrix
    # even if a class shows up only in the predictions.
    class_labels = unique_labels(np.asarray(y_test), predictions_test)
    cm = confusion_matrix(y_test, predictions_test, labels=class_labels)
    print(cm)

    df_cm = pd.DataFrame(cm, columns=class_labels, index=class_labels)
    df_cm.index.name = 'Actual'
    df_cm.columns.name = 'Predicted'

    plt.figure(figsize=(10, 7))
    sns.set(font_scale=1.4)  # label size
    sns.heatmap(df_cm, cmap="Blues", annot=True, fmt='g',
                annot_kws={"size": 16})  # font size

    print(predictions_test)

    # A ROC curve needs scores, not hard predictions, and is defined for
    # two classes only; with three classes it is computed one-vs-rest.
    if hasattr(learner, 'predict_proba'):
        results['auc_test'] = _plot_one_vs_rest_roc(learner, X_test, y_test)

    print("{} trained on {} samples." .format(learner.__class__.__name__, sample_size))

    return results


def _plot_one_vs_rest_roc(learner, X_test, y_test):
    """Draw one ROC curve per class (class vs. rest) and return the AUCs."""
    probs = learner.predict_proba(X_test)
    y_true = np.asarray(y_test)

    # Separate figure so the curves are not drawn onto the heatmap axes.
    plt.figure(figsize=(10, 7))
    auc_by_class = {}
    # predict_proba columns follow learner.classes_, which only holds the
    # classes seen during fit (a small training subset may miss one).
    for column, cls in enumerate(learner.classes_):
        is_positive = y_true == cls
        if is_positive.all() or not is_positive.any():
            # ROC/AUC are undefined when only one outcome is present.
            continue
        scores = probs[:, column]
        fpr, tpr, _ = roc_curve(is_positive, scores)
        auc_by_class[cls] = roc_auc_score(is_positive, scores)
        plt.plot(fpr, tpr,
                 label='class %s vs rest (AUC = %.2f)' % (cls, auc_by_class[cls]))

    plt.plot([0, 1], [0, 1], color='darkblue', linestyle='--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve')
    plt.legend()
    plt.show()
    return auc_by_class

from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# The three models under comparison. random_state=None leaves the tree and
# the forest unseeded, so their results can differ between runs.
clf_A = GaussianNB()
clf_B = DecisionTreeClassifier(max_depth=None, random_state=None)
clf_C = RandomForestClassifier(max_depth=None, random_state=None)

# Training-set sizes: 100 %, 10 % and 1 % of the available training rows.
samples_100 = len(y_train)
samples_10 = int(len(y_train)*10/100)
samples_1 = int(len(y_train)*1/100)

# results[<classifier class name>][<0, 1, 2>] holds the metrics dict for
# the 1 %, 10 % and 100 % run of that classifier.
results = {}
for clf in [clf_A, clf_B, clf_C]:
    clf_name = clf.__class__.__name__
    results[clf_name] = {}
    for i, samples in enumerate([samples_1, samples_10, samples_100]):
        results[clf_name][i] = train_predict_evaluate(
            clf, samples, X_train, y_train, X_test, y_test)

# One more full-size run of the random forest; its return value is unused.
# NOTE(review): read as a top-level statement because a blank line separates
# it from the loop - the paste lost its indentation, so confirm.
train_predict_evaluate(clf_C, samples_100, X_train, y_train, X_test, y_test)

Quick Reply

Change Text Case: 
   
  • Similar Topics
    Replies
    Views
    Last post