Wie verbessert man die Geschwindigkeit von GridSearchCV (Rastersuche mit Kreuzvalidierung)?
Posted: 02 Apr 2025, 19:33
Der folgende Code benötigt 7–8 Stunden für einen Datensatz mit 38000 Zeilen. Der Code gibt nur die Modellparameter mit der besten Genauigkeit zurück. Precision und Recall sind wichtig. Um Zeit zu sparen, habe ich nur Precision in den Code eingebaut, da diese Priorität hat. Das läuft auf meinem PC mit 14 Kernen (logische Prozessoren: 20).
Ich möchte dies beschleunigen und auf einem AWS-Glue-Crawler ausführen.
Code: Select all
def createModelAndTrain(input_data_df):
    """Grid-search an SVM classifier and return the best hyper-parameters.

    The function takes the label from the ``target_flag`` column, drops the
    columns ``a``, ``b`` and ``c`` (and the label itself) from the features,
    replaces missing values with 0, makes a stratified 80/20 train/test
    split and scales the features to ``[-1, 1]`` with a ``MinMaxScaler``
    fitted on the training part only.  It then runs a 5-fold
    ``GridSearchCV`` over linear, polynomial and RBF ``SVC`` kernels using
    macro-averaged precision, prints the per-candidate CV scores, and
    prints a confusion matrix and classification report for the held-out
    test part.

    Args:
        input_data_df: Table holding the features and a ``target_flag``
            column (presumably a pandas ``DataFrame`` -- it must support
            ``drop`` and ``fillna``).

    Returns:
        ``best_params_`` of the grid search for the last score in
        ``scores`` (a dict mapping parameter names to values).
    """
    target = input_data_df['target_flag']
    # The label column must not stay in the feature matrix: leaving it in
    # leaks the answer to the model and makes every score meaningless.
    inputs = input_data_df.drop(['a', 'b', 'c', 'target_flag'], axis=1)
    inputs = inputs.fillna(0)

    # No explicit shuffle is needed here: train_test_split shuffles by
    # default, and random_state makes the split reproducible.
    x_train, x_test, y_train, y_test = train_test_split(
        inputs,
        target,
        test_size=0.2,
        random_state=365,
        stratify=target,
    )

    # NOTE(review): the scaler is fitted on the whole training split before
    # cross-validation, so every CV fold shares scaling statistics with its
    # validation part.  Wrap scaler + SVC in a Pipeline if strict fold
    # isolation is required (this changes the keys of best_params_).
    scaling = MinMaxScaler(feature_range=(-1, 1)).fit(x_train)
    x_train_rescaled = scaling.transform(x_train)
    # Loop-invariant: scale the evaluation set once, not once per score.
    x_test_rescaled = scaling.transform(x_test)

    tuned_parameters = [
        {"kernel": ["linear"], "C": [1, 10, 100, 1000, 10000]},
        {"kernel": ["poly"], "C": [1, 10, 100, 1000, 10000]},
        {"kernel": ["rbf"], "gamma": [1e-3, 1e-4], "C": [1, 10, 100, 1000]},
    ]

    # Only precision is tuned to keep the run time down; add "recall" here
    # to run a second, independent grid search.
    scores = ["precision"]

    for score in scores:
        print(f"# Tuning hyper-parameters for {score}")
        clf = GridSearchCV(
            svm.SVC(),
            tuned_parameters,
            cv=5,
            scoring=f"{score}_macro",
            n_jobs=-1,
        )
        clf.fit(x_train_rescaled, y_train)

        print("Best parameters set found on development set:")
        print()
        print(clf.best_params_)
        print()
        print("Grid scores on development set:")
        print()
        cv_results = clf.cv_results_
        for mean, std, params in zip(
            cv_results["mean_test_score"],
            cv_results["std_test_score"],
            cv_results["params"],
        ):
            # Report mean +/- two standard deviations across the folds.
            print(f"{mean:0.3f} (+/-{std * 2:0.03f}) for {params!r}")
        print()

        print("Detailed classification report:")
        print()
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print()
        y_true, y_pred = y_test, clf.predict(x_test_rescaled)
        print(confusion_matrix(y_true, y_pred))
        print(classification_report(y_true, y_pred))

    return clf.best_params_