To deliberately overfit the model, I'm following a few ideas (a rough sketch of the setup follows this list):
- apply no regularizer
- use a large batch size
- use a reasonable learning rate
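For reference, a minimal sketch of the kind of setup I mean; the layers here are only placeholders, not my actual architecture:
Code: Select all
import keras
from keras import layers

def build_model(n_features):
    # Deliberately no kernel_regularizer and no Dropout layers,
    # so nothing counteracts overfitting.
    model = keras.Sequential([
        keras.Input(shape=(n_features, 1)),   # matches the (samples, features, 1) input
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(1),                      # single regression output
    ])
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
                  loss='mean_absolute_error',
                  metrics=['mean_absolute_error'])
    return model

# model.fit(X_scaled, y_scaled, epochs=500, batch_size=256)  # large batch size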
Code: Select all
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import joblib
def PrepareData(dataset_path):
    """Prepare and scale the data."""
    dataset = np.load(dataset_path, allow_pickle=True)
    X = dataset[:, :-1]
    y = dataset[:, -1]

    # Create and fit scalers
    feature_scaler = StandardScaler()
    X_scaled = feature_scaler.fit_transform(X)
    label_scaler = MinMaxScaler()
    y_scaled = label_scaler.fit_transform(y.reshape(-1, 1)).flatten()

    # Save scalers and reference data
    joblib.dump(feature_scaler, 'feature_scaler.pkl')
    joblib.dump(label_scaler, 'label_scaler.pkl')
    np.save('reference_X.npy', X)  # save reference data for later checks
    np.save('reference_y.npy', y)

    # Reshape features to (samples, features, 1) for the model
    X_scaled = X_scaled[:, :, np.newaxis]
    return X_scaled, y_scaled, y, feature_scaler, label_scaler
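Called roughly like this (the dataset path is just an example):
Code: Select all
X_scaled, y_scaled, y_raw, feature_scaler, label_scaler = PrepareData('dataset.npy')
print(X_scaled.shape)  # (n_samples, n_features, 1)

The training output looks like this: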
Code: Select all
Epoch 407/500
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - loss: 0.0251 - mean_absolute_error: 0.0251
Epoch 407: mean_absolute_error improved from 0.03041 to 0.02641, saving model to model_default.keras
Calculated Mean Absolute Error (Original Scale): 1.3444
Batch-wise Scaled MAE: 0.022792
Keras MAE (logs): 0.026412
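If I understand the MinMaxScaler correctly, its inverse_transform is linear, so a scaled MAE maps back to the original scale simply by multiplying with the label range; that matches the numbers above:
Code: Select all
import joblib

label_scaler = joblib.load('label_scaler.pkl')
# MinMaxScaler's inverse_transform is linear, so an MAE in scaled space
# maps back by multiplying with the label range:
y_range = label_scaler.data_max_[0] - label_scaler.data_min_[0]
print(0.022792 * y_range)  # comes out near the 1.3444 reported above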
Example:
Code: Select all
Training:
Sample 154: Predicted = -69.3622, Actual = -67.8924, Absolute Error = 1.4698
Sample 194: Predicted = -66.9870, Actual = -67.3688, Absolute Error = 0.3818
Test:
Sample 154: Predicted: -190.6574, Actual: -67.8924, Diff: -122.7650
Sample 194: Predicted: -69.2418, Actual: -67.3688, Diff: -1.8730
Here is my Test() method:
Code: Select all
import numpy as np
from keras.models import load_model
import joblib
from os import walk, path
# Load scalers and model
feature_scaler = joblib.load('feature_scaler.pkl')
label_scaler = joblib.load('label_scaler.pkl')
model = load_model('model_default.keras')
def Test(args_):
    ref_X = np.load('reference_X.npy')  # reference training features
    # Walk over every saved .npy sample.
    for folder, _, samples in walk('./predictions'):
        for file in samples:
            if file.endswith('.npy'):
                name = file.split(".")[0]
                sample_path = path.join(folder, file)
                # Load the sample.
                sample_data = np.load(sample_path)
                if sample_data.ndim == 1:  # ensure a 2D (1, n_features) shape
                    sample_data = sample_data.reshape(1, -1)
                if sample_data.shape[1] < ref_X.shape[1]:  # pad if shorter (none are)
                    padding = np.zeros((sample_data.shape[0], ref_X.shape[1] - sample_data.shape[1]))
                    sample_data = np.hstack((sample_data, padding))
                # Scale with the scaler fitted on the training data.
                sample_scaled = feature_scaler.transform(sample_data)
                sample_scaled = sample_scaled.reshape(sample_scaled.shape[0], sample_scaled.shape[1], 1)
                # Predict.
                prediction_scaled = model.predict(sample_scaled, verbose=0)
                # Convert the prediction back to the original scale.
                prediction_rescaled = label_scaler.inverse_transform(prediction_scaled)
                # Read the ground truth from the source file.
                real = None
                with open('source.txt', 'r') as comp:
                    for line in comp:
                        if name in line:
                            real = float(line.split()[3])
                            break
                # Report the difference.
                if real is not None:
                    diff = prediction_rescaled[0][0] - real
                    print(f"{name} - Predicted: {prediction_rescaled[0][0]:.4f}, "
                          f"Real: {real:.4f}, Diff: {diff:.4f}")
                    print("-" * 50)
What I have already tried:
- different scaling approaches
- saving and loading the scalers as .pkl files
- re-fitting the scaler on the training data
- re-fitting the scaler on the test data (which, as the sketch after this list shows, shouldn't be done anyway)
- comparing mean, min, and max of the data in question
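Regarding the last points: my understanding of the correct pattern is that the scaler must be fitted on the training data only and then merely applied to any test data, because the model only knows the training-time scaling. A minimal sketch with placeholder data:
Code: Select all
import numpy as np
from sklearn.preprocessing import StandardScaler

X_train = np.random.randn(100, 8)  # placeholder training features
X_test = np.random.randn(10, 8)    # placeholder test features

feature_scaler = StandardScaler().fit(X_train)    # fit on training data only
X_train_scaled = feature_scaler.transform(X_train)
X_test_scaled = feature_scaler.transform(X_test)  # transform, never fit_transform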