Fehler as_list() beim Versuch, mein Audioklassifizierungsmodell zu trainieren
Posted: 13 Jan 2025, 19:12
Ich habe diesen Google Colab-Code und möchte ein Modell für MP3-Dateien mit einer Länge von 3 Sekunden, 44,1 kHz, 47,8 KB und 128 kBit/s trainieren, die sich in 6 Ordnern befinden, und ich erhalte immer wieder diese Fehlermeldung, wenn ich versuche zu trainieren:
DAS IST MEIN CODE, und ich weiß nicht, was falsch ist. Ich hatte einen Fehler beim Laden des Sounds, aber jetzt funktioniert es, weil ich ein tf.Tensor-Objekt anstelle eines Strings erhalte. Jetzt weiß ich nicht, warum es nicht funktioniert. Ich verwende TensorFlow 2.10.0 und librosa:
Code: Select all
ValueError Traceback (most recent call last)
in ()
----> 1 hist = model.fit(train, epochs=4, validation_data=test)
1 frames
/usr/local/lib/python3.10/dist-packages/keras/engine/training.py in tf__train_function(iterator)
13 try:
14 do_return = True
---> 15 retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
16 except:
17 do_return = False
ValueError: in user code:
File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 1160, in train_function *
return step_function(self, iterator)
File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 1146, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 1135, in run_step **
outputs = model.train_step(data)
File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 998, in train_step
return self.compute_metrics(x, y, y_pred, sample_weight)
File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 1092, in compute_metrics
self.compiled_metrics.update_state(y, y_pred, sample_weight)
File "/usr/local/lib/python3.10/dist-packages/keras/engine/compile_utils.py", line 577, in update_state
self.build(y_pred, y_true)
File "/usr/local/lib/python3.10/dist-packages/keras/engine/compile_utils.py", line 483, in build
self._metrics = tf.__internal__.nest.map_structure_up_to(
File "/usr/local/lib/python3.10/dist-packages/keras/engine/compile_utils.py", line 631, in _get_metric_objects
return [self._get_metric_object(m, y_t, y_p) for m in metrics]
File "/usr/local/lib/python3.10/dist-packages/keras/engine/compile_utils.py", line 631, in
return [self._get_metric_object(m, y_t, y_p) for m in metrics]
File "/usr/local/lib/python3.10/dist-packages/keras/engine/compile_utils.py", line 652, in _get_metric_object
y_t_rank = len(y_t.shape.as_list())
ValueError: as_list() is not defined on an unknown TensorShape.
Code: Select all
from google.colab import drive
drive.mount('/content/drive')
!pip install librosa tensorflow==2.10.0
import tensorflow as tf
import librosa
import os
import joblib
import numpy as np
from matplotlib import pyplot as plt
# Global parameters
BATCH_SIZE = 4
SAMPLE_RATE = 44100  # Source MP3 files are 44.1 kHz
TARGET_SAMPLE_RATE = 16000  # Resample everything down to 16 kHz
DURATION = 3  # Fixed clip duration in seconds
N_MELS = 300  # Number of Mel bands (spectrogram height)
N_FFT = 1024  # FFT window size
# Hop length chosen so a 3 s clip yields roughly 1400 frames.
# NOTE(review): with center=True librosa produces 1 + floor(48000/34) = 1413
# frames, not exactly 1400 — verify against the set_shape([N_MELS, 1400, 1])
# used later in the pipeline.
HOP_LENGTH = int((TARGET_SAMPLE_RATE * DURATION) / 1400) # Calculăm hop_length pentru a obține 1400 frame-uri
# Funcție pentru încărcarea și preprocesarea fișierelor MP3
def load_sound(filename):
    """Load one MP3 at TARGET_SAMPLE_RATE and pad/trim it to DURATION seconds.

    `filename` may be a plain path string or an eager tf.Tensor wrapping one
    (as produced by tf.py_function). Returns a 1-D float waveform of exactly
    TARGET_SAMPLE_RATE * DURATION samples.
    """
    # Eager tensors carry the path as bytes; decode to str before librosa sees it.
    if isinstance(filename, tf.Tensor):
        file_path = filename.numpy().decode('utf-8')
    else:
        file_path = filename
    print("F name: ", file_path)  # debug: confirm the resolved path
    waveform, _ = librosa.load(file_path, sr=TARGET_SAMPLE_RATE)
    # Fix the length so every clip yields an identically shaped spectrogram.
    return librosa.util.fix_length(waveform, size=TARGET_SAMPLE_RATE * DURATION)
# Funcție pentru crearea spectrogramelor Mel
def create_spectrogram(file_path, label):
    """Convert one audio file into a log-Mel spectrogram with a channel axis.

    Returns (spectrogram, label): the spectrogram has shape
    (N_MELS, frames, 1) in dB scale; the label passes through unchanged.
    """
    waveform = load_sound(file_path)
    mel = librosa.feature.melspectrogram(
        y=waveform,
        sr=TARGET_SAMPLE_RATE,
        n_mels=N_MELS,
        n_fft=N_FFT,
        hop_length=HOP_LENGTH,
        fmax=8000,
    )
    # Convert power to decibels relative to the clip's peak.
    mel_db = librosa.power_to_db(mel, ref=np.max)
    # Trailing channel dimension so the CNN treats it as a 1-channel image.
    return np.expand_dims(mel_db, axis=-1), label
# Data directory on the mounted Google Drive
base_dir = '/content/drive/MyDrive/house_sounds/sound_data_1200'
folders = ['door', 'voice', 'glass', 'silence', 'dog', 'footsteps']
# Build one file-path dataset per class folder
files = []
for folder in folders:
    folder_path = os.path.join(base_dir, folder)
    file_paths = tf.data.Dataset.list_files(os.path.join(folder_path, '*.mp3'))
    files.append(file_paths)
# Pair each file dataset with its integer label (the index into `folders`)
door = tf.data.Dataset.zip((files[0], tf.data.Dataset.from_tensor_slices(tf.fill([len(files[0])], 0))))
voice = tf.data.Dataset.zip((files[1], tf.data.Dataset.from_tensor_slices(tf.fill([len(files[1])], 1))))
glass = tf.data.Dataset.zip((files[2], tf.data.Dataset.from_tensor_slices(tf.fill([len(files[2])], 2))))
silence = tf.data.Dataset.zip((files[3], tf.data.Dataset.from_tensor_slices(tf.fill([len(files[3])], 3))))
dog = tf.data.Dataset.zip((files[4], tf.data.Dataset.from_tensor_slices(tf.fill([len(files[4])], 4))))
footsteps = tf.data.Dataset.zip((files[5], tf.data.Dataset.from_tensor_slices(tf.fill([len(files[5])], 5))))
data = door.concatenate(voice).concatenate(glass).concatenate(silence).concatenate(dog).concatenate(footsteps)
# NOTE(review): the comprehension variable shadows the outer `files` list —
# this works, but is confusing and worth renaming.
total_files = sum([len(files) for files in files])
print(f"Total fișiere: {total_files}")
# Sanity check: compute and display the spectrogram of one random example
f_path, l_abel = data.shuffle(buffer_size=10000).as_numpy_iterator().next()
spectrogram, label = create_spectrogram(f_path, l_abel)
plt.figure(figsize=(30, 10))
plt.imshow(spectrogram[:, :, 0], cmap='viridis', aspect='auto')
plt.title(folders[label])
plt.show()
def process_data(file_path, label):
    """tf.data map function: run create_spectrogram via py_function and
    restore the static shapes that py_function discards.

    tf.py_function outputs have completely unknown static shapes. Keras'
    metric machinery calls y_true.shape.as_list(), which raises
    "ValueError: as_list() is not defined on an unknown TensorShape"
    when the label shape is unknown — so BOTH outputs must get an
    explicit static shape here, not just the spectrogram.
    """
    spectrogram, label = tf.py_function(func=create_spectrogram, inp=[file_path, label], Tout=(tf.float32, tf.int32))
    spectrogram.set_shape([N_MELS, 1400, 1])  # fixed spectrogram dimensions
    label.set_shape([])  # scalar label — the missing line that caused the fit() error
    return spectrogram, label
# tf.data input pipeline: map -> cache -> shuffle -> batch -> prefetch
data = data.map(process_data)
data = data.cache()  # cache decoded spectrograms after the first pass
data = data.shuffle(buffer_size=5000)
data = data.batch(BATCH_SIZE)
data = data.prefetch(8)
print('Len:', len(data))
print('Cardinality:', data.cardinality().numpy())
# NOTE(review): with ~1200 files and BATCH_SIZE=4 the dataset holds only
# ~300 batches, so take(2300) would consume everything and the test split
# would be EMPTY — verify these split sizes against the printed cardinality.
train = data.take(2300)
test = data.skip(2300).take(150)
for spectrogram, label in train.skip(2299).take(1):  # inspect one batch from train
    print("Spectrogram shape:", spectrogram.shape)
    print("Label shape:", label.shape)
# CNN model: three conv/pool stages over the (N_MELS, 1400, 1) spectrogram image
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D
model = Sequential([
    Conv2D(16, (3, 3), activation='relu', input_shape=(N_MELS, 1400, 1)),
    MaxPooling2D(pool_size=(3, 3)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(3, 3)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(3, 3)),
    Flatten(),
    Dense(32, activation='relu'),
    Dense(6, activation='softmax')  # one output unit per class in `folders`
])
# Compile: integer labels + softmax output -> sparse categorical cross-entropy
model.compile(
    optimizer='Adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy']
)
# Train the model
model.summary()
hist = model.fit(train, epochs=4, validation_data=test)
# Save the model, its weights, and the label list
model.save('/content/drive/MyDrive/house_sounds/mel_librosa_1400x300_model')
model.save_weights('/content/drive/MyDrive/house_sounds/mel_librosa_1400x300_weights.h5')
joblib.dump(folders, '/content/drive/MyDrive/house_sounds/4_class_labels.pkl')
# Evaluate on the held-out split
test_loss, test_accuracy = model.evaluate(test)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")