`
import os
import random
import time
import gc
import librosa
import soundfile as sf
import logging
asyncio importieren
aus scipy.signal fftconvolve importieren
nest_asyncio importieren
nest_asyncio.apply() # Ermöglicht verschachtelte Ereignisschleifen in Jupyter
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
class DatasetAugmentation:
def __init__(self, data):
self.Data = data
self.audio_pitch_range = [-2, 2] # Pitch shift in semitones
self.audio_stretch_range = [0.8, 1.2] # Stretch rate
self.snr_range = [10, 30] # SNR for noise addition
self.reverb_decay_range = [0.2, 0.6] # Reverb decay
self.reverb_delay_range = [20, 100] # Reverb delay in ms
self.load_sr = 16000
self.max_voice_length = 30#sec
self.datasetbalance = 20
self.augmentation_variations = 4
self.needed_augs = 0
self.saving_break = 1
async def augment_dataset(self):
"""
Process the dataset sequentially with memory optimization.
"""
try:
for speaker in self.Data:
dir_name = speaker[0][1]
ultrasounds = len(speaker)
if ultrasounds < self.datasetbalance:
self.needed_augs, aug_rounds = self.calculate_augs_loop(ultrasounds)
voices = [voice for voice, name in speaker]
aug_list = random.choices(voices, k=aug_rounds)
logger.info(f"Processing speaker folder: {dir_name}")
for file in aug_list:
try:
logger.info(f"Loading file: {file}")
wave, sr = await self.load_audio(file, self.load_sr)
#await wave, sr = librosa.load(file, sr=self.load_sr, mono=True, offset=0.0, duration=None)
logger.info(f"Starting augmentation for file: {file}")
# Perform augmentations
await self.process_augmentation(wave, sr, file, dir_name)
# Release memory after processing each file
del wave, sr
gc.collect()
await asyncio.sleep(3) # Small delay to reduce CPU load
except Exception as e:
logger.error(f"Error processing file {file}: {e}")
logger.info(f"Finished processing speaker folder: {dir_name}")
gc.collect()
self.needed_augs = 0
except Exception as e:
logger.error(f"Error augmenting dataset: {e}")
async def load_audio(self, file, sr):
"""
Asynchronously load audio using librosa.
"""
loop = asyncio.get_event_loop()
wave, sample_rate = await loop.run_in_executor(
None, lambda: librosa.load(file, sr=sr, mono=True, offset=0.0, duration=None)
)
if wave.shape[0] / sr > self.max_voice_length:
wave = wave[:sr * self.max_voice_length]
return wave, sample_rate
async def process_augmentation(self, wave, sr, file, dir_name):
"""
Perform all augmentations on a single file sequentially with memory cleanup.
"""
try:
name = os.path.basename(file).split('.')[0]
# Perform and save time stretch
if self.needed_augs:
await self.safe_process(
wave,
sr,
dir_name,
f"{name}_time",
lambda w, s: self.time_stretch(w, random.uniform(*self.audio_stretch_range)),
)
self.needed_augs -=1
await asyncio.sleep(self.saving_break)
if self.needed_augs:
# Perform and save pitch shift
await self.safe_process(
wave,
sr,
dir_name,
f"{name}_pitch",
lambda w, s: self.pitch_shift(w, s, random.uniform(*self.audio_pitch_range)),
)
self.needed_augs -=1
await asyncio.sleep(self.saving_break)
if self.needed_augs:
# Perform and save noise addition
await self.safe_process(
wave,
sr,
dir_name,
f"{name}_noise",
lambda w, s: self.add_noise(w, snr_db=random.uniform(*self.snr_range)),
)
self.needed_augs -=1
await asyncio.sleep(self.saving_break)
if self.needed_augs:
# Perform and save reverb
await self.safe_process(
wave,
sr,
dir_name,
f"{name}_reverb",
lambda w, s: self.apply_reverb(w, s, decay=random.uniform(*self.reverb_decay_range),
delay_ms=random.randint(*self.reverb_delay_range)),
)
self.needed_augs -=1
await asyncio.sleep(self.saving_break)
logger.info(f"Completed all augmentations for {file}")
except Exception as e:
logger.error(f"Error during augmentations for {file}: {e}")
async def safe_process(self, wave, sr, dir_name, name_suffix, augmentation_fn):
"""
Safely process and save each augmentation step.
"""
try:
augmented_wave = augmentation_fn(wave, sr)
await self.save_file(augmented_wave, sr, dir_name, name_suffix)
del augmented_wave
gc.collect() # Force garbage collection after saving
except Exception as e:
logger.error(f"Error in {name_suffix}: {e}")
async def save_file(self, audio, sr, dir_name, name_suffix):
"""
Save the processed audio to the appropriate folder.
"""
try:
output_dir = os.path.join(Data_set_Augmented_path, dir_name)
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, f"{name_suffix}_{int(time.perf_counter_ns())}.ogg")
# Save the file asynchronously
loop = asyncio.get_event_loop()
await loop.run_in_executor(None, sf.write, output_path, audio, sr)
while not os.path.isfile(output_path):
logger.info("Waiting for file to save...")
await asyncio.sleep(0.1) # Adjust the delay as needed
logger.info(f"Saved file: {output_path}")
# Explicitly release memory
del audio
gc.collect()
await asyncio.sleep(2)
except Exception as e:
logger.error(f"Error saving file {name_suffix}: {e}")
async def ssave_file(self, audio, sr, dir_name, name_suffix):
"""
Save the processed audio to the appropriate folder.
"""
try:
output_dir = os.path.join(Data_set_Augmented_path, dir_name)
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, f"{name_suffix}_{int(time.perf_counter_ns())}.ogg")
# Save the file asynchronously
loop = asyncio.get_event_loop()
await loop.run_in_executor(None, sf.write, output_path, audio, sr)
while not os.path.isfile(output_path):
print("waited 5 microseconds")
await asyncio.sleep(3)
logger.info(f"Saved file: {output_path}")
except Exception as e:
logger.error(f"Error saving file {name_suffix}: {e}")
def pitch_shift(self, audio, sr, n_steps):
"""Apply pitch shift."""
return librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps)
def time_stretch(self, audio, rate):
"""Apply time stretch."""
return librosa.effects.time_stretch(audio, rate=rate)
def add_noise(self, audio, snr_db=20):
"""Add noise dynamically."""
signal_power = np.mean(audio ** 2)
snr_linear = 10 ** (snr_db / 10)
noise_power = signal_power / snr_linear
noise = np.random.normal(0, np.sqrt(noise_power), audio.shape)
return audio + noise
def apply_reverb(self, audio, sr, decay=0.5, delay_ms=50):
"""Simulate reverb."""
delay_samples = int(sr * (delay_ms / 1000.0))
impulse_response = np.zeros(delay_samples * 4)
impulse_response[::delay_samples] = decay ** np.arange(4)
reverb_audio = fftconvolve(audio, impulse_response, mode='full')
return reverb_audio[:len(audio)]
def calculate_augs_loop(self, len):
m = self.datasetbalance - len
return m, int(math.ceil(m/self.augmentation_variations))
# Function to apply pitch shifting
# Example usage
def load_data(data_path):
"""Load dataset structure."""
results = []
for speaker_dir in os.listdir(data_path):
speaker_path = os.path.join(data_path, speaker_dir)
if os.path.isdir(speaker_path):
files = [(os.path.join(speaker_path, f), speaker_dir) for f in os.listdir(speaker_path)]
results.append(files)
return results
if __name__ == "__main__":
Data_set_files = load_data("D:/Projects/Dataset/CustomDatasets/DataSet_001_Augmented")
Data_set_Augmented_path = "D:/Projects/Dataset/CustomDatasets/DataSet_001_Augmented"
dataset_augment = DatasetAugmentation(Data_set_files)
# Create an event loop if not already running
loop = asyncio.get_event_loop()
loop.run_until_complete(dataset_augment.augment_dataset())
`
Wenn ich diesen Code ausführe, wird eine Meldung angezeigt und das Programm wird jetzt beendet. Ausnahme: Die Meldung besagt, dass der Kernel gestorben ist und automatisch neu gestartet wird
Stapelüberlauf beim Speichern der Dateien in einer Schleife, der Kernel stirbt. Ich habe den Ruhezustand eingedämmt, hat ⇐ Python
-
- Similar Topics
- Replies
- Views
- Last post