Keine Ausgabe bei der Verwendung von Faster-Whisper für die Transkription in Python

Python-Programme
Guest
 Keine Ausgabe bei der Verwendung von Faster-Whisper für die Transkription in Python

Post by Guest »

Ich versuche, in einem Jupyter-Notebook Faster-Whisper für die Sprach-zu-Text-Transkription zu verwenden. Ich bin einem YouTube-Tutorial gefolgt und habe meinen Code mit ChatGPT verfeinert, aber ich bekomme keine Transkriptionsausgabe. Die Aufzeichnung scheint zu funktionieren, aber die Funktion speech_recognition zeigt keinen Text an.
Hier ist mein Code:

Code: Select all

import ipywidgets as wd
from IPython.display import display
from threading import Thread
from queue import Queue
import sounddevice as sd
import numpy as np
import faster_whisper
import pyaudio

# Load the Faster-Whisper model (CPU, int8 quantization to keep memory low)
model = faster_whisper.WhisperModel("small", device="cpu", compute_type="int8")
# Thread-safe queue carrying raw audio frames from recorder to transcriber
recordings = Queue()

# UI buttons and the output area the worker threads write into
record_button = wd.Button(description="Record", disabled=False, button_style="success", icon="microphone")
stop_button = wd.Button(description="Stop", disabled=False, button_style="warning", icon="stop")
output = wd.Output()

# PyAudio setup: discover the default input (microphone) device index
p = pyaudio.PyAudio()
default_device_index = p.get_default_input_device_info().get("index", None)

CHANNELS = 1  # mono capture
FRAME_RATE = 16000  # sample rate in Hz
RECORD_SECONDS = 20  # NOTE(review): defined but unused in this snippet
AUDIO_FORMAT = pyaudio.paInt16  # 16-bit signed samples
SAMPLE_SIZE = 2  # bytes per sample (paInt16)
CHUNK = 1024  # frames per buffer read
is_recording = False  # shared flag polled by both worker threads

def record_microphone():
    """Capture microphone audio and push raw frames onto the shared queue.

    Runs in a worker thread started by ``start_recording``; exits when the
    global ``is_recording`` flag is cleared.  The PyAudio stream and the
    PyAudio instance are always released, even if a read fails.
    """
    audio = pyaudio.PyAudio()
    stream = audio.open(
        format=AUDIO_FORMAT,
        channels=CHANNELS,
        rate=FRAME_RATE,
        input=True,
        input_device_index=default_device_index,
        frames_per_buffer=CHUNK,
    )
    try:
        while is_recording:
            # exception_on_overflow=False: when the consumer falls behind,
            # read() would otherwise raise OSError (input overflow) and
            # silently kill this thread — a classic cause of "no output".
            data = stream.read(CHUNK, exception_on_overflow=False)
            recordings.put(data)
    finally:
        # Guaranteed cleanup even if the stream errors mid-recording.
        stream.stop_stream()
        stream.close()
        audio.terminate()

def speech_recognition():
    """Consume queued audio and transcribe ~1-second windows with Faster-Whisper.

    Runs in a worker thread; keeps draining the queue after recording stops
    so no captured audio is lost, including the trailing partial window
    (which the original version silently discarded).
    """
    from queue import Empty  # local import: only Queue is imported at module level

    audio_buffer = []

    def _transcribe(buffered):
        # int16 -> float32 normalized to [-1.0, 1.0), the layout
        # WhisperModel.transcribe accepts for raw waveform input.
        audio_chunk = np.concatenate(buffered).astype(np.float32) / 32768
        segments, _ = model.transcribe(audio_chunk, language="en", beam_size=5)
        with output:
            for segment in segments:
                display(segment.text)

    while is_recording or not recordings.empty():
        try:
            # Blocking get with a timeout replaces the original busy-wait,
            # which spun a full CPU core whenever the queue was empty.
            data = recordings.get(timeout=0.25)
        except Empty:
            continue
        audio_buffer.append(np.frombuffer(data, dtype=np.int16))
        if len(audio_buffer) * CHUNK >= FRAME_RATE:  # ~1 second of audio buffered
            _transcribe(audio_buffer)
            audio_buffer = []

    if audio_buffer:
        # Flush the final partial window so the last words are not dropped.
        _transcribe(audio_buffer)

def start_recording(data):
    """Button callback: start the recorder and transcriber worker threads.

    Idempotent — a second click while recording is a no-op, so repeated
    clicks no longer spawn duplicate thread pairs feeding the same queue.
    """
    global is_recording
    if is_recording:
        return  # already running; ignore repeated clicks
    is_recording = True

    with output:
        display("Listening...")

    # daemon=True so a forgotten Stop click cannot block kernel shutdown.
    Thread(target=record_microphone, daemon=True).start()
    Thread(target=speech_recognition, daemon=True).start()

def stop_recording(data):
    """Button callback: clear the shared flag so both worker threads exit."""
    global is_recording
    is_recording = False
    with output:
        display("Stopped.")

# Wire the button callbacks and render the UI in the notebook.
record_button.on_click(start_recording)
stop_button.on_click(stop_recording)

display(record_button, stop_button, output)

Jede Hilfe wird sehr geschätzt

Quick Reply

Change Text Case: 
   
  • Similar Topics
    Replies
    Views
    Last post