Echtzeit-Transkription von Audio über WebSocket – WebSocket, Python

Python-Programme
Anonymous
 Echtzeit-Transkription von Audio über WebSocket

Post by Anonymous »

from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.middleware.cors import CORSMiddleware

import numpy as np
import whisper

app = FastAPI()

# Allow cross-origin requests (CORS) from the React application.
_cors_settings = {
    "allow_origins": ["http://localhost:3000"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)

def bytes_to_float32_array(audio_bytes: bytes, sample_width: int = 2) -> np.ndarray:
    """Convert raw integer PCM bytes into normalized float32 samples.

    Args:
        audio_bytes: Raw PCM data in native byte order. 1-byte samples are
            read as unsigned integers, 2- and 4-byte samples as signed
            integers.
        sample_width: Number of bytes per sample; 1, 2 or 4.

    Returns:
        A one-dimensional ``float32`` array scaled into ``[-1.0, 1.0)``.
        Empty input yields an empty array.

    Raises:
        ValueError: If ``sample_width`` is not 1, 2 or 4, or if the data
            length is not a multiple of ``sample_width``.
    """
    if len(audio_bytes) == 0:
        return np.array([], dtype=np.float32)

    # sample width -> (integer dtype, offset that centres on zero, full scale)
    pcm_formats = {
        1: (np.uint8, -128.0, 128.0),  # unsigned 8-bit, midpoint at 128
        2: (np.int16, 0.0, 32768.0),  # 2^15
        4: (np.int32, 0.0, 2147483648.0),  # 2^31
    }
    if sample_width not in pcm_formats:
        raise ValueError(f"Unsupported sample width: {sample_width}")

    if len(audio_bytes) % sample_width != 0:
        raise ValueError(f"Audio data length {len(audio_bytes)} is not divisible by sample width {sample_width}")

    dtype, offset, full_scale = pcm_formats[sample_width]
    # astype() copies, so the result never aliases the caller's buffer.
    samples = np.frombuffer(audio_bytes, dtype=dtype).astype(np.float32)
    if offset:
        # Shift unsigned samples so that silence maps to 0.0.
        samples += offset
    # Dividing by the full-scale magnitude (128 for 8-bit, not 255) is what
    # makes every width span the same [-1.0, 1.0) range.
    return samples / full_scale

# Load the Whisper "base" model once at import time; every call reuses it.
model = whisper.load_model("base")


def transcribe_from_bytes(audio_bytes, sample_rate=16000):
    """Transcribe raw 16-bit PCM audio bytes to Russian text with Whisper.

    Args:
        audio_bytes: Raw PCM data, decoded by ``bytes_to_float32_array`` with
            its default 2-byte sample width. A trailing incomplete sample
            (odd final byte) is ignored, so a buffer that was cut in the
            middle of a sample can still be transcribed.
        sample_rate: Currently unused; the samples are handed to the model
            unchanged. NOTE(review): Whisper presumably expects 16 kHz mono
            audio -- confirm that the client really sends that format.

    Returns:
        The recognized text (``result["text"]``) on success. On any failure
        the error is printed and the dict ``{"text": "", "error": <message>}``
        is returned instead, so callers have to handle both shapes.
    """
    try:
        # Streamed chunks are not guaranteed to end on a sample boundary;
        # drop the dangling byte instead of failing the whole transcription.
        bytes_per_sample = 2
        leftover = len(audio_bytes) % bytes_per_sample
        if leftover:
            audio_bytes = audio_bytes[:len(audio_bytes) - leftover]

        audio_array = bytes_to_float32_array(audio_bytes)
        result = model.transcribe(
            audio_array,
            language="ru",
            fp16=False,
        )
        return result["text"]
    except Exception as e:
        print(f"Ошибка при транскрипции из байтов: {e}")
        return {"text": "", "error": str(e)}

@app.websocket("/voice")
async def websocket_endpoint(websocket: WebSocket):
    """Receive binary audio chunks over a WebSocket and transcribe them.

    Each binary message is appended to a per-connection buffer, the whole
    buffer is passed to ``transcribe_from_bytes`` and the result is printed.
    The client receives a fixed acknowledgement text for every chunk.

    Args:
        websocket: The incoming WebSocket connection for the ``/voice`` route.
    """
    await websocket.accept()
    print("WebSocket connection established")
    audio_buffer = bytearray()

    try:
        while True:
            data = await websocket.receive_bytes()
            audio_buffer.extend(data)
            # NOTE(review): this is a synchronous call inside a coroutine and
            # it re-processes the entire buffer for every chunk, so the event
            # loop is blocked for the duration of each transcription.
            transcription = transcribe_from_bytes(audio_buffer)
            print(transcription)
            await websocket.send_text("Audio chunk received")
    except WebSocketDisconnect:
        # The peer is gone, so nothing can be sent or closed any more. The
        # previous send here also referenced an undefined name and raised
        # NameError.
        print("WebSocket disconnected")
    except Exception as e:
        print(f"Error: {e}")
        try:
            await websocket.close()
        except (RuntimeError, WebSocketDisconnect):
            # The connection was already closed or broke while closing.
            pass
</code>
Dieser Code soll Audio transkribieren, das von der Website über WebSocket gesendet wird. Aus irgendeinem Grund gibt er aber immer "Quiet Music
Untertitel-Editor A. Sinetskaya Korrektor A. Egorova" aus.
Vielleicht konvertiere ich die Daten nicht richtig, bevor ich sie an Whisper
übergebe: Als ich versuchte, das Audio in einer WAV-Datei zu speichern, war nur weißes Rauschen zu hören.

Quick Reply

Change Text Case: 
   
  • Similar Topics
    Replies
    Views
    Last post