Please tell me how to solve the audio output error when implementing voice calls with Python aiortc

Post by Anonymous »

I am implementing a video call with Python aiortc.
The audio output comes out strange.
My goal is to communicate over a mapped stream, but there is a problem with my voice output, so I need to fix it.
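
One way to narrow this down is to dump the incoming audio track to disk with aiortc's own MediaRecorder, bypassing sounddevice entirely: if that file sounds clean, the WebRTC frames are fine and the fault lies in the local playback path. A minimal sketch (the helper name record_audio and the file name are made up for illustration):

Code: Select all

async def record_audio(track):
    from aiortc.contrib.media import MediaRecorder

    # Writes the track to disk via PyAV, independent of sounddevice.
    recorder = MediaRecorder("debug_received.wav")
    recorder.addTrack(track)
    await recorder.start()
    # later, during shutdown: await recorder.stop()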

Code: Select all

import sys, asyncio, threading
import cv2
import numpy as np
from aiohttp import web
from aiortc import RTCPeerConnection, RTCSessionDescription, VideoStreamTrack, AudioStreamTrack
from aiortc.contrib.media import MediaPlayer
from av import VideoFrame, AudioFrame
from PyQt6.QtWidgets import QApplication, QLabel, QWidget
from PyQt6.QtGui import QImage, QPixmap
from PyQt6.QtCore import QTimer
import sounddevice as sd
import math
import time
import queue
import fractions
import wave
from aiortc.mediastreams import MediaStreamError
from aiortc.codecs import get_capabilities

pcs = set()
clients = {}
last_frames = {}

class CameraVideoTrack(VideoStreamTrack):
    def __init__(self):
        super().__init__()
        self.cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
        self.last_sent = time.time()
        self.fps = 20

    async def recv(self):
        # Pace reads to ~self.fps without blocking the event loop.
        now = time.time()
        delay = max(0, (1 / self.fps) - (now - self.last_sent))
        await asyncio.sleep(delay)
        self.last_sent = time.time()

        pts, time_base = await self.next_timestamp()
        ret, frame = self.cap.read()
        if not ret:
            # Returning None would crash the sender; signal end of stream instead.
            raise MediaStreamError
        video_frame = VideoFrame.from_ndarray(frame, format="bgr24")
        video_frame.pts = pts
        video_frame.time_base = time_base
        return video_frame

class MicrophoneAudioTrack(AudioStreamTrack):
    def __init__(self, device_index=1):
        super().__init__()
        self.q = queue.Queue()
        self.sample_rate = 48000
        self.frame_samples = 960  # 20 ms at 48 kHz (one Opus frame)
        self.samples_sent = 0
        self.wav_writer = wave.open("server_sent.wav", "wb")
        self.wav_writer.setnchannels(1)
        self.wav_writer.setsampwidth(2)
        self.wav_writer.setframerate(self.sample_rate)

        def callback(indata, frames, time_info, status):
            if status:
                print("[⚠️ mic status]", status)
            if indata.shape[0] == self.frame_samples:
                self.q.put(indata.copy())
                self.wav_writer.writeframes(indata.tobytes())
            else:
                print(f"[warning] frame length mismatch: {indata.shape}")

        self.stream = sd.InputStream(
            samplerate=self.sample_rate,
            blocksize=self.frame_samples,
            channels=1,
            dtype='int16',
            callback=callback,
            device=device_index
        )
        self.stream.start()

    async def recv(self):
        # Poll the thread-safe queue without blocking the event loop.
        while self.q.empty():
            await asyncio.sleep(0.001)

        data = self.q.get()
        data = data.T  # (960, 1) -> (1, 960), the shape from_ndarray expects
        frame = AudioFrame.from_ndarray(data, layout="mono", format="s16")
        frame.sample_rate = self.sample_rate
        frame.time_base = fractions.Fraction(1, self.sample_rate)
        frame.pts = self.samples_sent
        frame.duration = self.frame_samples
        self.samples_sent += self.frame_samples
        return frame

    def __del__(self):
        try:
            self.wav_writer.close()
        except Exception as e:
            print("[WAV close error]", e)

class MultiAudioPlayer:
    def __init__(self, sample_rate=48000, frame_samples=960, output_device=3):
        self.sample_rate = sample_rate
        self.frame_samples = frame_samples  # ✅ this line is required
        self.buffer = asyncio.Queue(maxsize=50)
        self.running = True
        self.stream = sd.OutputStream(
            samplerate=sample_rate,
            channels=1,
            dtype='int16',
            device=output_device
        )
        self.wav_writer = wave.open("server_received.wav", "wb")
        self.wav_writer.setnchannels(1)
        self.wav_writer.setsampwidth(2)
        self.wav_writer.setframerate(sample_rate)
        asyncio.create_task(self.audio_loop())

    def add_frame(self, frame):
        try:
            pcm = frame.to_ndarray()
            print(f"[add_frame] frame.to_ndarray() shape={pcm.shape}, dtype={pcm.dtype}")

            if pcm.ndim == 2 and pcm.shape[0] > 1:
                pcm = pcm[0, :]
            elif pcm.ndim == 2:
                # Packed formats arrive as (1, n); note that for stereo this is
                # interleaved L/R, so flattening doubles the apparent length.
                pcm = pcm.flatten()

            if pcm.dtype == np.float32:
                pcm = np.clip(pcm, -1.0, 1.0)
                pcm = (pcm * 32767).astype(np.int16)

            total_samples = pcm.shape[0]
            if total_samples % self.frame_samples != 0:
                print(f"[⚠️ frame size mismatch: {pcm.shape}], trim and pad")
                total_samples = (total_samples // self.frame_samples) * self.frame_samples
                pcm = pcm[:total_samples]

            chunks = np.split(pcm, total_samples // self.frame_samples)
            for chunk in chunks:
                try:
                    self.buffer.put_nowait(chunk)
                except asyncio.QueueFull:
                    print("[warning] buffer is full")
        except Exception as e:
            print(f"[MultiAudioPlayer] error: {e}")

    async def audio_loop(self):
        await asyncio.sleep(0.5)
        self.stream.start()
        print("[🎧 output started]")
        silence = np.zeros(self.frame_samples, dtype=np.int16)

        try:
            while self.running:
                try:
                    pcm = await asyncio.wait_for(self.buffer.get(), timeout=0.05)
                    print(f"[audio_loop] pcm.shape={pcm.shape}")
                except asyncio.TimeoutError:
                    pcm = silence

                if pcm.shape[0] != self.frame_samples:
                    print(f"[audio_loop] frame length mismatch: {pcm.shape[0]}, padding")
                    pad = np.zeros(self.frame_samples, dtype=np.int16)
                    n = min(len(pcm), self.frame_samples)
                    pad[:n] = pcm[:n]
                    pcm = pad

                # Caution: stream.write() blocks until the device consumes the
                # samples, which stalls the event loop (see the callback-based
                # sketch after the code).
                self.stream.write(pcm)
                self.wav_writer.writeframes(pcm.tobytes())
        except Exception as e:
            print("[AudioLoop exception]", e)
        finally:
            self.stream.stop()
            self.stream.close()
            self.wav_writer.close()

    def stop(self):
        self.running = False

async def play_audio(track):
    player = MultiAudioPlayer()
    try:
        while True:
            try:
                frame = await track.recv()
            except MediaStreamError:
                print("[🛑 audio track ended]")
                break
            player.add_frame(frame)
    finally:
        player.stop()

async def consume_video(track, client_ip):
    print(f"[server] receiving video track from: {client_ip}")
    try:
        while True:
            frame = await track.recv()
            last_frames[client_ip] = frame.to_ndarray(format="bgr24")
    except MediaStreamError:
        print(f"[server] video track ended: {client_ip}")

async def offer(request):
    client_ip = request.remote
    params = await request.json()
    offer = RTCSessionDescription(sdp=params["sdp"], type=params["type"])

    pc = RTCPeerConnection()
    pcs.add(pc)
    clients[pc] = client_ip

    pc.addTransceiver("video", direction="sendrecv")
    pc.addTransceiver("audio", direction="sendrecv")
    pc.addTrack(CameraVideoTrack())
    pc.addTrack(MicrophoneAudioTrack())

    # Restrict audio to Opus. getTransceivers() only returns transceivers that
    # already exist, so this has to come after addTransceiver/addTrack.
    opus_caps = get_capabilities("audio")
    for transceiver in pc.getTransceivers():
        if transceiver.kind == "audio":
            transceiver.setCodecPreferences([
                codec for codec in opus_caps.codecs if codec.name == "opus"
            ])

    @pc.on("track")
    def on_track(track):
        if track.kind == "video":
            asyncio.create_task(consume_video(track, client_ip))
        elif track.kind == "audio":
            asyncio.create_task(play_audio(track))

    await pc.setRemoteDescription(offer)
    answer = await pc.createAnswer()
    await pc.setLocalDescription(answer)

    return web.json_response({
        "sdp": pc.localDescription.sdp,
        "type": pc.localDescription.type
    })

async def app_runner():
    app = web.Application()
    app.router.add_post("/offer", offer)
    runner = web.AppRunner(app)
    await runner.setup()
    site = web.TCPSite(runner, "0.0.0.0", 8080)
    await site.start()
    print("[server] listening on port 8080...")

class GridVideoWindow(QWidget):
    def __init__(self):
        super().__init__()
        self.setWindowTitle("Server - video grid")
        self.resize(1280, 720)
        self.label = QLabel(self)
        self.label.resize(1280, 720)
        self.timer = QTimer()
        self.timer.timeout.connect(self.update_grid)
        self.timer.start(30)

    def update_grid(self):
        frames = list(last_frames.values())
        addresses = list(last_frames.keys())
        if not frames:
            return

        rows, cols = get_grid_size(len(frames))
        gap = 5
        grid_w, grid_h = self.label.width(), self.label.height()
        cell_w = (grid_w - (cols + 1) * gap) // cols
        cell_h = (grid_h - (rows + 1) * gap) // rows
        grid = np.ones((grid_h, grid_w, 3), dtype=np.uint8) * 30  # dark background

        for idx, frame in enumerate(frames):
            resized = cv2.resize(frame, (cell_w, cell_h))
            r, c = divmod(idx, cols)
            y1 = gap + r * (cell_h + gap)
            x1 = gap + c * (cell_w + gap)
            y2 = y1 + cell_h
            x2 = x1 + cell_w
            grid[y1:y2, x1:x2] = resized

            # Label each cell with the last octet of the client IP.
            sClientId = addresses[idx].split(".")[-1]
            cv2.putText(grid, sClientId, (x1 + 10, y1 + 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        h, w, ch = grid.shape
        img = QImage(grid.data, w, h, ch * w, QImage.Format.Format_BGR888)
        self.label.setPixmap(QPixmap.fromImage(img))

def get_grid_size(n):
    # The post is cut off mid-function here; the rest of this body is a
    # plausible reconstruction (an assumption): the smallest near-square
    # grid that can hold n tiles.
    if n == 1:
        return (1, 1)
    elif 2 <= n <= 4:
        return (2, 2)
    cols = math.ceil(math.sqrt(n))
    rows = math.ceil(n / cols)
    return (rows, cols)
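
One plausible cause of the distorted output (an assumption from reading the code, not a confirmed diagnosis): MultiAudioPlayer.audio_loop() uses the blocking self.stream.write(pcm), which stalls the asyncio event loop that also services track.recv(), so frames bunch up and the 50-chunk buffer overflows. A callback-driven sounddevice.OutputStream avoids that. The class below is a made-up sketch, not part of the post's code:

Code: Select all

import collections

import numpy as np
import sounddevice as sd


class CallbackAudioPlayer:
    """Mono s16 playback driven by PortAudio's own audio thread."""

    def __init__(self, sample_rate=48000, frame_samples=960):
        self.frame_samples = frame_samples
        # deque.append()/popleft() are atomic, so the asyncio side can feed
        # the buffer while the audio callback drains it.
        self.chunks = collections.deque(maxlen=50)
        self.stream = sd.OutputStream(
            samplerate=sample_rate,
            channels=1,
            dtype="int16",
            blocksize=frame_samples,
            callback=self._callback,
        )
        self.stream.start()

    def _callback(self, outdata, frames, time_info, status):
        # Runs on the PortAudio thread; must never block.
        if status:
            print("[output status]", status)
        try:
            chunk = self.chunks.popleft()
        except IndexError:
            chunk = np.zeros(frames, dtype=np.int16)  # underrun: play silence
        outdata[:, 0] = chunk

    def add_pcm(self, pcm):
        # Expects mono int16 samples in multiples of frame_samples, i.e. the
        # array that MultiAudioPlayer.add_frame() already produces.
        for chunk in np.split(pcm, len(pcm) // self.frame_samples):
            self.chunks.append(chunk)

    def stop(self):
        self.stream.stop()
        self.stream.close()

play_audio() would then construct CallbackAudioPlayer instead of MultiAudioPlayer and feed it the converted int16 array. A second thing worth checking: if the decoded frames are stereo, frame.to_ndarray() returns interleaved samples in shape (1, 2*n), and flattening that and playing it as mono at 48 kHz halves the pitch and speed; de-interleaving with pcm.reshape(-1, 2)[:, 0] before queueing would rule that out.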
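
To exercise the /offer endpoint end-to-end, a minimal receive-only test client could look like this (a sketch: SERVER_IP is a placeholder, and the client records what it receives so the two ends can be compared):

Code: Select all

import asyncio

import aiohttp
from aiortc import RTCPeerConnection, RTCSessionDescription
from aiortc.contrib.media import MediaRecorder


async def run_client():
    pc = RTCPeerConnection()
    pc.addTransceiver("video", direction="recvonly")
    pc.addTransceiver("audio", direction="recvonly")

    recorder = MediaRecorder("client_received.wav")

    @pc.on("track")
    def on_track(track):
        if track.kind == "audio":
            recorder.addTrack(track)

    await pc.setLocalDescription(await pc.createOffer())

    # Standard HTTP offer/answer signalling against the server above.
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "http://SERVER_IP:8080/offer",
            json={"sdp": pc.localDescription.sdp, "type": pc.localDescription.type},
        ) as resp:
            answer = await resp.json()

    await pc.setRemoteDescription(
        RTCSessionDescription(sdp=answer["sdp"], type=answer["type"])
    )
    await recorder.start()
    await asyncio.sleep(10)  # record ten seconds, then shut down
    await recorder.stop()
    await pc.close()


asyncio.run(run_client())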
