So senden Sie Audio -Dateien Medien an einen Telefonanruf von Twilio StreamPython

Python-Programme
Anonymous
 So senden Sie Audio -Dateien Medien an einen Telefonanruf von Twilio Stream

Post by Anonymous »

Ich erstelle im Grunde einen einfachen GPT-Assistenten für Telefonanrufe mit Twilio, wobei Google Cloud Speech-to-Text die Eingabe des Benutzers transkribiert. Im Moment habe ich eine Beispiel-Audiodatei (tts_output4.wav), die nach Abschluss der Transkription abgespielt werden soll. Ich habe verschiedene Quellen durchgesehen, kann aber nichts finden, womit sich das Audio abspielen lässt.

Code: Select all

import base64
import json
import os
import threading
import time
from io import BytesIO

import openai  # GPT-4 integration
from flask import Flask, render_template
from flask_sockets import Sockets
from google.cloud.speech import RecognitionConfig, StreamingRecognitionConfig
from gtts import gTTS
from pydub import AudioSegment

from SpeechClientBridge import SpeechClientBridge

# Rolling transcription state shared by the websocket callbacks below.
current_sentence = ""
last_timestamp = 0
sentence_timeout = 1.5  # seconds without growth before a stable sentence is flushed

# GPT-4 API setup.
# BUG FIX: os.getenviron() does not exist and raised AttributeError at import
# time; os.getenv() is the correct environment-variable accessor.
openai.api_key = os.getenv("OpenAI_API_key")

HTTP_SERVER_PORT = 8080

# Twilio media streams deliver 8 kHz mono mu-law audio.
config = RecognitionConfig(
    encoding=RecognitionConfig.AudioEncoding.MULAW,
    sample_rate_hertz=8000,
    language_code="en-US",
)
streaming_config = StreamingRecognitionConfig(config=config, interim_results=True)

app = Flask(__name__)
sockets = Sockets(app)

@app.route("/twiml", methods=["POST"])
def return_twiml():
print("POST TwiML")
return render_template("streams.xml")

def on_transcription_response(response, ws):
    """Handle one streaming-recognition response from Google Cloud Speech.

    Interim hypotheses accumulate in the module-level ``current_sentence``.
    When a result is final — or stable (> 0.8) and older than
    ``sentence_timeout`` — the sentence is handed to GPT and an audio reply
    is pushed back over the websocket *ws*.
    """
    global current_sentence, last_timestamp

    if not response.results:
        return
    top_result = response.results[0]
    if not top_result.alternatives:
        return

    transcription = top_result.alternatives[0].transcript
    now = time.time()

    stale_but_stable = (
        now - last_timestamp > sentence_timeout and top_result.stability > 0.8
    )
    if top_result.is_final or stale_but_stable:
        # Append only the new tail of the transcription.
        if len(transcription) > len(current_sentence):
            current_sentence += transcription[len(current_sentence):]

        print("Complete Sentence:", current_sentence)

        gpt_response = get_gpt_response(current_sentence)
        print("GPT Response:", gpt_response)
        send_static_audio(ws)
        # send_gpt_response_as_audio(gpt_response, ws)

        current_sentence = ""
        last_timestamp = now
    elif len(transcription) > len(current_sentence):
        # A longer interim hypothesis arrived: adopt it and reset the clock.
        current_sentence = transcription
        last_timestamp = now

def get_gpt_response(prompt):
    """Get response from GPT-4 using the v1/chat/completions endpoint."""
    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-4o",
        messages=chat_messages,
        max_tokens=150,
    )
    return completion['choices'][0]['message']['content'].strip()

def send_gpt_response_as_audio(text, ws):
    """Speak *text* via gTTS, transcode to Twilio's format, and stream it over *ws*.

    Any failure is logged and swallowed so the call itself keeps running.
    """
    try:
        # Text -> MP3 speech, kept in memory.
        mp3_buffer = BytesIO()
        gTTS(text, lang='en').write_to_fp(mp3_buffer)
        mp3_buffer.seek(0)

        # MP3 -> 8 kHz mono 8-bit mu-law, which Twilio media streams expect.
        segment = AudioSegment.from_mp3(mp3_buffer)
        segment = segment.set_frame_rate(8000).set_channels(1).set_sample_width(1)
        wav_buffer = BytesIO()
        segment.export(wav_buffer, format="wav", codec="pcm_mulaw")
        payload = wav_buffer.getvalue()

        # Stream the encoded audio in fixed-size chunks.
        CHUNK_SIZE = 1024  # Adjust chunk size if necessary
        for offset in range(0, len(payload), CHUNK_SIZE):
            encoded = base64.b64encode(payload[offset:offset + CHUNK_SIZE]).decode('utf-8')
            ws.send(json.dumps({
                "event": "media",
                "media": {
                    "payload": encoded
                }
            }))
            # sleep(0.1)  # Small delay to prevent overwhelming the WebSocket connection

        print("Audio sent successfully")
    except Exception as e:
        print(f"Error sending audio: {e}")

def send_static_audio(ws):
    """Read tts_output4.wav (expected to be PCM mu-law) and send it over *ws*.

    The whole file is delivered as a single Twilio "media" event; errors are
    logged and swallowed so the websocket loop keeps running.
    """
    try:
        with open("tts_output4.wav", "rb") as audio_file:
            raw_audio = audio_file.read()

        envelope = {
            "event": "media",
            "media": {
                "payload": base64.b64encode(raw_audio).decode('utf-8')
            }
        }
        ws.send(json.dumps(envelope))
        # sleep(0.1)  # Small delay between chunks

        print("Static audio sent successfully")
    except Exception as e:
        print(f"Error sending static audio: {e}")

def convert_text_to_speech(text, save_locally=True):
    """Render *text* to MP3 speech with gTTS.

    When *save_locally* is true, also writes the MP3 to tts_output.mp3.
    Returns the MP3 bytes, or None if the conversion fails.
    """
    print(f"Converting text to speech: {text}")
    try:
        mp3_buffer = BytesIO()
        gTTS(text, lang='en').write_to_fp(mp3_buffer)
        mp3_buffer.seek(0)
        mp3_bytes = mp3_buffer.read()
        print(f"TTS conversion complete, audio data length: {len(mp3_bytes)}")

        if save_locally:
            with open("tts_output.mp3", "wb") as out_file:
                out_file.write(mp3_bytes)
            print("TTS output saved locally as tts_output.mp3")

        return mp3_bytes
    except Exception as e:
        print(f"Error in TTS conversion: {e}")
        return None

def convert_audio_for_twilio(audio_data):
    """Convert audio to 8-bit mu-law audio at 8kHz."""
    segment = AudioSegment.from_mp3(BytesIO(audio_data))
    segment = segment.set_frame_rate(8000).set_channels(1)
    out_buffer = BytesIO()
    segment.export(out_buffer, format="wav", codec="pcm_mulaw")
    return out_buffer.getvalue()

def send_audio_to_twilio(audio_data, ws):
    """Send the audio data to Twilio via WebSocket."""
    if ws.closed:
        print("WebSocket is closed.  Cannot send audio.")
        return
    encoded = base64.b64encode(audio_data).decode('utf-8')
    try:
        envelope = json.dumps({
            "event": "media",
            "media": {
                "payload": encoded
            }
        })
        print(f"Sending audio message of length: {len(envelope)}")
        ws.send(envelope)
        print("Audio sent successfully")
    except Exception as e:
        print(f"Error sending audio: {e}")

@sockets.route("/")
def transcript(ws):
print("WS connection opened")
bridge = SpeechClientBridge(streaming_config, lambda response: on_transcription_response(response, ws))
t = threading.Thread(target=bridge.start)
t.start()

while not ws.closed:
message = ws.receive()
if message is None:
bridge.add_request(None)
bridge.terminate()
break

data = json.loads(message)
if data["event"] in ("connected", "start"):
print(f"Media WS: Received event '{data['event']}': {message}")
continue
if data["event"] == "media":
media = data["media"]
chunk = base64.b64decode(media["payload"])
bridge.add_request(chunk)
if data["event"] == "stop":
print(f"Media WS: Received event 'stop': {message}")
print("Stopping...")
break

bridge.terminate()
print("WS connection closed")

if __name__ == "__main__":
from gevent import pywsgi
from geventwebsocket.handler import WebSocketHandler

server = pywsgi.WSGIServer(
("", HTTP_SERVER_PORT), app, handler_class=WebSocketHandler
)
print("Server listening on: http://localhost:" + str(HTTP_SERVER_PORT))
server.serve_forever()
Und nur als Referenz ist dies meine streams.xml-Datei:







Jede Code -Hilfe wird geschätzt :)

Quick Reply

Change Text Case: 
   
  • Similar Topics
    Replies
    Views
    Last post