# Viewer metadata captured together with this listing (not part of the program):
#   Spaces status: Sleeping
#   File size: 1,690 bytes
#   Revision id (as shown by the viewer): 7f0f737
#   Line-number gutter of the original listing: 1-55
import gradio as gr
import uuid
import os
import requests
import base64
# Directory that receives the files written by process_audio
# (response.wav, asr.txt and llm.txt).
TTS_OUTPUT_DIR = "./tmp"

# Create the directory at import time so later writes cannot fail on a
# missing folder; exist_ok=True makes repeated start-ups harmless.
os.makedirs(name=TTS_OUTPUT_DIR, exist_ok=True)
def process_audio(audio, *, timeout=120.0):
    """Send a recorded audio file to the backend and return its reply.

    The file is uploaded to ``http://localhost:8000/process_audio``. The JSON
    reply is read for three fields: ``audio`` (base64-encoded bytes),
    ``asr_text`` and ``llm_text``. The decoded audio is written to
    ``response.wav`` and the two texts to ``asr.txt`` / ``llm.txt`` inside
    ``TTS_OUTPUT_DIR``; each call overwrites the files of the previous one.

    Args:
        audio: Path of the recorded audio file, or a falsy value (``None`` /
            empty string) when there is no recording to process.
        timeout: Seconds to wait for the backend before giving up.

    Returns:
        A ``(summary, audio_path)`` tuple: a text block listing both
        transcripts and the path of the written response audio. Returns
        ``("", None)`` when ``audio`` is falsy.

    Raises:
        requests.HTTPError: The backend answered with an error status.
        requests.RequestException: The request failed or timed out.
        KeyError: The reply lacks one of the expected fields.
    """
    # The UI wires this to a change event, which can deliver an empty value;
    # without this guard open() would fail with a TypeError.
    if not audio:
        return "", None

    with open(audio, "rb") as recording:
        res = requests.post(
            "http://localhost:8000/process_audio",
            files={"file": recording},
            timeout=timeout,  # never block the UI worker indefinitely
        )
    # Fail loudly on an error reply instead of trying to parse it as a result.
    res.raise_for_status()
    result = res.json()

    # Read and decode every field before touching the disk, so a malformed
    # reply does not leave a half-updated set of output files behind.
    asr_text = result["asr_text"]
    llm_text = result["llm_text"]
    audio_data = base64.b64decode(result["audio"])

    response_path = f"{TTS_OUTPUT_DIR}/response.wav"
    with open(response_path, "wb") as f:
        f.write(audio_data)
    # Explicit UTF-8: the platform default encoding may be unable to
    # represent non-ASCII transcripts.
    with open(f"{TTS_OUTPUT_DIR}/asr.txt", "w", encoding="utf-8") as f:
        f.write(asr_text)
    with open(f"{TTS_OUTPUT_DIR}/llm.txt", "w", encoding="utf-8") as f:
        f.write(llm_text)

    summary = f"\nasr_text: {asr_text}\nllm_text: {llm_text}\n"
    return summary, response_path
def on_click_metrics(*, timeout=60.0):
    """Fetch the backend metrics and return them as text.

    Issues a GET to ``http://localhost:8000/metrics`` and decodes the raw
    response body as UTF-8.

    Args:
        timeout: Seconds to wait for the backend before giving up.

    Returns:
        The response body as a string.

    Raises:
        requests.HTTPError: The backend answered with an error status.
        requests.RequestException: The request failed or timed out.
    """
    res = requests.get("http://localhost:8000/metrics", timeout=timeout)
    # Without this check an error page would be displayed as if it were metrics.
    res.raise_for_status()
    return res.content.decode("utf-8")
# NOTE(review): the nesting below was reconstructed from a listing whose
# indentation had been lost -- confirm the layout against the running app.
with gr.Blocks() as demo:
    # First row: character picture next to the voice-chat widgets.
    with gr.Row():
        with gr.Column(scale=1):
            # Show the character illustration.
            gr.Image(value="character.png", show_label=False)
        with gr.Column(scale=2):
            mic = gr.Audio(sources=["microphone"], type="filepath", label="Mic")
            text_output = gr.Textbox(label="transcription")
            audio_output = gr.Audio(label="audio", autoplay=True)

    # Second row: button that requests metrics plus the box that shows them.
    with gr.Row():
        metrics_button = gr.Button("compute metrics")
        metrics_output = gr.Textbox(label="Metrics", lines=3)

    # Third row: log box; no handler in this block writes to it.
    with gr.Row():
        log = gr.Textbox(label="logs", lines=5)

    # Event wiring, registered in the same order as before: a change of the
    # microphone value runs process_audio, a click fetches the metrics.
    mic.change(
        fn=process_audio,
        inputs=[mic],
        outputs=[text_output, audio_output],
    )
    metrics_button.click(
        fn=on_click_metrics,
        inputs=[],
        outputs=[metrics_output],
    )

# Start the web server for the interface.
demo.launch()
# End of captured listing.