# Language Tutor — Gradio app (runs locally or as a Hugging Face Space).
import os
import base64
import uuid

import gradio as gr
from openai import OpenAI
from speechify import Speechify
from dotenv import load_dotenv

# Detect whether we are running inside a Hugging Face Space
# (Spaces set the SYSTEM environment variable to "spaces").
RUNNING_IN_SPACES = os.getenv("SYSTEM") == "spaces"

# Load API keys from a local .env file when not on Spaces
# (Spaces injects secrets directly into the environment).
if not RUNNING_IN_SPACES:
    load_dotenv()

openai_api_key = os.getenv("OPENAI_API_KEY")
speechify_api_key = os.getenv("SPEECHIFY_API_KEY")

# Sanity check: report presence only — never print the key values.
print(f"OPENAI_API_KEY loaded: {'yes' if openai_api_key else 'MISSING'}")
print(f"SPEECHIFY_API_KEY loaded: {'yes' if speechify_api_key else 'MISSING'}")

# Initialize API clients
openai_client = OpenAI(api_key=openai_api_key)
speechify_client = Speechify(token=speechify_api_key)
# Voice configuration: maps each UI language choice to its Speechify TTS
# settings. All entries share the multilingual model and MP3 output; only
# the voice and locale differ per language.
language_config = {
    name: {
        "voice_id": voice,
        "language": locale,
        "model": "simba-multilingual",
        "audio_format": "mp3",
    }
    for name, voice, locale in (
        ("Portuguese", "joao", "pt-PT"),
        ("French", "leo", "fr-FR"),
        ("Spanish", "danna-sofia", "es-MX"),
    )
}
def chat_and_speak(user_input, language_choice):
    """Tutor pipeline: ask GPT for a reply in *language_choice*, then
    synthesize that reply to speech with Speechify.

    Args:
        user_input: Text typed by the learner (any language).
        language_choice: One of the keys of ``language_config``
            ("Portuguese", "French", "Spanish").

    Returns:
        tuple: ``(audio_path, text)`` where ``audio_path`` is the path of
        the generated MP3 file (or ``None`` on any failure) and ``text`` is
        the tutor's reply, possibly followed by a warning message.
    """
    gpt_response = ""
    audio_output_path = None
    try:
        # Guard: nothing to do for empty or whitespace-only input.
        if not user_input or not user_input.strip():
            return None, "Please enter some text to process."

        print(f"User input: {user_input}")
        print(f"Language choice: {language_choice}")

        # Step 1: get the tutor's reply from GPT, forced into the target language.
        system_message = f"You are a friendly {language_choice} language tutor. Respond only in {language_choice}."
        completion = openai_client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": user_input},
            ],
        )
        gpt_response = completion.choices[0].message.content
        print(f"GPT response: {gpt_response}")

        # Step 2: synthesize the reply with Speechify.
        config = language_config.get(language_choice)
        if not config:
            error_msg = f"Warning: language '{language_choice}' is not supported."
            print(error_msg)
            return None, f"{gpt_response}\n\n{error_msg}"

        tts_response = speechify_client.tts.audio.speech(
            input=gpt_response,
            voice_id=config["voice_id"],
            model=config["model"],
            audio_format=config["audio_format"],
        )

        # Speechify returns the audio as a base64-encoded string in
        # ``audio_data`` — validate before decoding.
        if hasattr(tts_response, "audio_data") and isinstance(tts_response.audio_data, str) and tts_response.audio_data:
            try:
                audio_bytes = base64.b64decode(tts_response.audio_data)
                # Spaces only allow writing under /tmp; use a local folder otherwise.
                output_dir = "/tmp" if RUNNING_IN_SPACES else "speech_files"
                os.makedirs(output_dir, exist_ok=True)
                # Unique filename so concurrent requests never clobber each other.
                audio_output_path = os.path.join(output_dir, f"speech_{uuid.uuid4().hex}.mp3")
                with open(audio_output_path, "wb") as f:
                    f.write(audio_bytes)
            except Exception as audio_err:
                print(f"Error processing audio data: {audio_err}")
                return None, f"{gpt_response}\n\nWarning: error saving audio: {audio_err}"
        else:
            print("No audio data received from Speechify or audio_data is not a string.")
            return None, f"{gpt_response}\n\nWarning: no audio data received from Speechify."

        return audio_output_path, gpt_response
    except Exception as e:
        # Top-level UI boundary: surface the error in the response instead of
        # crashing, preserving any GPT text already obtained.
        print(f"An unexpected error occurred: {e}")
        error_message = f"Warning: an unexpected error occurred: {e}"
        if gpt_response:
            return None, f"{gpt_response}\n\n{error_message}"
        return None, error_message
# Load the app stylesheet shipped alongside this script.
# Explicit UTF-8 avoids platform-dependent default-encoding surprises.
with open("custom.css", encoding="utf-8") as f:
    custom_css = f.read()

# Build the Gradio UI: input column (textbox + language dropdown + submit)
# on the left, output column (audio player + text reply) on the right.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(
        '<div class="custom-bar"><span class="custom-bar-title">Language Tutor</span></div>'
    )
    with gr.Column(elem_classes="main-card"):
        with gr.Row():
            with gr.Column():
                user_input = gr.Textbox(
                    label="Type in whatever language you prefer",
                    placeholder="Type here...",
                    lines=4,
                )
                language_choice = gr.Dropdown(
                    choices=["Portuguese", "French", "Spanish"],
                    value="Portuguese",
                    label="Language",
                )
                submit_btn = gr.Button("Submit")
            with gr.Column():
                audio_output = gr.Audio(label="Audio Playback", type="filepath", autoplay=True)
                gpt_output = gr.Textbox(label="The Response")

    # Event listeners must be registered inside the Blocks context.
    submit_btn.click(
        fn=chat_and_speak,
        inputs=[user_input, language_choice],
        outputs=[audio_output, gpt_output],
    )

if __name__ == "__main__":
    demo.launch()