# NOTE(review): the lines "Spaces: / Running / Running" were page-header
# residue captured from the Hugging Face Spaces UI, not part of the program.
"""Language Tutor Application.

This script provides a Gradio-based web interface for a language tutoring
assistant. It uses OpenAI's GPT-4 model to generate language-specific
responses and Speechify's text-to-speech service to synthesize audio in
multiple languages (Portuguese, French, Spanish). The application supports
running both locally and in Hugging Face Spaces environments.
"""
import os
import base64
import uuid

import gradio as gr
from dotenv import load_dotenv
from openai import OpenAI
from speechify import Speechify

# Detect whether we are running inside a Hugging Face Space
# (Spaces set the SYSTEM environment variable to "spaces").
RUNNING_IN_SPACES = os.getenv("SYSTEM") == "spaces"

# Locally, API keys come from a .env file; in Spaces they are injected as
# repository secrets, so load_dotenv() is skipped there.
if not RUNNING_IN_SPACES:
    load_dotenv()

openai_api_key = os.getenv("OPENAI_API_KEY")
speechify_api_key = os.getenv("SPEECHIFY_API_KEY")

# Sanity check — report presence only, never print the key values.
print(f"✅ OPENAI_API_KEY loaded: {'✅' if openai_api_key else '❌ MISSING'}")
print(f"✅ SPEECHIFY_API_KEY loaded: {'✅' if speechify_api_key else '❌ MISSING'}")

# Initialize API clients shared by all requests.
openai_client = OpenAI(api_key=openai_api_key)
speechify_client = Speechify(token=speechify_api_key)
# Per-language Speechify voice configuration.
# Each entry selects a voice id, a BCP-47 locale tag, the TTS model, and
# the audio container used when synthesizing speech for that language.
language_config = {
    "Portuguese": {
        "voice_id": "joao",
        "language": "pt-PT",
        "model": "simba-multilingual",
        "audio_format": "mp3",
    },
    "French": {
        "voice_id": "leo",
        "language": "fr-FR",
        "model": "simba-multilingual",
        "audio_format": "mp3",
    },
    "Spanish": {
        "voice_id": "danna-sofia",
        "language": "es-MX",
        "model": "simba-multilingual",
        "audio_format": "mp3",
    },
}
def chat_and_speak(user_input, language_choice, history):
    """Generate a tutor reply with GPT-4 and synthesize it to speech.

    Args:
        user_input: Text typed by the learner (any language).
        language_choice: One of the keys of ``language_config``.
        history: List of (user, assistant) message pairs, or None.

    Returns:
        Tuple of ``(audio_file_path_or_None, response_text, updated_history)``.
        On any failure the audio path is None and the text carries an
        explanation; partial GPT output is preserved when available.
    """
    gpt_response = ""
    audio_output_path = None
    # Normalize once so every return path hands a list back to gr.State
    # (the original only normalized on the success path).
    history = history or []
    try:
        # Step 1: reject empty/whitespace-only input before spending tokens.
        if not user_input or not user_input.strip():
            return None, "Please enter some text to process.", history
        print(f"🧠 User input: {user_input}")
        print(f"🗣️ Language choice: {language_choice}")

        # Step 2: rebuild the full conversation for GPT, starting with the
        # system prompt that pins the tutoring language.
        system_message = f"You are a friendly {language_choice} language tutor. Respond only in {language_choice}."
        messages = [{"role": "system", "content": system_message}]
        for user_msg, assistant_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": user_input})

        completion = openai_client.chat.completions.create(
            model="gpt-4",
            messages=messages,
        )
        gpt_response = completion.choices[0].message.content
        print(f"💬 GPT response: {gpt_response}")

        # Step 3: synthesize the reply with the language's configured voice.
        config = language_config.get(language_choice)
        if not config:
            error_msg = f"⚠️ Language '{language_choice}' not supported."
            print(error_msg)
            return None, f"{gpt_response}\n\n{error_msg}", history

        tts_response = speechify_client.tts.audio.speech(
            input=gpt_response,
            voice_id=config["voice_id"],
            model=config["model"],
            audio_format=config["audio_format"],
        )

        # Speechify returns base64-encoded audio; decode it and persist
        # to a file Gradio can stream back (type="filepath").
        if hasattr(tts_response, "audio_data") and isinstance(tts_response.audio_data, str) and tts_response.audio_data:
            try:
                audio_bytes = base64.b64decode(tts_response.audio_data)
                # Spaces containers only allow writing under /tmp.
                output_dir = "/tmp" if RUNNING_IN_SPACES else "speech_files"
                os.makedirs(output_dir, exist_ok=True)
                # Unique name so concurrent sessions never clobber each other.
                audio_output_path = os.path.join(output_dir, f"speech_{uuid.uuid4().hex}.mp3")
                with open(audio_output_path, "wb") as f:
                    f.write(audio_bytes)
            except Exception as audio_err:
                print(f"🔥 Error processing audio data: {audio_err}")
                return None, f"{gpt_response}\n\n⚠️ Error saving audio: {audio_err}", history
        else:
            print("⚠️ No audio data received from Speechify or audio_data is not a string.")
            return None, f"{gpt_response}\n\n⚠️ No audio data received from Speechify.", history

        # Step 4: record the exchange so follow-up turns keep context.
        history.append((user_input, gpt_response))
        return audio_output_path, gpt_response, history
    except Exception as e:
        # Top-level boundary: surface the error in the UI instead of
        # crashing, keeping any GPT text that was already generated.
        print(f"🔥 An unexpected error occurred: {e}")
        error_message = f"⚠️ An unexpected error occurred: {e}"
        if gpt_response:
            return None, f"{gpt_response}\n\n{error_message}", history
        return None, error_message, history
# Load custom CSS used to style the Gradio interface.
with open("custom.css") as f:
    custom_css = f.read()

# Define the Gradio UI layout: input column (text box, language dropdown,
# submit button) on the left, audio playback and text response on the right.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(
        '<div class="custom-bar"><span class="custom-bar-title">Language Tutor</span></div>'
    )
    with gr.Column(elem_classes="main-card"):
        with gr.Row():
            with gr.Column():
                user_input = gr.Textbox(
                    label="Type in whatever language you prefer",
                    placeholder="Type here...",
                    lines=4,
                )
                language_choice = gr.Dropdown(
                    choices=["Portuguese", "French", "Spanish"],
                    value="Portuguese",
                    label="Language",
                )
                submit_btn = gr.Button("Submit")
                # Per-session conversation history threaded through the callback.
                chat_history = gr.State([])
            with gr.Column():
                audio_output = gr.Audio(label="Audio Playback", type="filepath", autoplay=True)
                gpt_output = gr.Textbox(label="The Response")
    submit_btn.click(
        fn=chat_and_speak,
        inputs=[user_input, language_choice, chat_history],
        outputs=[audio_output, gpt_output, chat_history],
    )

# Launch the app only when executed as a script (not when imported).
if __name__ == "__main__":
    demo.launch()