"""Language Tutor Application | |
This script provides a Gradio-based web interface for a language tutoring assistant. | |
It uses OpenAI's GPT-4 model to generate language-specific responses and Speechify's | |
text-to-speech service to synthesize audio in multiple languages (Portuguese, French, Spanish). | |
The application supports running both locally and in Hugging Face Spaces environments. | |
""" | |
import os
import base64
import uuid

import gradio as gr
from openai import OpenAI
from speechify import Speechify
from dotenv import load_dotenv
# Detect Hugging Face Spaces environment
RUNNING_IN_SPACES = os.getenv("SYSTEM") == "spaces"
# Load API keys. Environment variables come from .env when not running in
# Spaces; in Spaces, secrets are injected directly into the environment.
if not RUNNING_IN_SPACES:
    load_dotenv()

openai_api_key = os.getenv("OPENAI_API_KEY")
speechify_api_key = os.getenv("SPEECHIFY_API_KEY")
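
# A minimal .env for local runs might look like the sketch below; the values
# are placeholders, not real keys:
#
#   OPENAI_API_KEY=sk-...
#   SPEECHIFY_API_KEY=...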
# Sanity check (but don't print full keys)
print(f"OPENAI_API_KEY loaded: {'✅' if openai_api_key else '❌ MISSING'}")
print(f"SPEECHIFY_API_KEY loaded: {'✅' if speechify_api_key else '❌ MISSING'}")

# Initialize clients
openai_client = OpenAI(api_key=openai_api_key)
speechify_client = Speechify(token=speechify_api_key)
# Voice configuration per supported language
language_config = {
    "Portuguese": {
        "voice_id": "agueda",
        "language": "pt-PT",
        "model": "simba-multilingual",
        "audio_format": "mp3",
    },
    "French": {
        "voice_id": "leo",
        "language": "fr-FR",
        "model": "simba-multilingual",
        "audio_format": "mp3",
    },
    "Spanish": {
        "voice_id": "danna-sofia",
        "language": "es-MX",
        "model": "simba-multilingual",
        "audio_format": "mp3",
    },
    "Korean": {
        "voice_id": "yoon-jung",
        "language": "ko-KR",
        "model": "simba-multilingual",
        "audio_format": "mp3",
    },
}
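
# Adding another language only needs one more entry shaped like the sketch
# below (and a matching choice in the dropdown); the voice_id here is a
# hypothetical placeholder, not a verified Speechify voice:
#
# language_config["Italian"] = {
#     "voice_id": "example-italian-voice",  # assumption: swap in a real voice ID
#     "language": "it-IT",
#     "model": "simba-multilingual",
#     "audio_format": "mp3",
# }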
def chat_and_speak(user_input, language_choice, history, show_translation):
    """Generate a tutor reply in the chosen language, translate it to English,
    and synthesize audio for the reply.

    show_translation is accepted because it is wired up as a Gradio input;
    which text is actually displayed is decided later by update_display_text.
    """
    # Step 0: Initialize response variables
    gpt_response = ""
    english_translation = ""
    audio_output_path = None

    try:
        # Step 1: Input validation
        if not user_input or not user_input.strip():
            msg = "Please enter some text to process."
            return None, (msg, msg), history

        print(f"🧠 User input: {user_input}")
        print(f"🗣️ Language choice: {language_choice}")

        # Build messages with history for GPT interaction
        system_message = f"You are a friendly {language_choice} language tutor. Respond only in {language_choice}."
        messages = [{"role": "system", "content": system_message}]
        if history:
            for user_msg, assistant_msg in history:
                messages.append({"role": "user", "content": user_msg})
                messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": user_input})

        # Step 2: GPT interaction to generate the tutor's reply
        completion = openai_client.chat.completions.create(
            model="gpt-4",
            messages=messages
        )
        gpt_response = completion.choices[0].message.content
        print(f"💬 GPT response: {gpt_response}")

        # Step 2b: Get an English translation of the reply
        translation_prompt = f"Translate the following text to English:\n\n{gpt_response}"
        translation_completion = openai_client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "system", "content": "You translate text to English."},
                      {"role": "user", "content": translation_prompt}]
        )
        english_translation = translation_completion.choices[0].message.content
        print(f"🌐 English translation: {english_translation}")

        # Step 3: Voice synthesis using Speechify
        config = language_config.get(language_choice)
        if not config:
            error_msg = f"⚠️ Language '{language_choice}' not supported."
            print(error_msg)
            return None, (f"{gpt_response}\n\n{error_msg}", english_translation), history

        tts_response = speechify_client.tts.audio.speech(
            input=gpt_response,
            voice_id=config["voice_id"],
            model=config["model"],
            audio_format=config["audio_format"]
        )

        # Speechify returns base64-encoded audio; decode it and write an MP3 to disk
        if hasattr(tts_response, "audio_data") and isinstance(tts_response.audio_data, str) and tts_response.audio_data:
            try:
                audio_bytes = base64.b64decode(tts_response.audio_data)
                # Spaces only guarantees a writable /tmp; use a local folder otherwise
                output_dir = "/tmp" if RUNNING_IN_SPACES else "speech_files"
                os.makedirs(output_dir, exist_ok=True)
                audio_output_path = os.path.join(output_dir, f"speech_{uuid.uuid4().hex}.mp3")
                with open(audio_output_path, "wb") as f:
                    f.write(audio_bytes)
            except Exception as audio_err:
                print(f"💥 Error processing audio data: {audio_err}")
                return None, (f"{gpt_response}\n\n⚠️ Error saving audio: {audio_err}", english_translation), history
        else:
            print("⚠️ No audio data received from Speechify or audio_data is not a string.")
            return None, (f"{gpt_response}\n\n⚠️ No audio data received from Speechify.", english_translation), history

        # Append the new interaction to the chat history
        history = history or []
        history.append((user_input, gpt_response))

        return audio_output_path, (gpt_response, english_translation), history

    except Exception as e:
        # Step 4: Error handling; every return matches the three Gradio
        # outputs: audio, (original, translation) pair, history
        print(f"💥 An unexpected error occurred: {e}")
        error_message = f"⚠️ An unexpected error occurred: {e}"
        if gpt_response:
            return None, (f"{gpt_response}\n\n{error_message}", english_translation), history
        return None, (error_message, error_message), history
# Load custom CSS for UI styling
with open("custom.css") as f:
    custom_css = f.read()
def update_display_text(chat_output_pair, show_translation):
    """Show the English translation when requested and available; otherwise the original."""
    original, translated = chat_output_pair or ("", "")
    return translated if show_translation and translated else original


# Toggle translation display helper (thin wrapper so the checkbox change
# event has its own named handler)
def toggle_translation(chat_output_pair, show_translation):
    return update_display_text(chat_output_pair, show_translation)
# Define Gradio UI layout
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(
        '<div class="custom-bar"><span class="custom-bar-title">Language Tutor</span></div>'
    )
    with gr.Column(elem_classes="main-card"):
        with gr.Row():
            with gr.Column():
                user_input = gr.Textbox(label="Type in whatever language you prefer", placeholder="Type here...", lines=4)
                language_choice = gr.Dropdown(
                    choices=["Portuguese", "French", "Spanish", "Korean"],
                    value="Portuguese",
                    label="Language"
                )
                show_translation = gr.Checkbox(label="Show English Translation", value=False)
                submit_btn = gr.Button("Submit")
                chat_history = gr.State([])
                chat_output_pair = gr.State(("", ""))  # (original, translation)
            with gr.Column():
                audio_output = gr.Audio(label="Audio Playback", type="filepath", autoplay=True)
                gpt_output = gr.Textbox(label="The Response")

    # Generate the reply, translation, and audio, then render the chosen text view
    submit_btn.click(
        fn=chat_and_speak,
        inputs=[user_input, language_choice, chat_history, show_translation],
        outputs=[audio_output, chat_output_pair, chat_history]
    ).then(
        fn=update_display_text,
        inputs=[chat_output_pair, show_translation],
        outputs=gpt_output
    )

    # Re-render the displayed text whenever the translation checkbox is toggled
    show_translation.change(
        fn=toggle_translation,
        inputs=[chat_output_pair, show_translation],
        outputs=gpt_output
    )
# Launch the Gradio app
if __name__ == "__main__":
    demo.launch()
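
# Two standard demo.launch() keyword arguments may be useful here:
# share=True creates a temporary public link for local runs, and
# server_name="0.0.0.0" exposes the app on the local network,
# e.g. demo.launch(share=True).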