# eac-translator / app.py
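"""EAC Translator: a Gradio app that translates between English, French, and
Swahili with Helsinki-NLP OPUS-MT models, plus tone control, language
detection, voice input, and speech playback."""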
import gradio as gr
from transformers import MarianMTModel, MarianTokenizer
from datetime import datetime
import langid
import os
import pyttsx3
import time
import warnings
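# Assumed runtime dependencies: gradio, transformers, torch, langid, pyttsx3,
# SpeechRecognition, and a local FFmpeg install for audio handling.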
# Optional: suppress sacremoses warning
warnings.filterwarnings("ignore", message="Recommended: pip install sacremoses.")
# Prepend FFmpeg to PATH for pydub and audio playback (Windows-specific path;
# adjust or remove on other platforms)
os.environ["PATH"] += os.pathsep + r"C:\ffmpeg\bin"
# Restrict langid to the supported languages so detection picks among them
langid.set_languages(['en', 'fr', 'sw'])
MODEL_MAP = {
    "English → Swahili": "Helsinki-NLP/opus-mt-en-sw",
    "English → French": "Helsinki-NLP/opus-mt-en-fr",
    "French → English": "Helsinki-NLP/opus-mt-fr-en",
    # No direct French→Swahili OPUS-MT model, so this pair pivots through English
    "French → Swahili (via English)": ["Helsinki-NLP/opus-mt-fr-en", "Helsinki-NLP/opus-mt-en-sw"],
}
# Note: MarianMT translates input literally, so these prefixes are translated
# along with the text rather than interpreted as instructions; tone control is
# best-effort at most.
TONE_MODIFIERS = {
    "Neutral": "",
    "Romantic": "Express this romantically: ",
    "Formal": "Translate this in a formal tone: ",
    "Casual": "Make this sound casual: ",
}
# Cache tokenizer/model pairs so each checkpoint is loaded only once
loaded_models = {}

def load_model(model_name):
    if model_name not in loaded_models:
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        loaded_models[model_name] = (tokenizer, model)
    return loaded_models[model_name]
def detect_language(text):
    try:
        lang, _score = langid.classify(text)
        return lang
    except Exception:
        return "unknown"
def translate(text, direction, tone):
    # Warn (but do not block) if the detected language disagrees with the selected source
    detected_lang = detect_language(text)
    expected_src = direction.split(" → ")[0].lower()
    if expected_src.startswith("english") and detected_lang != "en":
        warning = f"⚠️ Detected language is '{detected_lang}', but you selected English as source."
    elif expected_src.startswith("french") and detected_lang != "fr":
        warning = f"⚠️ Detected language is '{detected_lang}', but you selected French as source."
    elif expected_src.startswith("swahili") and detected_lang != "sw":
        warning = f"⚠️ Detected language is '{detected_lang}', but you selected Swahili as source."
    else:
        warning = ""

    prompt = TONE_MODIFIERS[tone] + text
    model_info = MODEL_MAP[direction]

    if isinstance(model_info, list):
        # Pivot translation: source → English, then English → target
        tokenizer1, model1 = load_model(model_info[0])
        encoded1 = tokenizer1(prompt, return_tensors="pt", padding=True, truncation=True)
        intermediate = model1.generate(**encoded1)
        intermediate_text = tokenizer1.decode(intermediate[0], skip_special_tokens=True)

        tokenizer2, model2 = load_model(model_info[1])
        encoded2 = tokenizer2(intermediate_text, return_tensors="pt", padding=True, truncation=True)
        final = model2.generate(**encoded2)
        translation = tokenizer2.decode(final[0], skip_special_tokens=True)
    else:
        tokenizer, model = load_model(model_info)
        encoded = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
        generated = model.generate(**encoded)
        translation = tokenizer.decode(generated[0], skip_special_tokens=True)

    # Append each request to a simple text log
    with open("translation_log.txt", "a", encoding="utf-8") as f:
        f.write(f"[{datetime.now()}] {direction} | Tone: {tone}\n")
        f.write(f"Input: {text}\nOutput: {translation}\n\n")

    return f"{warning}\n{translation}" if warning else translation
# Enumerate system TTS voices once at startup (pyttsx3 needs a local backend,
# e.g. SAPI5 on Windows or eSpeak on Linux)
engine = pyttsx3.init()
voices = engine.getProperty('voices')
voice_names = [voice.name for voice in voices]
def speak_text_to_file(text, voice_name):
    try:
        # Use a fresh engine per call rather than the module-level one
        engine = pyttsx3.init()
        engine.setProperty('rate', 150)
        for voice in voices:
            if voice.name == voice_name:
                engine.setProperty('voice', voice.id)
                break
        output_path = "tts_output.wav"
        engine.save_to_file(text, output_path)
        engine.runAndWait()
        return output_path
    except Exception:
        return None
def transcribe_and_translate(audio_path, direction, tone):
    import speech_recognition as sr  # imported lazily; only needed for the voice tab

    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
        if len(audio.frame_data) < 10000:
            return "⚠️ Audio too short or empty. Please try again."
        # recognize_google calls Google's Web Speech API and needs internet access
        text = recognizer.recognize_google(audio)
        return translate(text, direction, tone)
    except Exception as e:
        return f"⚠️ Could not transcribe audio: {e}"
# Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🌍 EAC Translator")
    gr.Markdown("Supports English, French, and Swahili. Includes tone control, language detection, voice input, and speech playback.")

    with gr.Tabs():
        with gr.Tab("📝 Text Translation"):
            input_text = gr.Textbox(label="Text to Translate", lines=3)
            direction = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English → Swahili")
            tone = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
            output_text = gr.Textbox(label="Translated Text", lines=3)
            # Guard against systems with no TTS voices installed
            voice_choice = gr.Dropdown(choices=voice_names, label="Voice for Playback", value=voice_names[0] if voice_names else None)
            audio_output = gr.Audio(label="Playback", interactive=False)
            translate_btn = gr.Button("Translate")
            speak_btn = gr.Button("🔊 Speak Translation")
            translate_btn.click(fn=translate, inputs=[input_text, direction, tone], outputs=output_text)
            speak_btn.click(fn=speak_text_to_file, inputs=[output_text, voice_choice], outputs=audio_output)

        with gr.Tab("🎙 Voice Translation"):
            audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Speak Now")
            direction_voice = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English → Swahili")
            tone_voice = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
            voice_output = gr.Textbox(label="Translated Text")
            voice_choice2 = gr.Dropdown(choices=voice_names, label="Voice for Playback", value=voice_names[0] if voice_names else None)
            audio_output2 = gr.Audio(label="Playback", interactive=False)
            voice_translate_btn = gr.Button("Transcribe & Translate")
            voice_speak_btn = gr.Button("🔊 Speak Translation")
            voice_translate_btn.click(fn=transcribe_and_translate, inputs=[audio_input, direction_voice, tone_voice], outputs=voice_output)
            voice_speak_btn.click(fn=speak_text_to_file, inputs=[voice_output, voice_choice2], outputs=audio_output2)

demo.launch()