# eac-translator / app.py
import gradio as gr
from transformers import MarianMTModel, MarianTokenizer
from datetime import datetime
import langid
import os
import pyttsx3
import warnings
# Optional: suppress sacremoses warning
warnings.filterwarnings("ignore", message="Recommended: pip install sacremoses.")
# Make a local FFmpeg build visible to pydub and audio playback.
# Note: this is a Windows-specific path; adjust or remove on Linux/macOS hosts.
os.environ["PATH"] += os.pathsep + r"C:\ffmpeg\bin"
langid.set_languages(['en', 'fr', 'sw'])
MODEL_MAP = {
    "English → Swahili": "Helsinki-NLP/opus-mt-en-sw",
    "English → French": "Helsinki-NLP/opus-mt-en-fr",
    "French → English": "Helsinki-NLP/opus-mt-fr-en",
    # French → Swahili is routed through English as a pivot.
    "French → Swahili (via English)": ["Helsinki-NLP/opus-mt-fr-en", "Helsinki-NLP/opus-mt-en-sw"],
}
TONE_MODIFIERS = {
    "Neutral": "",
    "Romantic": "Express this romantically: ",
    "Formal": "Translate this in a formal tone: ",
    "Casual": "Make this sound casual: ",
}
loaded_models = {}  # cache of (tokenizer, model) pairs keyed by model name

def load_model(model_name):
    """Load a MarianMT tokenizer/model pair once and reuse it afterwards."""
    if model_name not in loaded_models:
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        loaded_models[model_name] = (tokenizer, model)
    return loaded_models[model_name]
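# Optional warm-up (a sketch, not in the original app): eagerly loading every
# model at startup trades a slower boot for fast first requests. Uncomment to enable.
# for entry in MODEL_MAP.values():
#     for name in (entry if isinstance(entry, list) else [entry]):
#         load_model(name)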
def detect_language(text):
    """Best-effort language ID, restricted to en/fr/sw via langid.set_languages."""
    try:
        lang, _score = langid.classify(text)
        return lang
    except Exception:
        return "unknown"
def translate(text, direction, tone):
    detected_lang = detect_language(text)
    expected_src = direction.split(" → ")[0].lower()
    # Warn when langid's guess disagrees with the selected source language.
    warning = ""
    for name, code in {"English": "en", "French": "fr", "Swahili": "sw"}.items():
        if expected_src.startswith(name.lower()) and detected_lang != code:
            warning = f"⚠ Detected language is '{detected_lang}', but you selected {name} as source."
            break
    prompt = TONE_MODIFIERS[tone] + text
    model_info = MODEL_MAP[direction]
    if isinstance(model_info, list):
        # Pivot translation: source → English, then English → target.
        tokenizer1, model1 = load_model(model_info[0])
        encoded1 = tokenizer1(prompt, return_tensors="pt", padding=True, truncation=True)
        intermediate = model1.generate(**encoded1)
        intermediate_text = tokenizer1.decode(intermediate[0], skip_special_tokens=True)
        tokenizer2, model2 = load_model(model_info[1])
        encoded2 = tokenizer2(intermediate_text, return_tensors="pt", padding=True, truncation=True)
        final = model2.generate(**encoded2)
        translation = tokenizer2.decode(final[0], skip_special_tokens=True)
    else:
        tokenizer, model = load_model(model_info)
        encoded = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
        generated = model.generate(**encoded)
        translation = tokenizer.decode(generated[0], skip_special_tokens=True)
    # Append every request to a simple plain-text log.
    with open("translation_log.txt", "a", encoding="utf-8") as f:
        f.write(f"[{datetime.now()}] {direction} | Tone: {tone}\n")
        f.write(f"Input: {text}\nOutput: {translation}\n\n")
    return f"{warning}\n{translation}" if warning else translation
# Enumerate system TTS voices; pyttsx3 needs a speech backend (e.g. SAPI5 or
# eSpeak), so fall back gracefully on headless hosts where none is installed.
try:
    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    voice_names = [voice.name for voice in voices] or ["(default voice)"]
except Exception:
    voices, voice_names = [], ["(default voice)"]
def speak_text_to_file(text, voice_name):
    """Synthesize `text` to a WAV file using the selected system voice."""
    try:
        engine = pyttsx3.init()  # fresh engine per call avoids a stuck event loop
        engine.setProperty('rate', 150)
        for voice in voices:
            if voice.name == voice_name:
                engine.setProperty('voice', voice.id)
                break
        output_path = "tts_output.wav"
        engine.save_to_file(text, output_path)
        engine.runAndWait()
        return output_path
    except Exception:
        return None  # Gradio shows an empty player when TTS is unavailable
def transcribe_and_translate(audio_path, direction, tone):
    import speech_recognition as sr  # lazy import; only needed for voice input
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
        if len(audio.frame_data) < 10000:
            return "⚠ Audio too short or empty. Please try again."
        text = recognizer.recognize_google(audio)
        return translate(text, direction, tone)
    except Exception as e:
        return f"⚠ Could not transcribe audio: {e}"
# Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🌍 EAC Translator")
    gr.Markdown("Supports English, French, and Swahili. Includes tone control, language detection, voice input, and speech playback.")
    with gr.Tabs():
        with gr.Tab("📝 Text Translation"):
            input_text = gr.Textbox(label="Text to Translate", lines=3)
            direction = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English → Swahili")
            tone = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
            output_text = gr.Textbox(label="Translated Text", lines=3)
            voice_choice = gr.Dropdown(choices=voice_names, label="Voice for Playback", value=voice_names[0])
            audio_output = gr.Audio(label="Playback", interactive=False)
            translate_btn = gr.Button("Translate")
            speak_btn = gr.Button("🔊 Speak Translation")
            translate_btn.click(fn=translate, inputs=[input_text, direction, tone], outputs=output_text)
            speak_btn.click(fn=speak_text_to_file, inputs=[output_text, voice_choice], outputs=audio_output)
        with gr.Tab("🎙 Voice Translation"):
            audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Speak Now")
            direction_voice = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English → Swahili")
            tone_voice = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
            voice_output = gr.Textbox(label="Translated Text")
            voice_choice2 = gr.Dropdown(choices=voice_names, label="Voice for Playback", value=voice_names[0])
            audio_output2 = gr.Audio(label="Playback", interactive=False)
            voice_translate_btn = gr.Button("Transcribe & Translate")
            voice_speak_btn = gr.Button("🔊 Speak Translation")
            voice_translate_btn.click(fn=transcribe_and_translate, inputs=[audio_input, direction_voice, tone_voice], outputs=voice_output)
            voice_speak_btn.click(fn=speak_text_to_file, inputs=[voice_output, voice_choice2], outputs=audio_output2)
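# Launch note: demo.launch() serves locally by default; demo.launch(share=True)
# (a standard Gradio option) creates a temporary public link for quick sharing.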
demo.launch()