import gradio as gr
from transformers import MarianMTModel, MarianTokenizer
from datetime import datetime
import langid
import os
import pyttsx3
import warnings

# Optional: suppress the sacremoses warning from transformers
warnings.filterwarnings("ignore", message="Recommended: pip install sacremoses.")

# Set FFmpeg path explicitly (for pydub and audio playback)
os.environ["PATH"] += os.pathsep + r"C:\ffmpeg\bin"

# Restrict detection to the three supported languages for better accuracy
langid.set_languages(['en', 'fr', 'sw'])

MODEL_MAP = {
    "English → Swahili": "Helsinki-NLP/opus-mt-en-sw",
    "English → French": "Helsinki-NLP/opus-mt-en-fr",
    "French → English": "Helsinki-NLP/opus-mt-fr-en",
    "French → Swahili (via English)": ["Helsinki-NLP/opus-mt-fr-en", "Helsinki-NLP/opus-mt-en-sw"],
}

TONE_MODIFIERS = {
    "Neutral": "",
    "Romantic": "Express this romantically: ",
    "Formal": "Translate this in a formal tone: ",
    "Casual": "Make this sound casual: ",
}

# Cache of (tokenizer, model) pairs so each checkpoint is loaded only once
loaded_models = {}

def load_model(model_name):
    if model_name not in loaded_models:
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        loaded_models[model_name] = (tokenizer, model)
    return loaded_models[model_name]

def detect_language(text):
    try:
        lang, _score = langid.classify(text)
        return lang
    except Exception:
        return "unknown"
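
# Note: the Helsinki-NLP MarianMT checkpoints are plain translation models,
# not instruction-tuned, so the tone prefixes above are translated literally
# as part of the input sentence rather than interpreted as style instructions.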

def translate(text, direction, tone):
    detected_lang = detect_language(text)
    expected_src = direction.split(" → ")[0].lower()

    # Warn (but still translate) if the detected language disagrees with the selected source
    if expected_src.startswith("english") and detected_lang != "en":
        warning = f"⚠ Detected language is '{detected_lang}', but you selected English as source."
    elif expected_src.startswith("french") and detected_lang != "fr":
        warning = f"⚠ Detected language is '{detected_lang}', but you selected French as source."
    elif expected_src.startswith("swahili") and detected_lang != "sw":
        warning = f"⚠ Detected language is '{detected_lang}', but you selected Swahili as source."
    else:
        warning = ""

    prompt = TONE_MODIFIERS[tone] + text
    model_info = MODEL_MAP[direction]

    if isinstance(model_info, list):
        # Two-hop pivot translation: source → English, then English → target
        tokenizer1, model1 = load_model(model_info[0])
        encoded1 = tokenizer1(prompt, return_tensors="pt", padding=True, truncation=True)
        intermediate = model1.generate(**encoded1)
        intermediate_text = tokenizer1.decode(intermediate[0], skip_special_tokens=True)

        tokenizer2, model2 = load_model(model_info[1])
        encoded2 = tokenizer2(intermediate_text, return_tensors="pt", padding=True, truncation=True)
        final = model2.generate(**encoded2)
        translation = tokenizer2.decode(final[0], skip_special_tokens=True)
    else:
        tokenizer, model = load_model(model_info)
        encoded = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
        generated = model.generate(**encoded)
        translation = tokenizer.decode(generated[0], skip_special_tokens=True)

    # Append every translation to a plain-text log
    with open("translation_log.txt", "a", encoding="utf-8") as f:
        f.write(f"[{datetime.now()}] {direction} | Tone: {tone}\n")
        f.write(f"Input: {text}\nOutput: {translation}\n\n")

    return f"{warning}\n{translation}" if warning else translation

# Enumerate the TTS voices installed on this machine
engine = pyttsx3.init()
voices = engine.getProperty('voices')
voice_names = [voice.name for voice in voices]

def speak_text_to_file(text, voice_name):
    try:
        tts = pyttsx3.init()  # fresh engine per call; avoids shadowing the module-level one
        tts.setProperty('rate', 150)
        for voice in voices:
            if voice.name == voice_name:
                tts.setProperty('voice', voice.id)
                break
        output_path = "tts_output.wav"
        tts.save_to_file(text, output_path)
        tts.runAndWait()
        return output_path
    except Exception:
        return None

def transcribe_and_translate(audio_path, direction, tone):
    import speech_recognition as sr

    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
        if len(audio.frame_data) < 10000:
            return "⚠ Audio too short or empty. Please try again."
        text = recognizer.recognize_google(audio)
        return translate(text, direction, tone)
    except Exception as e:
        return f"⚠ Could not transcribe audio: {e}"
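
# Note: recognize_google() sends the recording to Google's free Web Speech
# API, so the voice tab needs an internet connection; sr.AudioFile accepts
# WAV/AIFF/FLAC input, which matches the WAV files Gradio's microphone
# component records by default.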

# Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🌍 EAC Translator")
    gr.Markdown("Supports English, French, and Swahili. Includes tone control, language detection, voice input, and speech playback.")

    with gr.Tabs():
        with gr.Tab("📝 Text Translation"):
            input_text = gr.Textbox(label="Text to Translate", lines=3)
            direction = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English → Swahili")
            tone = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
            output_text = gr.Textbox(label="Translated Text", lines=3)
            voice_choice = gr.Dropdown(choices=voice_names, label="Voice for Playback", value=voice_names[0] if voice_names else None)
            audio_output = gr.Audio(label="Playback", interactive=False)
            translate_btn = gr.Button("Translate")
            speak_btn = gr.Button("🔊 Speak Translation")

            translate_btn.click(fn=translate, inputs=[input_text, direction, tone], outputs=output_text)
            speak_btn.click(fn=speak_text_to_file, inputs=[output_text, voice_choice], outputs=audio_output)

        with gr.Tab("🎙 Voice Translation"):
            audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Speak Now")
            direction_voice = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English → Swahili")
            tone_voice = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
            voice_output = gr.Textbox(label="Translated Text")
            voice_choice2 = gr.Dropdown(choices=voice_names, label="Voice for Playback", value=voice_names[0] if voice_names else None)
            audio_output2 = gr.Audio(label="Playback", interactive=False)
            voice_translate_btn = gr.Button("Transcribe & Translate")
            voice_speak_btn = gr.Button("🔊 Speak Translation")

            voice_translate_btn.click(fn=transcribe_and_translate, inputs=[audio_input, direction_voice, tone_voice], outputs=voice_output)
            voice_speak_btn.click(fn=speak_text_to_file, inputs=[voice_output, voice_choice2], outputs=audio_output2)

demo.launch()
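
# Usage sketch (package names inferred from the imports above):
#   pip install gradio transformers torch langid pyttsx3 SpeechRecognition sacremoses
# Run the script and open the local URL Gradio prints (http://127.0.0.1:7860
# by default). pyttsx3 uses SAPI5 voices on Windows; the hard-coded
# C:\ffmpeg\bin path above likewise assumes a Windows FFmpeg install.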