# eac-translator / app.py
import gradio as gr
from transformers import MarianMTModel, MarianTokenizer
from datetime import datetime
import langid
import os
import pyttsx3
import warnings
# Optional: suppress sacremoses warning
warnings.filterwarnings("ignore", message="Recommended: pip install sacremoses.")
# Make a local FFmpeg build visible to pydub and audio playback.
# Note: this is a Windows-specific path; adjust or remove on Linux/macOS hosts.
os.environ["PATH"] += os.pathsep + r"C:\ffmpeg\bin"
langid.set_languages(['en', 'fr', 'sw'])
MODEL_MAP = {
    "English → Swahili": "Helsinki-NLP/opus-mt-en-sw",
    "English → French": "Helsinki-NLP/opus-mt-en-fr",
    "French → English": "Helsinki-NLP/opus-mt-fr-en",
    # French → Swahili is routed through English as a pivot.
    "French → Swahili (via English)": ["Helsinki-NLP/opus-mt-fr-en", "Helsinki-NLP/opus-mt-en-sw"],
}
TONE_MODIFIERS = {
    "Neutral": "",
    "Romantic": "Express this romantically: ",
    "Formal": "Translate this in a formal tone: ",
    "Casual": "Make this sound casual: ",
}
loaded_models = {}  # cache of (tokenizer, model) pairs keyed by model name

def load_model(model_name):
    """Load a MarianMT tokenizer/model pair once and reuse it afterwards."""
    if model_name not in loaded_models:
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        loaded_models[model_name] = (tokenizer, model)
    return loaded_models[model_name]
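# Optional warm-up (a sketch, not in the original app): eagerly loading every
# model at startup trades a slower boot for fast first requests. Uncomment to enable.
# for entry in MODEL_MAP.values():
#     for name in (entry if isinstance(entry, list) else [entry]):
#         load_model(name)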
def detect_language(text):
    """Best-effort language ID, restricted to en/fr/sw via langid.set_languages."""
    try:
        lang, _score = langid.classify(text)
        return lang
    except Exception:
        return "unknown"
def translate(text, direction, tone):
    detected_lang = detect_language(text)
    expected_src = direction.split(" → ")[0].lower()
    # Warn when langid's guess disagrees with the selected source language.
    warning = ""
    for name, code in {"English": "en", "French": "fr", "Swahili": "sw"}.items():
        if expected_src.startswith(name.lower()) and detected_lang != code:
            warning = f"⚠ Detected language is '{detected_lang}', but you selected {name} as source."
            break
    prompt = TONE_MODIFIERS[tone] + text
    model_info = MODEL_MAP[direction]
    if isinstance(model_info, list):
        # Pivot translation: source → English, then English → target.
        tokenizer1, model1 = load_model(model_info[0])
        encoded1 = tokenizer1(prompt, return_tensors="pt", padding=True, truncation=True)
        intermediate = model1.generate(**encoded1)
        intermediate_text = tokenizer1.decode(intermediate[0], skip_special_tokens=True)
        tokenizer2, model2 = load_model(model_info[1])
        encoded2 = tokenizer2(intermediate_text, return_tensors="pt", padding=True, truncation=True)
        final = model2.generate(**encoded2)
        translation = tokenizer2.decode(final[0], skip_special_tokens=True)
    else:
        tokenizer, model = load_model(model_info)
        encoded = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
        generated = model.generate(**encoded)
        translation = tokenizer.decode(generated[0], skip_special_tokens=True)
    # Append every request to a simple plain-text log.
    with open("translation_log.txt", "a", encoding="utf-8") as f:
        f.write(f"[{datetime.now()}] {direction} | Tone: {tone}\n")
        f.write(f"Input: {text}\nOutput: {translation}\n\n")
    return f"{warning}\n{translation}" if warning else translation
# Enumerate system TTS voices; pyttsx3 needs a speech backend (e.g. SAPI5 or
# eSpeak), so fall back gracefully on headless hosts where none is installed.
try:
    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    voice_names = [voice.name for voice in voices] or ["(default voice)"]
except Exception:
    voices, voice_names = [], ["(default voice)"]
def speak_text_to_file(text, voice_name):
    """Synthesize `text` to a WAV file using the selected system voice."""
    try:
        engine = pyttsx3.init()  # fresh engine per call avoids a stuck event loop
        engine.setProperty('rate', 150)
        for voice in voices:
            if voice.name == voice_name:
                engine.setProperty('voice', voice.id)
                break
        output_path = "tts_output.wav"
        engine.save_to_file(text, output_path)
        engine.runAndWait()
        return output_path
    except Exception:
        return None  # Gradio shows an empty player when TTS is unavailable
def transcribe_and_translate(audio_path, direction, tone):
    import speech_recognition as sr  # lazy import; only needed for voice input
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
        if len(audio.frame_data) < 10000:
            return "⚠ Audio too short or empty. Please try again."
        text = recognizer.recognize_google(audio)
        return translate(text, direction, tone)
    except Exception as e:
        return f"⚠ Could not transcribe audio: {e}"
# Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🌍 EAC Translator")
    gr.Markdown("Supports English, French, and Swahili. Includes tone control, language detection, voice input, and speech playback.")
    with gr.Tabs():
        with gr.Tab("📝 Text Translation"):
            input_text = gr.Textbox(label="Text to Translate", lines=3)
            direction = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English → Swahili")
            tone = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
            output_text = gr.Textbox(label="Translated Text", lines=3)
            voice_choice = gr.Dropdown(choices=voice_names, label="Voice for Playback", value=voice_names[0])
            audio_output = gr.Audio(label="Playback", interactive=False)
            translate_btn = gr.Button("Translate")
            speak_btn = gr.Button("🔊 Speak Translation")
            translate_btn.click(fn=translate, inputs=[input_text, direction, tone], outputs=output_text)
            speak_btn.click(fn=speak_text_to_file, inputs=[output_text, voice_choice], outputs=audio_output)
        with gr.Tab("🎙 Voice Translation"):
            audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Speak Now")
            direction_voice = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English → Swahili")
            tone_voice = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
            voice_output = gr.Textbox(label="Translated Text")
            voice_choice2 = gr.Dropdown(choices=voice_names, label="Voice for Playback", value=voice_names[0])
            audio_output2 = gr.Audio(label="Playback", interactive=False)
            voice_translate_btn = gr.Button("Transcribe & Translate")
            voice_speak_btn = gr.Button("🔊 Speak Translation")
            voice_translate_btn.click(fn=transcribe_and_translate, inputs=[audio_input, direction_voice, tone_voice], outputs=voice_output)
            voice_speak_btn.click(fn=speak_text_to_file, inputs=[voice_output, voice_choice2], outputs=audio_output2)
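# Launch note: demo.launch() serves locally by default; demo.launch(share=True)
# (a standard Gradio option) creates a temporary public link for quick sharing.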
demo.launch()