Spaces:
Running
Running
File size: 6,421 Bytes
5f33e0e 385365a 9ffbfd1 385365a f49c906 385365a f49c906 bfc5175 280b5d0 e51d62b 7cc4829 5ddb059 e51d62b bfc5175 280b5d0 e51d62b 385365a e51d62b 280b5d0 e51d62b 280b5d0 f49c906 e51d62b bfc5175 e51d62b 385365a e51d62b 385365a 6ebed08 e51d62b dbed07a e51d62b 63a0fca e51d62b dbed07a 965bd2d e51d62b 5ddb059 6c131f6 e51d62b 385365a e51d62b 385365a e51d62b 5ddb059 e51d62b 5ddb059 e51d62b f49c906 9ffbfd1 e51d62b dbed07a e51d62b 385365a e51d62b 385365a e51d62b 385365a e51d62b 280b5d0 385365a e51d62b 385365a e51d62b 385365a e51d62b 385365a e51d62b 385365a 5ddb059 e51d62b dbe8a71 e51d62b 7cc4829 e51d62b dbe8a71 e51d62b 9ffbfd1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
import os
import tempfile
import wave
import numpy as np
import soundfile as sf
from flask import Flask, request, jsonify, send_file, send_from_directory
from flask_cors import CORS
from werkzeug.utils import secure_filename
from gtts import gTTS, lang
from kokoro import KPipeline
import google.generativeai as genai
from google.generativeai.types import (
GenerateContentConfig,
SpeechConfig,
VoiceConfig,
PrebuiltVoiceConfig,
)
# -----------------------------------------------------------------------------
# Configuration
# -----------------------------------------------------------------------------
# Setup checklist:
#   1) Install or upgrade the dependencies:
#        pip install --upgrade google-generativeai gTTS soundfile kokoro flask flask-cors werkzeug
#   2) Export the Gemini API key before starting the server:
#        export GEMINI_API_KEY="your_real_api_key_here"
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if GEMINI_API_KEY is None or GEMINI_API_KEY == "":
    # Fail at import time so a misconfigured deployment never starts serving.
    raise RuntimeError("GEMINI_API_KEY environment variable not set")

# Module-level configuration is used by genai.GenerativeModel(...) calls.
genai.configure(api_key=GEMINI_API_KEY)
# Separate client object used for the Gemini TTS request.
# NOTE(review): `Client` (and the *Config types imported at the top of the file)
# look like they come from the newer `google-genai` SDK rather than
# `google.generativeai` -- confirm this attribute resolves with the installed package.
client = genai.Client(api_key=GEMINI_API_KEY)
# Language display name -> Kokoro language code.
KOKORO_LANGUAGES = {
    "American English": "a",
    "British English": "b",
    "Mandarin Chinese": "z",
    "Spanish": "e",
    "French": "f",
    "Hindi": "h",
    "Italian": "i",
    "Brazilian Portuguese": "p",
}

# gTTS language code -> language display name, with Japanese pinned explicitly.
GTTS_LANGUAGES = lang.tts_langs()
GTTS_LANGUAGES.update(ja="Japanese")

# Every language name either engine can speak, de-duplicated and sorted.
SUPPORTED_LANGUAGES = sorted({*KOKORO_LANGUAGES, *GTTS_LANGUAGES.values()})

# Prebuilt voice requested from the Gemini TTS preview model.
GEMINI_VOICE_NAME = "Kore"
# -----------------------------------------------------------------------------
# Helpers
# -----------------------------------------------------------------------------
def wave_file(
    filename: "str | os.PathLike[str]",
    pcm: bytes,
    channels: int = 1,
    rate: int = 24000,
    sample_width: int = 2,
) -> None:
    """Write raw PCM audio into a WAV file.

    Args:
        filename: Destination path, as a string or ``os.PathLike``. The file
            is opened in ``"wb"`` mode, so existing content is replaced.
        pcm: Raw PCM frame data to store.
        channels: Number of audio channels written to the WAV header.
        rate: Frame rate (frames per second) written to the WAV header.
        sample_width: Sample width in bytes written to the WAV header.

    Raises:
        wave.Error: If the ``wave`` module rejects one of the header values.
        OSError: If the destination cannot be opened for writing.
    """
    # wave.open() treats any non-str argument as an already-open file object,
    # so path-like objects are converted to a plain path string first.
    with wave.open(os.fspath(filename), "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(pcm)
# -----------------------------------------------------------------------------
# Flask App
# -----------------------------------------------------------------------------
# Flask application object; static assets (including index.html) are served from ./static.
app = Flask(__name__, static_folder="static")
# Enable cross-origin requests using flask-cors with no extra options.
# NOTE(review): the defaults presumably allow any origin -- confirm that is intended.
CORS(app)
@app.route("/")
def serve_index():
    """Serve ``index.html`` from the application's static folder."""
    static_dir = app.static_folder
    return send_from_directory(static_dir, "index.html")
@app.route("/languages")
def list_languages():
    """Return the supported target-language names as a JSON array."""
    payload = jsonify(SUPPORTED_LANGUAGES)
    return payload
def _new_tts_path(engine, suffix):
    """Reserve a unique ``tts_<engine>_*<suffix>`` file in the temp directory.

    A per-call name keeps concurrent requests from overwriting each other's
    audio, which a single fixed file name per engine would allow.
    """
    fd, path = tempfile.mkstemp(
        prefix=f"tts_{engine}_", suffix=suffix, dir=tempfile.gettempdir()
    )
    os.close(fd)  # the TTS writers reopen the file by path
    return path


def _synthesize_speech(text, target_lang):
    """Turn ``text`` into an audio file and return the file's path.

    Gemini TTS is tried first. If it fails for any reason, Kokoro is used when
    ``target_lang`` is one of ``KOKORO_LANGUAGES``; otherwise gTTS is used,
    defaulting to the ``"en"`` voice when the language name is unknown to it.

    Raises:
        RuntimeError: If Kokoro is selected but yields no audio segments.
    """
    try:
        tts_resp = client.models.generate_content(
            model="gemini-2.5-flash-preview-tts",
            contents=text,
            config=GenerateContentConfig(
                response_modalities=["AUDIO"],
                speech_config=SpeechConfig(
                    voice_config=VoiceConfig(
                        prebuilt_voice_config=PrebuiltVoiceConfig(
                            voice_name=GEMINI_VOICE_NAME
                        )
                    )
                ),
            ),
        )
        pcm_data = tts_resp.candidates[0].content.parts[0].inline_data.data
        out_path = _new_tts_path("gemini", ".wav")
        wave_file(out_path, pcm_data)
        return out_path
    except Exception:
        # Deliberate best-effort: any Gemini failure falls through to the
        # local/offline engines, but the cause is logged instead of hidden.
        app.logger.warning("Gemini TTS failed; using fallback engine", exc_info=True)

    # Fallback: Kokoro
    if target_lang in KOKORO_LANGUAGES:
        pipeline = KPipeline(lang_code=KOKORO_LANGUAGES[target_lang])
        generator = pipeline(text, voice="af_heart", speed=1)
        segments = [audio for _, _, audio in generator if audio is not None]
        if not segments:
            raise RuntimeError("Kokoro produced no audio")
        out_path = _new_tts_path("kokoro", ".wav")
        sf.write(out_path, np.concatenate(segments), 24000)
        return out_path

    # Final fallback: gTTS (reverse lookup: language name -> gTTS code)
    gtts_code = next(
        (code for code, name in GTTS_LANGUAGES.items() if name == target_lang),
        "en",
    )
    tts = gTTS(text, lang=gtts_code)
    out_path = _new_tts_path("gtts", ".mp3")
    tts.save(out_path)
    return out_path


@app.route("/translate", methods=["POST"])
def translate_audio():
    """Transcribe an uploaded audio clip, translate it, and synthesize speech.

    Request (multipart form):
        audio: The audio file (WAV, MPEG, MP4 or WebM MIME type).
        language: Target language name; defaults to ``"English"`` when
            missing or blank.

    Returns:
        JSON with ``transcription``, ``translation`` and ``audio_url`` (a
        ``/download/<file>`` link) on success. Responds with 400 for a
        missing/invalid/unsupported upload, 422 when nothing could be
        transcribed, and 500 with the error text for any other failure.

    NOTE(review): generated audio files are left in the temp directory; no
    cleanup happens in this handler.
    """
    try:
        # 1. Receive file + target language
        if "audio" not in request.files:
            return jsonify(error="No audio file uploaded"), 400
        audio_file = request.files["audio"]
        target_lang = request.form.get("language", "").strip() or "English"
        if not audio_file or audio_file.filename == "":
            return jsonify(error="Invalid audio file"), 400

        # 2. Validate MIME type
        if audio_file.mimetype not in ("audio/wav", "audio/mpeg", "audio/mp4", "audio/webm"):
            return jsonify(error=f"Unsupported file type: {audio_file.mimetype}"), 400

        # 3. Transcribe with Gemini
        model = genai.GenerativeModel("gemini-2.0-flash")
        blob = {"mime_type": audio_file.mimetype, "data": audio_file.read()}
        convo = model.start_chat()
        convo.send_message(
            "You are a professional transcriber. Transcribe this audio accurately, verbatim."
        )
        resp = convo.send_message(blob)
        transcription = resp.text.strip()
        if not transcription:
            # Nothing to translate or speak; stop before the later stages
            # are asked to work on empty text.
            return jsonify(error="No speech could be transcribed from the audio"), 422

        # 4. Translate with Gemini
        prompt = f"Translate the following text to {target_lang}, preserving meaning and cultural nuances:\n\n{transcription}"
        translation_resp = model.generate_content(prompt)
        translated_text = translation_resp.text.strip()

        # 5. Synthesize speech (Gemini TTS, then Kokoro or gTTS as fallback)
        out_path = _synthesize_speech(translated_text, target_lang)

        return jsonify(
            transcription=transcription,
            translation=translated_text,
            audio_url=f"/download/{os.path.basename(out_path)}",
        )
    except Exception as e:
        app.logger.exception("Error in /translate")
        return jsonify(error=str(e)), 500
@app.route("/download/<filename>")
def download_file(filename):
    """Send a previously generated TTS audio file as an attachment.

    Only names that are already sanitized, start with ``tts_`` and end in
    ``.wav`` or ``.mp3`` are served. The temp directory is shared with other
    programs, so serving arbitrary names from it would expose unrelated files.

    Args:
        filename: Base name of the audio file inside the temp directory.

    Returns:
        The file as a download named ``translated_<filename>``, or a JSON
        error with status 404 when the name is not allowed or no such file
        exists.
    """
    is_allowed = (
        secure_filename(filename) == filename
        and filename.startswith("tts_")
        and filename.endswith((".wav", ".mp3"))
    )
    if not is_allowed:
        # Same response as a missing file, so callers cannot probe the directory.
        return jsonify(error="File not found"), 404

    path = os.path.join(tempfile.gettempdir(), filename)
    if not os.path.isfile(path):
        return jsonify(error="File not found"), 404
    return send_file(path, as_attachment=True, download_name=f"translated_{filename}")
# Script entry point: start Flask's built-in server on all interfaces, port 7860.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)