Spaces:

Athspi-ai
/

Audio-translation

Running

App Files Files Community

Audio-translation / app.py

Athspi

Update app.py

9e7d27b verified 29 days ago

raw

history blame

5.2 kB

	import os
	import tempfile
	import numpy as np
	import soundfile as sf
	import wave

	from flask import Flask, request, jsonify, send_file, send_from_directory
	from flask_cors import CORS
	import google.generativeai as genai
	from google.generativeai import types

	# Create the Flask application object; static assets are looked up in "static".
	app = Flask(import_name=__name__, static_folder="static")
	# Register the flask-cors extension on the application.
	CORS(app)

	# Read the Gemini API key from the environment and refuse to start without it.
	GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
	if GEMINI_API_KEY is None or GEMINI_API_KEY == "":
	    raise ValueError("GEMINI_API_KEY environment variable not set")
	# Hand the key to the Gemini SDK once, at import time.
	genai.configure(api_key=GEMINI_API_KEY)

	# Display name -> BCP-47 language tag for every language offered to clients.
	# Insertion order is the order in which the names are listed by /languages.
	SUPPORTED_LANGUAGES = {
	    "Arabic (Egyptian)": "ar-EG",
	    "German (Germany)": "de-DE",
	    "English (US)": "en-US",
	    "Spanish (US)": "es-US",
	    "French (France)": "fr-FR",
	    "Hindi (India)": "hi-IN",
	    "Indonesian (Indonesia)": "id-ID",
	    "Italian (Italy)": "it-IT",
	    "Japanese (Japan)": "ja-JP",
	    "Korean (Korea)": "ko-KR",
	    "Portuguese (Brazil)": "pt-BR",
	    "Russian (Russia)": "ru-RU",
	    "Dutch (Netherlands)": "nl-NL",
	    "Polish (Poland)": "pl-PL",
	    "Thai (Thailand)": "th-TH",
	    "Turkish (Turkey)": "tr-TR",
	    "Vietnamese (Vietnam)": "vi-VN",
	    "Romanian (Romania)": "ro-RO",
	    "Ukrainian (Ukraine)": "uk-UA",
	    "Bengali (Bangladesh)": "bn-BD",
	    "English (India)": "en-IN",
	    "Marathi (India)": "mr-IN",
	    "Tamil (India)": "ta-IN",
	    "Telugu (India)": "te-IN",
	}

	@app.route('/')
	def serve_index():
	    """Serve ``index.html`` from the application's static folder."""
	    static_dir = app.static_folder
	    return send_from_directory(static_dir, 'index.html')

	@app.route('/languages')
	def get_languages():
	    """Return the display names of all supported languages as a JSON array."""
	    # Iterating a dict yields its keys, in insertion order.
	    language_names = list(SUPPORTED_LANGUAGES)
	    return jsonify(language_names)

	def _write_pcm_to_temp_wav(pcm_frames, sample_rate=24000):
	    """Write raw mono 16-bit PCM frames to a new temporary ``.wav`` file.

	    Args:
	        pcm_frames: Bytes-like object holding the raw audio frames.
	        sample_rate: Frame rate stored in the WAV header.

	    Returns:
	        The absolute path of the file that was created.

	    Raises:
	        Any exception raised while writing; the partial file is removed first.
	    """
	    temp_fd, temp_path = tempfile.mkstemp(suffix=".wav")
	    try:
	        # mkstemp returns an open OS-level descriptor; wrapping it in a file
	        # object guarantees it is closed instead of leaking one per request.
	        with os.fdopen(temp_fd, "wb") as raw_file:
	            with wave.open(raw_file, "wb") as wf:
	                wf.setnchannels(1)
	                wf.setsampwidth(2)
	                wf.setframerate(sample_rate)
	                wf.writeframes(pcm_frames)
	    except Exception:
	        # Do not leave a truncated file behind when writing fails.
	        try:
	            os.remove(temp_path)
	        except OSError:
	            pass
	        raise
	    return temp_path


	@app.route('/translate', methods=['POST'])
	def translate_audio():
	    """Transcribe an uploaded audio file, translate it, and synthesize speech.

	    Expects a multipart POST with an ``audio`` file and an optional
	    ``language`` form field (a key of ``SUPPORTED_LANGUAGES``; defaults to
	    ``'English (US)'``).

	    Returns:
	        On success, JSON with ``transcription``, ``translation`` and
	        ``audio_url`` (a ``/download/<name>`` path for the generated WAV).
	        On invalid input, JSON ``{'error': ...}`` with status 400.
	        On any other failure, JSON ``{'error': ...}`` with status 500.
	    """
	    try:
	        if 'audio' not in request.files:
	            return jsonify({'error': 'No audio file uploaded'}), 400

	        audio_file = request.files['audio']
	        target_language = request.form.get('language', 'English (US)')

	        if not audio_file or audio_file.filename == '':
	            return jsonify({'error': 'Invalid audio file'}), 400

	        # The language name is interpolated into the translation prompt, so
	        # only accept names that this service actually advertises.
	        if target_language not in SUPPORTED_LANGUAGES:
	            return jsonify({'error': f'Unsupported language: {target_language}'}), 400

	        # Validate MIME type
	        allowed_mime_types = ('audio/wav', 'audio/mpeg', 'audio/mp4', 'audio/webm')
	        if audio_file.mimetype not in allowed_mime_types:
	            return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400

	        # Read audio data; an empty upload is a client error, not a server one.
	        audio_data = audio_file.read()
	        if not audio_data:
	            return jsonify({'error': 'Invalid audio file'}), 400

	        # Transcribe audio using Gemini: the instruction is sent first, then
	        # the audio itself as the next message of the same chat.
	        model = genai.GenerativeModel("gemini-2.0-flash")
	        audio_blob = {
	            'mime_type': audio_file.mimetype,
	            'data': audio_data,
	        }

	        convo = model.start_chat()
	        convo.send_message("You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language. Respond only with the transcription.")
	        response = convo.send_message(audio_blob)
	        transcription = response.text.strip()

	        # Translate text using Gemini
	        prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
	        response = model.generate_content(prompt)
	        translated_text = response.text.strip()

	        # Generate TTS using Gemini.
	        # NOTE(review): `genai.Client` and `types.GenerateContentConfig` /
	        # `types.SpeechConfig` look like the `google-genai` SDK
	        # (`from google import genai`), whereas this file imports
	        # `google.generativeai` - confirm which package is installed,
	        # otherwise this step fails and the request ends in the 500 handler.
	        client = genai.Client(api_key=GEMINI_API_KEY)

	        response = client.models.generate_content(
	            model="gemini-2.5-flash-preview-tts",
	            contents=translated_text,
	            config=types.GenerateContentConfig(
	                response_modalities=["AUDIO"],
	                speech_config=types.SpeechConfig(
	                    voice_config=types.VoiceConfig(
	                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
	                            voice_name='Kore'  # You can change the voice as needed
	                        )
	                    )
	                ),
	            )
	        )

	        # Extract audio data from the first part of the first candidate
	        audio_output = response.candidates[0].content.parts[0].inline_data.data

	        # Save audio to a temporary WAV file (mono, 16-bit, 24 kHz header -
	        # presumably the PCM format the TTS model emits; confirm).
	        # NOTE(review): generated files are never deleted by this module.
	        temp_output_path = _write_pcm_to_temp_wav(audio_output, sample_rate=24000)

	        return jsonify({
	            'transcription': transcription,
	            'translation': translated_text,
	            'audio_url': f'/download/{os.path.basename(temp_output_path)}'
	        })

	    except Exception as e:
	        # Top-level boundary for the request: log with traceback, report as 500.
	        app.logger.exception("Error processing request: %s", e)
	        return jsonify({'error': str(e)}), 500

	@app.route('/download/<filename>')
	def download_file(filename):
	    """Send a previously generated WAV file from the system temp directory.

	    Args:
	        filename: Base name of the file, as returned in ``audio_url`` by the
	            ``/translate`` endpoint.

	    Returns:
	        The file as an ``audio/wav`` attachment named ``translated_<filename>``,
	        or JSON ``{'error': 'File not found'}`` with status 404 when the name
	        is not an acceptable generated-file name or the file does not exist.
	    """
	    # The temp directory is shared with other programs, so only serve plain
	    # file names shaped like the ones mkstemp(suffix=".wav") produces
	    # (default prefix + ".wav"); anything else is treated as missing.
	    is_plain_name = os.path.basename(filename) == filename
	    looks_generated = (
	        filename.startswith(tempfile.gettempprefix())
	        and filename.endswith(".wav")
	    )
	    if not (is_plain_name and looks_generated):
	        return jsonify({'error': 'File not found'}), 404

	    try:
	        return send_file(
	            os.path.join(tempfile.gettempdir(), filename),
	            mimetype="audio/wav",
	            as_attachment=True,
	            download_name=f"translated_{filename}"
	        )
	    except FileNotFoundError:
	        return jsonify({'error': 'File not found'}), 404

	if __name__ == "__main__":
	    # When run directly as a script, start Flask's built-in server bound to
	    # every network interface on port 7860.
	    app.run(port=7860, host="0.0.0.0")