"""Gradio app: translate text with a Groq-hosted LLM and speak it via ElevenLabs."""

import os
import tempfile
import time
from typing import Optional, Tuple

import gradio as gr
import requests
from langchain.schema import HumanMessage, SystemMessage
from langchain_groq import ChatGroq


def _require_env(name: str) -> str:
    """Return the non-empty value of environment variable *name*.

    Raises:
        RuntimeError: If the variable is unset or blank.
    """
    value = os.environ.get(name, "").strip()
    if not value:
        raise RuntimeError(
            f"Environment variable {name} is not set; "
            "export it before starting the app."
        )
    return value


# Credentials come from the environment so that no secret is committed to
# source control. Any key that was previously hard-coded here should be
# treated as leaked and revoked.
groq_api_key = _require_env("GROQ_API_KEY")
llm = ChatGroq(api_key=groq_api_key, model_name="llama3-70b-8192")

# ElevenLabs API key and voice ID. The voice ID is not a secret, so the
# original value is kept as the default and may be overridden.
XI_API_KEY = _require_env("ELEVENLABS_API_KEY")
VOICE_ID = os.environ.get("ELEVENLABS_VOICE_ID", "iYwRDEf2D1WyqRRecXPA")

# Upper bound (seconds) on the text-to-speech HTTP call; without it a stalled
# connection would block the request handler indefinitely.
TTS_TIMEOUT_SECONDS = 60


def _translate(user_input: str, target_language: str) -> str:
    """Ask the Groq model to translate *user_input* into *target_language*.

    Raises:
        ValueError: If the model response has no usable content.
    """
    system_prompt = f"You are expected to translate the user input exclusively into {target_language} without adding anything else."
    messages = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=user_input),
    ]
    response = llm.invoke(messages)

    if not response or not hasattr(response, "content"):
        raise ValueError("Invalid response from the translation model.")

    generated_text = response.content.strip()
    if not generated_text:
        # Sending empty text to the speech API would only produce an error.
        raise ValueError("The translation model returned an empty translation.")
    return generated_text


def _request_speech(text: str) -> requests.Response:
    """POST *text* to the ElevenLabs text-to-speech endpoint and return the response."""
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": XI_API_KEY,
    }
    data = {
        "text": text,
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {
            "stability": 0.75,
            "similarity_boost": 0.75,
        },
    }
    return requests.post(url, json=data, headers=headers, timeout=TTS_TIMEOUT_SECONDS)


def translate_and_speak(
    user_input: str, target_language: str
) -> Tuple[str, Optional[str], Optional[str]]:
    """Translate *user_input* into *target_language* and synthesize it as speech.

    Args:
        user_input: Text to translate.
        target_language: Name of the language to translate into.

    Returns:
        A 3-tuple ``(text, audio_path, timings)``. On success ``text`` is the
        translation, ``audio_path`` is the path of a temporary ``.mp3`` file
        and ``timings`` is a human-readable timing summary. On any failure
        ``text`` holds an error message and the other two items are ``None``.
    """
    # Reject blank input before spending calls on either remote API.
    if not user_input or not user_input.strip():
        return "Please enter some text to translate.", None, None

    try:
        # perf_counter is monotonic, so the measured intervals cannot be
        # skewed by wall-clock adjustments.
        start_time = time.perf_counter()

        generated_text = _translate(user_input, target_language)
        translation_time = time.perf_counter() - start_time

        tts_start = time.perf_counter()
        tts_response = _request_speech(generated_text)
        tts_time = time.perf_counter() - tts_start

        if tts_response.status_code != 200:
            error_message = f"Text-to-Speech API Error: {tts_response.status_code} - {tts_response.text}"
            return error_message, None, None

        # delete=False: the file must outlive this function because its path
        # is handed to the Audio output component.
        # NOTE(review): nothing here removes these files afterwards - confirm
        # whether a cleanup step is needed for long-running deployments.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
            fp.write(tts_response.content)
            audio_file = fp.name

        total_time = time.perf_counter() - start_time

        timings_info = (
            f"Translation time: {translation_time:.2f} seconds\n"
            f"Text-to-Speech time: {tts_time:.2f} seconds\n"
            f"Total processing time: {total_time:.2f} seconds"
        )
        return generated_text, audio_file, timings_info

    except Exception as e:
        # UI boundary: surface the failure in the output textbox instead of
        # letting the exception propagate to the caller.
        error_details = f"An error occurred: {str(e)}"
        return error_details, None, None


# Create Gradio interface
iface = gr.Interface(
    fn=translate_and_speak,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter text to translate...", label="Input Text"),
        gr.Dropdown(
            choices=["Spanish", "French", "German", "Italian", "Chinese", "Japanese"],
            value="Spanish",
            label="Target Language",
        ),
    ],
    outputs=[
        gr.Textbox(label="Translated Text"),
        gr.Audio(label="Spoken Audio", autoplay=True),
        gr.Textbox(label="Processing Times"),
    ],
    title="Multilingual Text Translator and Speech Synthesizer",
    description="Translate text into the selected language and listen to the spoken audio.",
    allow_flagging="never",
)

# Launch the app only when executed as a script, so importing this module
# (e.g. to reuse translate_and_speak) does not start a web server.
if __name__ == "__main__":
    iface.launch()