# mohan696matlab
# ui update
# 401279f
import gradio as gr
import time
import numpy as np
import os
import requests
import io
from pydub import AudioSegment
def translate_audio(audio, language_code, SARVAM_API_KEY, timeout=30):
    """Translate an audio clip via Sarvam AI's speech-to-text-translate API.

    The clip is exported to an in-memory WAV file and uploaded as multipart
    form data together with the model settings.

    Args:
        audio: Object exposing ``export(file_obj, format="wav")``; the caller
            in this file passes a ``pydub.AudioSegment``.
        language_code: Value sent as the ``language_code`` form field.
        SARVAM_API_KEY: Value sent in the ``api-subscription-key`` header.
        timeout: Seconds handed to ``requests.post``. Without a timeout a
            stalled connection would block the caller indefinitely.

    Returns:
        A ``(transcript, detected_language)`` tuple taken from the
        ``"transcript"`` and ``"language_code"`` keys of the JSON response;
        a missing key yields ``""``.

    Raises:
        ValueError: The API answered 401 or 403 (rejected API key).
        RuntimeError: The API answered with any other non-200/201 status.
        requests.exceptions.RequestException: Network failure or timeout.
    """
    # API endpoint for speech-to-text translation
    api_url = "https://api.sarvam.ai/speech-to-text-translate"

    # Header carrying the API subscription key
    headers = {"api-subscription-key": SARVAM_API_KEY}

    # Form fields describing the translation request
    model_data = {
        "model": "saaras:v2",  # Specify the model to be used
        "with_diarization": False,  # Set to True for speaker diarization
        "language_code": language_code,
    }

    # The context manager closes the buffer on every path, including a
    # failure inside export() itself.
    with io.BytesIO() as chunk_buffer:
        audio.export(chunk_buffer, format="wav")
        chunk_buffer.seek(0)  # Rewind so the upload starts at byte 0
        files = {"file": ("audiofile.wav", chunk_buffer, "audio/wav")}
        # Errors propagate unchanged; the caller decides how to report them.
        response = requests.post(
            api_url,
            headers=headers,
            files=files,
            data=model_data,
            timeout=timeout,
        )

    status = response.status_code
    if status in (401, 403):
        raise ValueError("❌ Invalid API key. Please check your Sarvam AI key.")
    if status not in (200, 201):
        raise RuntimeError(
            f"❌ Request failed with status code: {response.status_code}. Details: {response.text}"
        )

    response_data = response.json()
    transcript = response_data.get("transcript", "")
    detected_language = response_data.get("language_code", "")
    return transcript, detected_language
def stream_transcribe(history, new_chunk, language_code, SARVAM_API_KEY):
    """Translate one streamed microphone chunk and append it to the history.

    Args:
        history: Transcript text accumulated so far; ``None`` is treated
            as an empty string.
        new_chunk: ``(sample_rate, samples)`` tuple where ``samples`` is a
            NumPy array, or ``None`` when there is no audio to process.
        language_code: Forwarded unchanged to ``translate_audio``.
        SARVAM_API_KEY: Forwarded unchanged to ``translate_audio``.

    Returns:
        A ``(state, display)`` pair. On success both are the updated
        history. On failure the history is returned unchanged as the state
        and the error message is returned as the display text.
    """
    if history is None:
        history = ""

    # No audio delivered: keep state and display as they are rather than
    # surfacing an unpacking error to the user.
    if new_chunk is None:
        return history, history

    try:
        sr, y = new_chunk

        # Convert to mono if stereo
        if y.ndim > 1:
            y = y.mean(axis=1)

        # Convert to int16 for AudioSegment.
        # NOTE(review): assumes samples are already in the int16 value range;
        # float audio normalised to [-1, 1] would truncate to silence - confirm.
        y_int16 = y.astype(np.int16)

        # Create AudioSegment from raw PCM data (2 bytes per sample, mono)
        audio_segment = AudioSegment(
            data=y_int16.tobytes(),
            sample_width=2,
            frame_rate=sr,
            channels=1,
        )

        transcription, detected_language = translate_audio(
            audio_segment, language_code, SARVAM_API_KEY
        )
        entry = f'({detected_language})==> ' + transcription
        # Only insert the separator between entries, so the first entry
        # does not start with an empty line.
        history = history + '\n' + entry if history else entry
        return history, history
    except ValueError as ve:
        # Raised by translate_audio for a rejected API key
        return history, str(ve)
    except Exception as e:
        print(f"Error during Transcription: {e}")
        return history, str(e)
def clear():
    """Return an empty string; wired to the transcription textbox to blank it."""
    blank = ""
    return blank
def clear_state():
    """Return ``None``; wired to the history state to reset it."""
    # Falling off the end of the function yields None.
def clear_api_key():
    """Return an empty string; wired to the API-key textbox to wipe it."""
    return str()
# Markdown shown above the controls: what the app does and how to get a key.
_INTRO_MARKDOWN = """
## Translate simultaneously from multiple Indian languages to **English**.
### It supports **22 Indian languages**, including **Hindi, Oriya, Tamil, Telugu, Gujarati**, and more.
### 🔑 Sarvam AI API Key Required
To use this app, you need a free API key from [Sarvam AI](https://sarvam.ai).
👉 **Step 1:** Visit [https://sarvam.ai](https://sarvam.ai)
👉 **Step 2:** Sign up or log in
👉 **Step 3:** Generate your API key and paste it below
Your key stays on your device and is not stored.
"""

# Markdown shown below the controls: author information.
_ABOUT_MARKDOWN = """
---
### 👋 Who am I?
I am **Dr. Mohan Dash**, a PhD in Industrial Computer Science and an AI Research Engineer.
I run a YouTube channel called **[Intelligent Machines](https://www.youtube.com/@Mohankumardash)** where I share practical tutorials and insights on building real-world AI applications.
If you find this app useful, you'll definitely enjoy the tutorials and breakdowns I post there.
![YouTube Channel](https://yt3.googleusercontent.com/UYcIFCkqev-zwJemtbOPmmOzRU26gk-hetSSU18GWO-1wBbGHd7pjx5oTsz4x1sJ8riWg35TQw=w1707-fcrop64=1,00005a57ffffa5a8-k-c0xffffffff-no-nd-rj)
---
"""

# Build the UI: key entry, language picker, streaming microphone input and
# a transcript box, plus two reset buttons.
with gr.Blocks(theme=gr.themes.Soft()) as microphone:
    with gr.Column():
        gr.Markdown(_INTRO_MARKDOWN)

        api_key_box = gr.Textbox(label="Enter SARVAM AI API Key", type="password")

        language_options = [
            "hi-IN", "bn-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN",
            "pa-IN", "ta-IN", "te-IN", "en-IN", "gu-IN", "unknown",
        ]
        language_code_box = gr.Dropdown(
            choices=language_options,
            label="Select Language Code",
            value="unknown",  # default selection
        )

        input_audio_microphone = gr.Audio(streaming=True)
        output = gr.Textbox(
            label="Transcription",
            lines=10,
            max_lines=100,
            show_copy_button=True,
            value="",
        )

        with gr.Row():
            clear_button = gr.Button("Clear Output")
            clear_api_key_button = gr.Button("Clear API Key")

        # Accumulated transcript, carried between streaming callbacks.
        state = gr.State(value="")

        # Each streamed chunk goes through stream_transcribe, which returns
        # the new state and the text to display.
        input_audio_microphone.stream(
            stream_transcribe,
            [state, input_audio_microphone, language_code_box, api_key_box],
            [state, output],
            time_limit=30,
            stream_every=5,
            concurrency_limit=None,
        )

        # Reset the stored history first, then blank the visible textbox.
        clear_button.click(clear_state, outputs=[state]).then(clear, outputs=[output])
        clear_api_key_button.click(clear_api_key, outputs=[api_key_box])

        gr.Markdown(_ABOUT_MARKDOWN)

demo = microphone
demo.launch()