|
import gradio as gr |
|
import time |
|
import numpy as np |
|
import os |
|
import requests |
|
import io |
|
from pydub import AudioSegment |
|
|
|
|
|
|
|
def translate_audio(audio, language_code, SARVAM_API_KEY):
    """Send one audio clip to Sarvam AI's speech-to-text-translate endpoint.

    Args:
        audio: Object exposing ``export(file_obj, format="wav")``; the caller
            in this file passes a pydub ``AudioSegment``.
        language_code: Value sent as the ``language_code`` form field.
        SARVAM_API_KEY: Key sent in the ``api-subscription-key`` header.
            Surrounding whitespace is stripped before use.

    Returns:
        Tuple ``(transcript, detected_language)`` taken from the ``transcript``
        and ``language_code`` fields of the JSON response. A missing or null
        field is returned as ``""``.

    Raises:
        ValueError: If the key is empty/blank or the API answers 401/403.
        RuntimeError: If the API answers with any other non-200/201 status.
        requests.RequestException: On network failure or timeout.
    """
    # Reject a blank key locally instead of spending a network round-trip on
    # a request that cannot authenticate. Stripping also removes whitespace
    # picked up when the key is pasted.
    api_key = (SARVAM_API_KEY or "").strip()
    if not api_key:
        raise ValueError("β Invalid API key. Please check your Sarvam AI key.")

    api_url = "https://api.sarvam.ai/speech-to-text-translate"
    headers = {"api-subscription-key": api_key}
    model_data = {
        "model": "saaras:v2",
        "with_diarization": False,
        "language_code": language_code,
    }

    # The context manager closes the in-memory WAV buffer on every path,
    # including when export() or the HTTP call raises.
    with io.BytesIO() as chunk_buffer:
        audio.export(chunk_buffer, format="wav")
        chunk_buffer.seek(0)
        files = {"file": ("audiofile.wav", chunk_buffer, "audio/wav")}
        # Without a timeout requests waits indefinitely, which would stall
        # the streaming handler on a hung connection.
        response = requests.post(
            api_url,
            headers=headers,
            files=files,
            data=model_data,
            timeout=30,
        )

    if response.status_code in (401, 403):
        raise ValueError("β Invalid API key. Please check your Sarvam AI key.")
    if response.status_code not in (200, 201):
        raise RuntimeError(
            f"β Request failed with status code: {response.status_code}. "
            f"Details: {response.text}"
        )

    response_data = response.json()
    # `or ""` also covers an explicit JSON null, so callers can concatenate
    # the results as strings.
    transcript = response_data.get("transcript") or ""
    detected_language = response_data.get("language_code") or ""
    return transcript, detected_language
|
|
|
def stream_transcribe(history, new_chunk, language_code, SARVAM_API_KEY):
    """Translate one streamed audio chunk and append it to the transcript.

    Args:
        history: Transcript accumulated so far, or ``None`` for a fresh start.
        new_chunk: ``(sample_rate, samples)`` tuple where ``samples`` is a
            NumPy array, or ``None`` when there is no audio to process.
        language_code: Language code forwarded to ``translate_audio``.
        SARVAM_API_KEY: API key forwarded to ``translate_audio``.

    Returns:
        Tuple ``(state, display)``. On success both are the updated
        transcript. On failure ``state`` is the unchanged transcript and
        ``display`` is the error message. With no audio both are the
        unchanged transcript.
    """
    if history is None:
        history = ""

    # Nothing to transcribe: keep the transcript as is rather than letting
    # the tuple unpacking below fail and surface a TypeError message.
    if new_chunk is None:
        return history, history

    try:
        sr, y = new_chunk

        # An empty buffer would only produce an empty WAV upload.
        if y.size == 0:
            return history, history

        # Down-mix multi-channel audio to mono by averaging the channels.
        if y.ndim > 1:
            y = y.mean(axis=1)

        # assumes samples are already in the 16-bit PCM range — TODO confirm
        y_int16 = y.astype(np.int16)

        audio_segment = AudioSegment(
            data=y_int16.tobytes(),
            sample_width=2,
            frame_rate=sr,
            channels=1,
        )

        transcription, detected_language = translate_audio(
            audio_segment, language_code, SARVAM_API_KEY
        )

        # Only separate entries with a newline; the first entry must not
        # start with a blank line.
        separator = "\n" if history else ""
        history = history + separator + f'({detected_language})==> ' + transcription

        return history, history
    except ValueError as ve:
        return history, str(ve)
    except Exception as e:
        print(f"Error during Transcription: {e}")
        return history, str(e)
|
|
|
|
|
|
|
|
|
def clear() -> str:
    """Return an empty string, used to blank the transcription textbox."""
    return ""
|
|
|
def clear_state() -> None:
    """Return ``None``, used to reset the stored transcript state."""
    return None
|
|
|
def clear_api_key() -> str:
    """Return an empty string, used to wipe the API key textbox."""
    return ""
|
|
|
|
|
# Markdown shown above the controls: what the app does and how to get a key.
_INTRO_MARKDOWN = """
## Translate simultaneously from multiple Indian languages to **English**.
### It supports **22 Indian languages**, including **Hindi, Oriya, Tamil, Telugu, Gujarati**, and more.

### π Sarvam AI API Key Required
To use this app, you need a free API key from [Sarvam AI](https://sarvam.ai).

π **Step 1:** Visit [https://sarvam.ai](https://sarvam.ai)
π **Step 2:** Sign up or log in
π **Step 3:** Generate your API key and paste it below

Your key stays on your device and is not stored.
"""

# Markdown shown below the controls: author blurb.
_ABOUT_MARKDOWN = """
---

### π Who am I?

I am **Dr. Mohan Dash**, a PhD in Industrial Computer Science and an AI Research Engineer.
I run a YouTube channel called **[Intelligent Machines](https://www.youtube.com/@Mohankumardash)** where I share practical tutorials and insights on building real-world AI applications.

If you find this app useful, you'll definitely enjoy the tutorials and breakdowns I post there.


---
"""

# NOTE(review): the nesting below is reconstructed from statement order; the
# original indentation was not available — confirm the layout against the
# running app.
with gr.Blocks(theme=gr.themes.Soft()) as microphone:
    with gr.Column():
        gr.Markdown(_INTRO_MARKDOWN)

        # Inputs: API key, source-language selector and the streaming audio.
        api_key_box = gr.Textbox(
            label="Enter SARVAM AI API Key",
            type="password",
        )

        language_options = [
            "hi-IN", "bn-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN",
            "pa-IN", "ta-IN", "te-IN", "en-IN", "gu-IN", "unknown",
        ]
        language_code_box = gr.Dropdown(
            choices=language_options,
            label="Select Language Code",
            value="unknown",
        )

        input_audio_microphone = gr.Audio(streaming=True)
        output = gr.Textbox(
            label="Transcription",
            lines=10,
            max_lines=100,
            show_copy_button=True,
            value="",
        )

        with gr.Row():
            clear_button = gr.Button("Clear Output")
            clear_api_key_button = gr.Button("Clear API Key")

        # Holds the accumulated transcript between stream events.
        state = gr.State(value="")

        # Each stream event passes the stored transcript, the audio, the
        # language code and the key to stream_transcribe, which returns the
        # new state and the text to display.
        input_audio_microphone.stream(
            fn=stream_transcribe,
            inputs=[state, input_audio_microphone, language_code_box, api_key_box],
            outputs=[state, output],
            time_limit=30,
            stream_every=5,
            concurrency_limit=None,
        )

        # Reset the stored transcript first, then blank the visible textbox.
        clear_button.click(fn=clear_state, outputs=[state]).then(
            fn=clear, outputs=[output]
        )
        clear_api_key_button.click(fn=clear_api_key, outputs=[api_key_box])

        gr.Markdown(_ABOUT_MARKDOWN)

demo = microphone
demo.launch()