Mohan-diffuser committed on
Commit
6af1e98
·
verified ·
1 Parent(s): 35958b1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +144 -0
app.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import time
3
+ import numpy as np
4
+ import os
5
+ import requests
6
+ import io
7
+ from pydub import AudioSegment
8
+
9
+
10
+
11
def translate_audio(audio, SARVAM_API_KEY):
    """Send one audio segment to Sarvam AI's speech-to-text-translate API.

    Args:
        audio: Object exposing ``export(buffer, format="wav")``; the caller
            in this file passes a pydub ``AudioSegment``.
        SARVAM_API_KEY: Value sent in the ``api-subscription-key`` header.

    Returns:
        The ``transcript`` field of the JSON response, or ``""`` when the
        request fails, raises, or the response carries no transcript.

    Raises:
        Whatever ``audio.export`` raises; only the HTTP request itself is
        guarded, matching the original control flow.
    """
    # API endpoint for speech-to-text translation
    api_url = "https://api.sarvam.ai/speech-to-text-translate"

    # Headers containing the API subscription key
    headers = {
        "api-subscription-key": SARVAM_API_KEY,
    }

    # Data payload for the translation request
    model_data = {
        "model": "saaras:v2",  # Specify the model to be used
        "with_diarization": False,  # Set to True for speaker diarization
    }

    # Default result: every failure path below must still return a string.
    # Previously `transcript` was unbound unless the API answered 200/201,
    # so any error ended in UnboundLocalError at the return statement.
    transcript = ""

    # The context manager closes the buffer even if export() raises.
    with io.BytesIO() as chunk_buffer:
        audio.export(chunk_buffer, format="wav")
        chunk_buffer.seek(0)  # Reset the pointer to the start of the stream

        # Prepare the file for the API request
        files = {"file": ("audiofile.wav", chunk_buffer, "audio/wav")}

        try:
            # A timeout keeps a stalled connection from blocking the
            # streaming callback indefinitely.
            response = requests.post(
                api_url,
                headers=headers,
                files=files,
                data=model_data,
                timeout=30,
            )
            if response.status_code in (200, 201):
                response_data = response.json()
                # `or ""` guards against an explicit JSON null transcript.
                transcript = response_data.get("transcript", "") or ""
            else:
                # Handle failed requests
                print(f"failed with status code: {response.status_code}")
                print("Response:", response.text)
        except Exception as e:
            # Best-effort: report the problem and fall back to "".
            print(f"Error processing chunk {e}")

    return transcript
53
+
54
def stream_transcribe(history, new_chunk, SARVAM_API_KEY):
    """Transcribe one streamed audio chunk and append it to the transcript.

    Args:
        history: Transcript accumulated so far; ``None`` is treated as "".
        new_chunk: ``(sample_rate, samples)`` pair where ``samples`` is a
            NumPy array, either 1-D or 2-D with channels on axis 1.
        SARVAM_API_KEY: API key forwarded to ``translate_audio``.

    Returns:
        ``(history, display_text, latency)``. On success the first two items
        are the updated transcript and the third is the elapsed seconds
        formatted with two decimals. On any exception the unchanged history,
        the exception message and the string ``"Error"`` are returned.
    """
    start_time = time.time()

    if history is None:
        history = ""

    try:
        sr, y = new_chunk

        # Float PCM is conventionally normalised to [-1.0, 1.0]; casting it
        # straight to int16 would truncate every sample to 0 or +/-1.
        # NOTE(review): only float data already within [-1, 1] is rescaled;
        # confirm no caller passes float arrays in another range.
        if (
            np.issubdtype(y.dtype, np.floating)
            and y.size
            and np.abs(y).max() <= 1.0
        ):
            y = y * 32767.0

        # Convert to mono if stereo
        if y.ndim > 1:
            y = y.mean(axis=1)

        # Convert to int16 for AudioSegment
        y_int16 = y.astype(np.int16)

        # Create AudioSegment from raw PCM data
        audio_segment = AudioSegment(
            data=y_int16.tobytes(),
            sample_width=2,
            frame_rate=sr,
            channels=1,
        )

        transcription = translate_audio(audio_segment, SARVAM_API_KEY)
        latency = time.time() - start_time

        # Skip empty results and avoid a leading newline on the first chunk.
        if transcription:
            history = f"{history}\n{transcription}" if history else transcription

        return history, history, f"{latency:.2f}"
    except Exception as e:
        print(f"Error during Transcription: {e}")
        return history, str(e), "Error"
87
+
88
+
89
+
90
+
91
def clear():
    """Return an empty string (wired to the transcription textbox output)."""
    blank_text = ""
    return blank_text
93
+
94
def clear_state():
    """Return ``None`` (wired to the ``state`` output of the clear button)."""
    reset_value = None
    return reset_value
96
+
97
def clear_api_key():
    """Return an empty string (wired to the API-key textbox output)."""
    empty_key = ""
    return empty_key
99
+
100
# Load the custom stylesheet; it is passed to gr.Blocks below (it was
# previously read but never applied).
with open("gradio.css", "r", encoding="utf-8") as f:
    custom_css = f.read()

# Help text shown above the API-key box. Kept flush-left so Markdown never
# interprets the lines as an indented code block.
API_KEY_HELP = """
### 🔑 Sarvam AI API Key Required
To use this app, you need a free API key from [Sarvam AI](https://sarvam.ai).

👉 **Step 1:** Visit [https://sarvam.ai](https://sarvam.ai)
👉 **Step 2:** Sign up or log in
👉 **Step 3:** Generate your API key and paste it below

Your key stays on your device and is not stored.
"""

# NOTE(review): the nesting below is reconstructed; the indentation of the
# original layout was not recoverable from the diff view.
with gr.Blocks(theme=gr.themes.Glass(), css=custom_css) as microphone:
    with gr.Column():
        gr.Markdown(API_KEY_HELP)
        api_key_box = gr.Textbox(label="Enter SARVAM AI API Key", type="password")

        with gr.Row():
            input_audio_microphone = gr.Audio(streaming=True)
            output = gr.Textbox(label="Transcription", value="")
            latency_textbox = gr.Textbox(label="Latency (seconds)", value="0.0", scale=0)
        with gr.Row():
            clear_button = gr.Button("Clear Output")
            clear_api_key_button = gr.Button("Clear API Key")

        # Transcript accumulated across streaming callbacks.
        state = gr.State(value="")

        # Kept as a named wrapper: Gradio derives the default endpoint name
        # from the callback's function name.
        def wrapped_stream_transcribe(history, new_chunk, api_key):
            return stream_transcribe(history, new_chunk, api_key)

        input_audio_microphone.stream(
            wrapped_stream_transcribe,
            [state, input_audio_microphone, api_key_box],
            [state, output, latency_textbox],
            time_limit=30,
            stream_every=5,
            concurrency_limit=None,
        )

        # Reset the stored transcript first, then blank the visible textbox.
        clear_button.click(clear_state, outputs=[state]).then(clear, outputs=[output])
        clear_api_key_button.click(clear_api_key, outputs=[api_key_box])


demo = microphone
demo.launch()