Emmanuel08 committed on
Commit
4fb7b6c
·
verified ·
1 Parent(s): 4b6216c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -81
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import torch
 
2
  import gradio as gr
3
  import time
4
  import numpy as np
@@ -27,6 +28,7 @@ pipe = pipeline(
27
  chunk_length_s=2, # ✅ Process in 2-second chunks for ultra-low latency
28
  torch_dtype=torch_dtype,
29
  device=device,
 
30
  )
31
 
32
  # ✅ 4️⃣ Real-Time Streaming Transcription (Microphone)
@@ -42,84 +44,4 @@ def stream_transcribe(stream, new_chunk):
42
  y = y.astype(np.float32)
43
  y /= np.max(np.abs(y))
44
 
45
- # ✅ Append to Stream
46
- if stream is not None:
47
- stream = np.concatenate([stream, y])
48
- else:
49
- stream = y
50
-
51
- # ✅ Run Transcription
52
- transcription = pipe({"sampling_rate": sr, "raw": stream})["text"]
53
- latency = time.time() - start_time
54
-
55
- return stream, transcription, f"{latency:.2f} sec"
56
-
57
- except Exception as e:
58
- print(f"Error: {e}")
59
- return stream, str(e), "Error"
60
-
61
- # ✅ 5️⃣ Transcription for File Upload
62
- def transcribe(inputs, previous_transcription):
63
- start_time = time.time()
64
- try:
65
- # ✅ Convert file input to correct format
66
- sample_rate, audio_data = inputs
67
- transcription = pipe({"sampling_rate": sample_rate, "raw": audio_data})["text"]
68
-
69
- previous_transcription += transcription
70
- latency = time.time() - start_time
71
-
72
- return previous_transcription, f"{latency:.2f} sec"
73
-
74
- except Exception as e:
75
- print(f"Error: {e}")
76
- return previous_transcription, "Error"
77
-
78
- # ✅ 6️⃣ Clear Function
79
- def clear():
80
- return ""
81
-
82
- # ✅ 7️⃣ Gradio Interface (Microphone Streaming)
83
- with gr.Blocks() as microphone:
84
- gr.Markdown(f"# Whisper Tiny - Real-Time Transcription (CPU) 🎙️")
85
- gr.Markdown(f"Using [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) for ultra-fast speech-to-text.")
86
-
87
- with gr.Row():
88
- input_audio_microphone = gr.Audio(sources=["microphone"], type="numpy", streaming=True)
89
- output = gr.Textbox(label="Live Transcription", value="")
90
- latency_textbox = gr.Textbox(label="Latency (seconds)", value="0.0")
91
-
92
- with gr.Row():
93
- clear_button = gr.Button("Clear Output")
94
-
95
- state = gr.State()
96
- input_audio_microphone.stream(
97
- stream_transcribe, [state, input_audio_microphone],
98
- [state, output, latency_textbox], time_limit=30, stream_every=1
99
- )
100
- clear_button.click(clear, outputs=[output])
101
-
102
- # ✅ 8️⃣ Gradio Interface (File Upload)
103
- with gr.Blocks() as file:
104
- gr.Markdown(f"# Upload Audio File for Transcription 🎡")
105
- gr.Markdown(f"Using [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) for speech-to-text.")
106
-
107
- with gr.Row():
108
- input_audio = gr.Audio(sources=["upload"], type="numpy")
109
- output = gr.Textbox(label="Transcription", value="")
110
- latency_textbox = gr.Textbox(label="Latency (seconds)", value="0.0")
111
-
112
- with gr.Row():
113
- submit_button = gr.Button("Submit")
114
- clear_button = gr.Button("Clear Output")
115
-
116
- submit_button.click(transcribe, [input_audio, output], [output, latency_textbox])
117
- clear_button.click(clear, outputs=[output])
118
-
119
- # ✅ 9️⃣ Final Gradio App (Supports Microphone & File Upload)
120
- with gr.Blocks(theme=gr.themes.Ocean()) as demo:
121
- gr.TabbedInterface([microphone, file], ["Microphone", "Upload Audio"])
122
-
123
- # ✅ 1️⃣0️⃣ Run Gradio Locally
124
- if __name__ == "__main__":
125
- demo.launch()
 
1
  import torch
2
+ import torchaudio # ✅ Added torchaudio to handle audio resampling
3
  import gradio as gr
4
  import time
5
  import numpy as np
 
28
  chunk_length_s=2, # ✅ Process in 2-second chunks for ultra-low latency
29
  torch_dtype=torch_dtype,
30
  device=device,
31
+ sampling_rate=16000, # ✅ Explicitly set sampling rate to avoid resampling issues
32
  )
33
 
34
  # ✅ 4️⃣ Real-Time Streaming Transcription (Microphone)
 
44
  y = y.astype(np.float32)
45
  y /= np.max(np.abs(y))
46
 
47
+ # ✅ Resample audio