freddyaboulton (HF Staff) committed on
Commit f5c4474 · verified · 1 Parent(s): c2be063

Update app.py

Files changed (1)
  1. app.py +19 -7
app.py CHANGED
@@ -23,21 +23,24 @@ load_dotenv()
 client = AsyncClient(timeout=30)
 
 
-async def transcribe(audio: tuple[int, np.ndarray], transcript: str):
+async def transcribe_file(audio: tuple[int, np.ndarray]):
     response = await client.post(
         url="https://douatiz8x2itm3yn.us-east-1.aws.endpoints.huggingface.cloud/api/v1/audio/transcriptions",
         headers={"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"},
         files={"file": audio_to_bytes(audio)},
-        data={
-            "response_format": "text",
-        },
+        data={"response_format": "text"},
     )
-    yield AdditionalOutputs(transcript + " " + response.text)
+    return response.text
+
+
+async def transcribe(audio: tuple[int, np.ndarray], transcript: str):
+    text = await transcribe_file(audio)
+    yield AdditionalOutputs(transcript + " " + text)
 
 
 transcript = gr.Textbox(label="Transcript")
 stream = Stream(
-    ReplyOnPause(transcribe, input_sample_rate=16_000),
+    ReplyOnPause(transcribe),
     modality="audio",
     mode="send",
     additional_inputs=[transcript],
@@ -65,7 +68,16 @@ with gr.Blocks() as demo:
         </h2>
         """
     )
-    stream.ui.render()
+    with gr.Tabs():
+        with gr.Tab("Streaming"):
+            gr.Markdown("Grant access to the microphone and speak naturally. The transcript will be updated as you pause.")
+            stream.ui.render()
+        with gr.Tab("File Upload"):
+            gr.Interface(
+                fn=transcribe_file,
+                inputs=[gr.Audio(label="Upload Audio", sources=["upload", "microphone"])],
+                outputs=gr.Textbox(label="Transcript"),
+            )
 
 if __name__ == "__main__":
     demo.launch(allowed_paths=["AV_Huggy.png"])
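
For readers reconstructing the full file from this diff: below is a minimal sketch of the module-level context both hunks assume. The actual import block sits above the first hunk and is not shown here, so the library sources (httpx for AsyncClient, python-dotenv for load_dotenv, fastrtc for Stream, ReplyOnPause, AdditionalOutputs, and audio_to_bytes) are assumptions inferred from the names used in the diff, not confirmed by it.

import os

import gradio as gr
import numpy as np
from dotenv import load_dotenv  # assumed source of load_dotenv()
from fastrtc import (  # assumed source of the streaming helpers
    AdditionalOutputs,
    ReplyOnPause,
    Stream,
    audio_to_bytes,
)
from httpx import AsyncClient  # assumed source of AsyncClient

load_dotenv()

client = AsyncClient(timeout=30)

# transcribe_file(), transcribe(), transcript, and stream are defined exactly
# as shown in the first hunk above.

with gr.Blocks() as demo:
    # The heading block whose closing </h2> appears as context in the second
    # hunk goes here, followed by the Tabs layout added in that hunk.
    ...

if __name__ == "__main__":
    demo.launch(allowed_paths=["AV_Huggy.png"])

Factoring the endpoint call out into transcribe_file is what lets the new File Upload tab reuse the same transcription path as the streaming ReplyOnPause handler.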