muhtasham committed on
Commit
1e5e969
·
1 Parent(s): 07c515e
Files changed (1) hide show
  1. app.py +27 -33
app.py CHANGED
@@ -1,14 +1,10 @@
1
- import spaces
2
- import torch
3
  import gradio as gr
4
- from transformers import pipeline
5
- from transformers.pipelines.audio_utils import ffmpeg_read
6
  import subprocess
7
  from loguru import logger
8
 
9
- MODEL_NAME = "muhtasham/whisper-tg"
10
- BATCH_SIZE = 8
11
- FILE_LIMIT_MB = 1000
12
 
13
  # Check if ffmpeg is installed
14
  def check_ffmpeg():
@@ -21,33 +17,33 @@ def check_ffmpeg():
21
  # Initialize ffmpeg check
22
  check_ffmpeg()
23
 
24
- device = 0 if torch.cuda.is_available() else "cpu"
25
-
26
- pipe = pipeline(
27
- task="automatic-speech-recognition",
28
- model=MODEL_NAME,
29
- chunk_length_s=30,
30
- device=device,
31
- )
32
-
33
- print(pipe)
34
-
35
- @spaces.GPU
36
  def transcribe(inputs):
37
  if inputs is None:
38
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
39
 
40
- result = pipe(inputs, batch_size=BATCH_SIZE, return_timestamps=True)
41
-
42
- # Format timestamps with text
43
- timestamps = []
44
- for chunk in result["chunks"]:
45
- start_time = chunk["timestamp"][0]
46
- end_time = chunk["timestamp"][1]
47
- text = chunk["text"].strip()
48
- timestamps.append(f"[{start_time:.2f}s - {end_time:.2f}s] {text} \n \n")
49
-
50
- return result["text"], "\n".join(timestamps)
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  demo = gr.Blocks(theme=gr.themes.Ocean())
53
 
@@ -62,9 +58,7 @@ mf_transcribe = gr.Interface(
62
  ],
63
  title="Whisper Large V3 Turbo: Transcribe Audio",
64
  description=(
65
- "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
66
- f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
67
- " of arbitrary length."
68
  ),
69
  allow_flagging="never",
70
  )
 
 
 
1
  import gradio as gr
2
+ import requests
 
3
  import subprocess
4
  from loguru import logger
5
 
6
+ API_URL = "https://skdpcqcdd929o4k3.us-east-1.aws.endpoints.huggingface.cloud"
7
+
 
8
 
9
  # Check if ffmpeg is installed
10
  def check_ffmpeg():
 
17
  # Initialize ffmpeg check
18
  check_ffmpeg()
19
 
 
 
 
 
 
 
 
 
 
 
 
 
20
def transcribe(inputs):
    """Transcribe an audio file by POSTing it to the remote inference endpoint.

    Parameters
    ----------
    inputs : str | None
        Filesystem path to the uploaded/recorded audio, as supplied by the
        Gradio audio component (``type="filepath"`` — TODO confirm against
        the interface definition).

    Returns
    -------
    tuple[str, str]
        The full transcript text, and a newline-joined string of
        ``[start s - end s] text`` segments built from ``result["chunks"]``.

    Raises
    ------
    gr.Error
        If no file was submitted, or if the request/response handling fails.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    headers = {
        "Accept": "application/json",
        # NOTE(review): content type is hard-coded regardless of the actual
        # file format — presumably the endpoint sniffs the codec; verify that
        # non-FLAC uploads are accepted.
        "Content-Type": "audio/flac"
    }

    try:
        with open(inputs, "rb") as f:
            data = f.read()
        # A timeout keeps a stalled/cold endpoint from hanging the UI forever.
        response = requests.post(API_URL, headers=headers, data=data, timeout=600)
        # Surface HTTP-level failures (auth, 5xx, endpoint scaling up) as the
        # real cause instead of a confusing JSON/KeyError further down.
        response.raise_for_status()
        result = response.json()

        # Format each chunk as "[start s - end s] text".
        timestamps = []
        for chunk in result["chunks"]:
            start_time = chunk["timestamp"][0]
            end_time = chunk["timestamp"][1]
            text = chunk["text"].strip()
            timestamps.append(f"[{start_time:.2f}s - {end_time:.2f}s] {text} \n \n")

        return result["text"], "\n".join(timestamps)
    except Exception as e:
        # UI boundary: log the full detail, then re-raise as a user-visible error.
        logger.error(f"Error during transcription: {str(e)}")
        raise gr.Error(f"Failed to transcribe audio: {str(e)}")
47
 
48
  demo = gr.Blocks(theme=gr.themes.Ocean())
49
 
 
58
  ],
59
  title="Whisper Large V3 Turbo: Transcribe Audio",
60
  description=(
61
+ "Transcribe long-form microphone or audio inputs with the click of a button! "
 
 
62
  ),
63
  allow_flagging="never",
64
  )