camparchimedes committed on
Commit
3d1368b
·
verified ·
1 Parent(s): 53b4978

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -26
app.py CHANGED
@@ -22,6 +22,7 @@ import re
22
  import uuid
23
  import time
24
  import psutil
 
25
  import subprocess
26
  from tqdm import tqdm
27
 
@@ -53,70 +54,102 @@ CACHE_EXAMPLES = torch.device('cuda') and os.getenv("CACHE_EXAMPLES", "0") == "1
53
  device = torch.device('cuda')
54
  #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
55
 
56
- #@spaces.GPU
57
- def transcribe(file_upload, progress=gr.Progress(track_tqdm=True)): # microphone
 
 
 
58
 
59
- file = file_upload # microphone if microphone is not None else
60
- start_time = time.time()
 
 
 
 
61
 
62
- #--------------____________________________________________--------------"
63
 
64
- #if torch.cuda.is_available():
65
- #with torch.no_grad():
66
- #pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device=device)
67
-
68
  with torch.no_grad():
69
  pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device=device)
70
-
71
- """
72
  # -- chunking
73
  chunks = chunk_audio(file, chunk_length_ms=30000, overlap_length_ms=5000)
74
 
75
  full_transcription = []
76
  for chunk in chunks:
77
- # -- convert chunk to temporary file-like object
78
  temp_audio = chunk.export(format="wav")
79
 
80
  # -- transcribe chunk
81
  text = pipe(temp_audio)["text"]
82
  full_transcription.append(text)
83
 
84
- # -- join chunk transcriptions
85
  full_text = " ".join(full_transcription)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  """
 
 
87
 
 
 
88
 
89
- text = pipe(file)["text"]
 
 
 
 
 
 
 
 
90
 
91
  #--------------____________________________________________--------------"
92
 
93
- end_time = time.time()
94
- output_time = end_time - start_time
95
 
96
  # --Word count
97
- word_count = len(text.split())
98
 
99
  # --Memory metrics
100
- memory = psutil.virtual_memory()
101
 
102
  # --CPU metric
103
- cpu_usage = psutil.cpu_percent(interval=1)
104
 
105
  # --GPU metric
106
- gpu_utilization, gpu_memory = GPUInfo.gpu_usage()
107
 
108
  # --system info string
109
- system_info = f"""
110
- Processing time: {output_time:.2f} seconds.
111
- Number of words: {word_count}
112
- GPU Memory: {gpu_memory}
113
- """
114
  #--------------____________________________________________--------------"
115
  #CPU Usage: {cpu_usage}%
116
  #Memory used: {memory.percent}%
117
  #GPU Utilization: {gpu_utilization}%
118
 
119
- return text, system_info
120
 
121
 
122
  ###############################################################################
 
22
  import uuid
23
  import time
24
  import psutil
25
+ import pydub
26
  import subprocess
27
  from tqdm import tqdm
28
 
 
54
  device = torch.device('cuda')
55
  #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
56
 
57
def chunk_audio(file, chunk_length_ms=30000, overlap_length_ms=5000):
    """Split an audio file into overlapping pydub chunks.

    Args:
        file: path or file-like object readable by ``pydub.AudioSegment.from_file``.
        chunk_length_ms: length of each chunk in milliseconds.
        overlap_length_ms: overlap between consecutive chunks in milliseconds;
            must be strictly smaller than ``chunk_length_ms``.

    Returns:
        list: ``pydub.AudioSegment`` chunks, in order, each at most
        ``chunk_length_ms`` long and overlapping its predecessor by
        ``overlap_length_ms``.

    Raises:
        ValueError: if ``overlap_length_ms >= chunk_length_ms`` (the loop
            step would be zero or negative and ``range`` would not advance).
    """
    if overlap_length_ms >= chunk_length_ms:
        raise ValueError("overlap_length_ms must be smaller than chunk_length_ms")

    # BUG FIX: the file only does `import pydub`, so the bare name
    # `AudioSegment` used here was undefined; qualify it with the module.
    # (Also removed the erroneous module-level `file = file_upload`
    # assignment that preceded this def: `file_upload` does not exist at
    # module scope and raised NameError on import.)
    audio = pydub.AudioSegment.from_file(file)

    step = chunk_length_ms - overlap_length_ms
    chunks = []
    for start in range(0, len(audio), step):
        end = min(len(audio), start + chunk_length_ms)
        chunks.append(audio[start:end])

    return chunks
71
 
72
def transcribe(file_upload, progress=gr.Progress(track_tqdm=True)):
    """Transcribe an uploaded audio file with NbAiLab/nb-whisper-large.

    The audio is split into overlapping 30 s chunks (5 s overlap), each
    chunk is transcribed independently, and the pieces are joined into a
    single text.

    Args:
        file_upload: path or file-like audio object (forwarded to pydub
            via ``chunk_audio``).
        progress: Gradio progress tracker with tqdm integration.
            NOTE(review): ``gr.Progress(...)`` in the default is evaluated
            once at definition time — confirm this is the intended Gradio
            usage pattern.

    Returns:
        tuple[str, str]: the full transcription and a human-readable
        system-info string (processing time, word count).
    """
    start_time = time.time()

    # Load the speech-recognition pipeline; no gradients needed for inference.
    with torch.no_grad():
        pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device=device)

    # BUG FIX: chunk the function's own `file_upload` argument. The
    # original read a module-level global `file` that was never validly
    # defined (its assignment referenced an undefined name).
    chunks = chunk_audio(file_upload, chunk_length_ms=30000, overlap_length_ms=5000)

    full_transcription = []
    for chunk in chunks:
        # Export each chunk to an in-memory WAV file-like object.
        temp_audio = chunk.export(format="wav")

        # Transcribe the chunk and collect its text.
        text = pipe(temp_audio)["text"]
        full_transcription.append(text)

    # Join the per-chunk transcriptions into one string.
    full_text = " ".join(full_transcription)

    # -- timing, word count (fixed "timimg" typo)
    end_time = time.time()
    output_time = end_time - start_time
    word_count = len(full_text.split())

    # -- metrics: collected but not included in the returned string;
    # kept to preserve original behavior. NOTE: cpu_percent(interval=1)
    # blocks for one second per call.
    memory = psutil.virtual_memory()
    cpu_usage = psutil.cpu_percent(interval=1)

    # --system info string
    system_info = f"""
    Processing time: {output_time:.2f} seconds.
    Number of words: {word_count}
    """

    return full_text, system_info
110
 
111
+ #@spaces.GPU
112
+ #def transcribe(file_upload, progress=gr.Progress(track_tqdm=True)): # microphone
113
 
114
+ #file = file_upload # microphone if microphone is not None else
115
+ #start_time = time.time()
116
+
117
+ #--------------____________________________________________--------------"
118
+
119
+ #with torch.no_grad():
120
+ #pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device=device)
121
+
122
+ #text = pipe(file)["text"]
123
 
124
  #--------------____________________________________________--------------"
125
 
126
+ #end_time = time.time()
127
+ #output_time = end_time - start_time
128
 
129
  # --Word count
130
+ #word_count = len(text.split())
131
 
132
  # --Memory metrics
133
+ #memory = psutil.virtual_memory()
134
 
135
  # --CPU metric
136
+ #cpu_usage = psutil.cpu_percent(interval=1)
137
 
138
  # --GPU metric
139
+ #gpu_utilization, gpu_memory = GPUInfo.gpu_usage()
140
 
141
  # --system info string
142
+ #system_info = f"""
143
+ #Processing time: {output_time:.2f} seconds.
144
+ #Number of words: {word_count}
145
+ #GPU Memory: {gpu_memory}
146
+
147
  #--------------____________________________________________--------------"
148
  #CPU Usage: {cpu_usage}%
149
  #Memory used: {memory.percent}%
150
  #GPU Utilization: {gpu_utilization}%
151
 
152
+ #return text, system_info
153
 
154
 
155
  ###############################################################################