camparchimedes committed on
Commit
9da571f
·
verified ·
1 Parent(s): f7f9941

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -4
app.py CHANGED
@@ -31,6 +31,7 @@ from fpdf import FPDF
31
  from pathlib import Path
32
 
33
  import numpy as np
 
34
  import torch
35
  from transformers import pipeline
36
 
@@ -54,6 +55,61 @@ CACHE_EXAMPLES = torch.device('cuda') and os.getenv("CACHE_EXAMPLES", "0") == "1
54
  device = torch.device('cuda')
55
  #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  #@spaces.GPU
58
  def transcribe(file_upload, progress=gr.Progress(track_tqdm=True)): # microphone
59
 
@@ -86,9 +142,9 @@ def transcribe(file_upload, progress=gr.Progress(track_tqdm=True)): # microphone
86
 
87
  # --system info string
88
  system_info = f"""
89
- Processing time: {output_time:.2f} seconds.
90
- Number of words: {word_count}
91
- GPU Memory: {gpu_memory}"""
92
 
93
  #--------------____________________________________________--------------"
94
 
@@ -96,8 +152,9 @@ def transcribe(file_upload, progress=gr.Progress(track_tqdm=True)): # microphone
96
  #Memory used: {memory.percent}%
97
  #GPU Utilization: {gpu_utilization}%
98
 
99
- return text, system_info
100
 
 
101
 
102
  ###############################################################################
103
  # Interface.
 
31
  from pathlib import Path
32
 
33
  import numpy as np
34
+ import librosa
35
  import torch
36
  from transformers import pipeline
37
 
 
55
  device = torch.device('cuda')
56
  #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
57
 
58
def transcribe(file_upload, progress=gr.Progress(track_tqdm=True)):
    """Transcribe an uploaded audio file with NbAiLab/nb-whisper-large.

    Args:
        file_upload: Path to the uploaded audio file (any format librosa can read).
        progress: Gradio progress tracker; track_tqdm surfaces the pipeline's
            internal tqdm bars in the UI.

    Returns:
        tuple[str, str]: (transcribed text, human-readable system-info summary).
    """
    file = file_upload  # microphone if microphone is not None else
    start_time = time.time()

    # -- load at the file's native sampling rate; sr is needed for chunking
    audio, sr = librosa.load(file_upload, sr=None)

    # -- asr pipeline (inference only, so skip autograd bookkeeping)
    with torch.no_grad():
        pipe = pipeline(
            "automatic-speech-recognition",
            model="NbAiLab/nb-whisper-large",
            chunk_length_s=30,
            device=device,
        )

        # -- process audio in chunks of 30 seconds
        chunk_size = sr * 30
        pieces = []
        for start in range(0, len(audio), chunk_size):
            chunk = audio[start:start + chunk_size]
            # Feed the raw array directly instead of writing a temp WAV:
            # librosa.output.write_wav was removed in librosa 0.8, so the old
            # temp-file path raised AttributeError (and leaked temp_chunk.wav).
            chunk_text = pipe({"array": chunk, "sampling_rate": sr})["text"]
            pieces.append(chunk_text)
    text = " ".join(pieces)

    end_time = time.time()
    output_time = end_time - start_time

    # -- word count of the full transcript
    word_count = len(text.split())

    # -- host memory metrics
    memory = psutil.virtual_memory()

    # GPU memory via torch. The previous GPUInfo.gpu_usage() call was commented
    # out, which left gpu_memory/gpu_utilization undefined and made the
    # f-string below raise NameError on every invocation.
    if torch.cuda.is_available():
        gpu_memory = f"{torch.cuda.memory_allocated() / 1024**2:.0f} MiB"
    else:
        gpu_memory = "n/a"

    # -- system info string
    system_info = f"""
    Processing time: {output_time:.2f} seconds.
    Number of words: {word_count}
    Memory used: {memory.percent}%
    GPU Memory: {gpu_memory}"""

    return text.strip(), system_info
110
+
111
+
112
+ """
113
  #@spaces.GPU
114
  def transcribe(file_upload, progress=gr.Progress(track_tqdm=True)): # microphone
115
 
 
142
 
143
  # --system info string
144
  system_info = f"""
145
+ #Processing time: {output_time:.2f} seconds.
146
+ #Number of words: {word_count}
147
+ #GPU Memory: {gpu_memory}"""
148
 
149
  #--------------____________________________________________--------------"
150
 
 
152
  #Memory used: {memory.percent}%
153
  #GPU Utilization: {gpu_utilization}%
154
 
155
+ #return text, system_info
156
 
157
+ """
158
 
159
  ###############################################################################
160
  # Interface.