Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -38,30 +38,25 @@ def convert_to_wav(audio_file):
|
|
38 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
39 |
|
40 |
# Load Whisper model and tokenizer
|
41 |
-
|
42 |
-
|
43 |
-
summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
|
44 |
|
45 |
-
# Transcribe audio to text
|
46 |
def transcribe_audio(audio_file):
|
47 |
if audio_file.endswith(".m4a"):
|
48 |
audio_file = convert_to_wav(audio_file)
|
49 |
-
|
50 |
start_time = time.time()
|
51 |
|
52 |
-
#
|
53 |
-
|
54 |
-
|
55 |
|
56 |
-
# Generate the transcription
|
57 |
-
output =
|
58 |
-
inputs['input_ids'],
|
59 |
-
attention_mask=inputs['attention_mask']
|
60 |
-
)
|
61 |
|
62 |
# Decode the output
|
63 |
-
text =
|
64 |
-
|
65 |
output_time = time.time() - start_time
|
66 |
result = f"Time taken: {output_time:.2f} seconds\nNumber of words: {len(text.split())}"
|
67 |
|
@@ -171,7 +166,7 @@ iface = gr.Blocks()
|
|
171 |
with iface:
|
172 |
gr.HTML("""
|
173 |
<div style="text-align: center;">
|
174 |
-
<img src="https://huggingface.co/spaces/camparchimedes/transcription_app/
|
175 |
</div>
|
176 |
""")
|
177 |
gr.Markdown("# Vi har nå muligheten til å oversette lydfiler til norsk skrift.")
|
|
|
38 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
39 |
|
40 |
# Load Whisper model and tokenizer
|
41 |
+
whisper_processor = WhisperProcessor.from_pretrained("NbAiLab/nb-whisper-large")
|
42 |
+
whisper_model = torch.hub.load('huggingface/pytorch-transformers', 'model', "NbAiLab/nb-whisper-large").to(device)
|
|
|
43 |
|
|
|
44 |
def transcribe_audio(audio_file):
|
45 |
if audio_file.endswith(".m4a"):
|
46 |
audio_file = convert_to_wav(audio_file)
|
47 |
+
|
48 |
start_time = time.time()
|
49 |
|
50 |
+
# Load the audio file and process it with Whisper's processor
|
51 |
+
audio, sample_rate = whisper_processor.audio_to_array(audio_file)
|
52 |
+
input_features = whisper_processor(audio, sampling_rate=sample_rate, return_tensors="pt").input_features.to(device)
|
53 |
|
54 |
+
# Generate the transcription
|
55 |
+
output = whisper_model.generate(input_features=input_features)
|
|
|
|
|
|
|
56 |
|
57 |
# Decode the output
|
58 |
+
text = whisper_processor.batch_decode(output, skip_special_tokens=True)[0]
|
59 |
+
|
60 |
output_time = time.time() - start_time
|
61 |
result = f"Time taken: {output_time:.2f} seconds\nNumber of words: {len(text.split())}"
|
62 |
|
|
|
166 |
with iface:
|
167 |
gr.HTML("""
|
168 |
<div style="text-align: center;">
|
169 |
+
<img src="https://huggingface.co/spaces/camparchimedes/transcription_app/raw/main/banner_trans.png" alt="" width="100%" height="auto">
|
170 |
</div>
|
171 |
""")
|
172 |
gr.Markdown("# Vi har nå muligheten til å oversette lydfiler til norsk skrift.")
|