Update app.py
app.py CHANGED
@@ -1,3 +1,10 @@
+"""
+
+This application processes audio files, transcribes them using a pretrained model (Whisper), and provides multiple summarization options for the transcribed text. The application also includes a PDF generation feature and is built with Gradio for the user interface
+
+Webapp, transkribering (norsk), NbAiLab/nb-whisper-large, oppsummering, pdf-download.
+
+"""
 import time
 import os
 import warnings
@@ -39,9 +46,20 @@ def convert_to_wav(audio_file):
 # Initialize device for torch
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
-# Load
-
-
+# Load smoking-big-pipe
+MODEL_NAME = "NbAiLab/nb-whisper-large"
+lang = "no"
+
+device = 0 if torch.cuda.is_available() else "cpu"
+pipe = pipeline(
+    task="automatic-speech-recognition",
+    model=MODEL_NAME,
+    chunk_length_s=30,
+    device=device,
+)
+
+pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
+
 
 def transcribe_audio(audio_file):
     if audio_file.endswith(".m4a"):
@@ -49,23 +67,26 @@ def transcribe_audio(audio_file):
 
     start_time = time.time()
 
+    text = pipe(audio_file)["text"]
+
     # Load the audio file using torchaudio
-    waveform, sample_rate = torchaudio.load(audio_file)
-
+    #waveform, sample_rate = torchaudio.load(audio_file)
+
     # Process the waveform with Whisper's processor
-    input_features = whisper_processor(waveform, sampling_rate=sample_rate, return_tensors="pt").input_features.to(device)
+    #input_features = whisper_processor(waveform, sampling_rate=sample_rate, return_tensors="pt").input_features.to(device)
 
     # Generate the transcription
-    output = whisper_model.generate(input_features=input_features)
+    #output = whisper_model.generate(input_features=input_features)
 
     # Decode the output
-    text = whisper_processor.batch_decode(output, skip_special_tokens=True)[0]
+    #text = whisper_processor.batch_decode(output, skip_special_tokens=True)[0]
 
     output_time = time.time() - start_time
     result = f"Time taken: {output_time:.2f} seconds\nNumber of words: {len(text.split())}"
 
     return text, result
 
+
 # Clean and preprocess text for summarization
 def clean_text(text):
     text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)
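
Taken together, the change swaps the manual torchaudio + whisper_processor/whisper_model path for a single transformers ASR pipeline. The sketch below shows the new flow as a standalone script; it assumes the transformers and torch packages are installed, that the NbAiLab/nb-whisper-large checkpoint can be downloaded, and that ffmpeg is available for audio decoding. The input file name is a placeholder, and error handling is omitted.

import torch
from transformers import pipeline

MODEL_NAME = "NbAiLab/nb-whisper-large"

# pipeline() accepts a GPU index (0) or the string "cpu"
device = 0 if torch.cuda.is_available() else "cpu"

pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,  # transcribe long recordings in 30-second chunks
    device=device,
)

# Force Norwegian transcription, as in the commit
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
    language="no", task="transcribe"
)

if __name__ == "__main__":
    text = pipe("example.wav")["text"]  # "example.wav" is a placeholder path
    print(text)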
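
The second hunk header references a convert_to_wav(audio_file) helper whose body is not part of this diff, and transcribe_audio still special-cases .m4a input. A hypothetical sketch of what such a helper might look like, assuming pydub (which requires ffmpeg on the PATH); the actual implementation in app.py may differ.

from pydub import AudioSegment  # assumption: pydub is used; the diff does not show the real helper

def convert_to_wav(audio_file: str) -> str:
    """Hypothetical: convert an .m4a (or other ffmpeg-readable) file to .wav and return the new path."""
    wav_path = audio_file.rsplit(".", 1)[0] + ".wav"
    AudioSegment.from_file(audio_file).export(wav_path, format="wav")
    return wav_path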