Spaces:
Sleeping
Sleeping
Cryptic
committed on
Commit
·
61bd7e7
1
Parent(s):
cb9426c
Test2
Browse files
app.py
CHANGED
@@ -25,7 +25,8 @@ def load_models():
|
|
25 |
models = {
|
26 |
'transcriber': pipeline("automatic-speech-recognition",
|
27 |
model="openai/whisper-tiny.en",
|
28 |
-
device=device
|
|
|
29 |
'summarizer': pipeline("summarization",
|
30 |
model="sshleifer/distilbart-cnn-12-6",
|
31 |
device=device)
|
@@ -60,25 +61,58 @@ def process_audio(audio_path, models):
|
|
60 |
with st.spinner('Converting audio format...'):
|
61 |
temp_wav_path = load_and_convert_audio(audio_path)
|
62 |
|
63 |
-
# Transcription
|
64 |
with st.spinner('Transcribing audio...'):
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
-
# Summarization
|
68 |
with st.spinner('Generating summary...'):
|
69 |
-
# Preprocess text
|
70 |
text = results['transcription']
|
71 |
-
num_words = len(text.split())
|
72 |
-
max_length = min(num_words, 1024)
|
73 |
-
max_length = int(max_length * 0.75)
|
74 |
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
# Clean up summary
|
84 |
if not results['summary'].endswith((".", "!", "?")):
|
@@ -114,6 +148,7 @@ def main():
|
|
114 |
|
115 |
# File uploader with clear instructions
|
116 |
st.write("Upload an audio file of your lecture (supported formats: WAV, MP3, M4A, FLAC)")
|
|
|
117 |
uploaded_file = st.file_uploader("Choose a file", type=["wav", "mp3", "m4a", "flac"])
|
118 |
|
119 |
if uploaded_file is not None:
|
|
|
25 |
models = {
|
26 |
'transcriber': pipeline("automatic-speech-recognition",
|
27 |
model="openai/whisper-tiny.en",
|
28 |
+
device=device,
|
29 |
+
chunk_length_s=30), # Process in 30-second chunks
|
30 |
'summarizer': pipeline("summarization",
|
31 |
model="sshleifer/distilbart-cnn-12-6",
|
32 |
device=device)
|
|
|
61 |
with st.spinner('Converting audio format...'):
|
62 |
temp_wav_path = load_and_convert_audio(audio_path)
|
63 |
|
64 |
+
# Transcription with progress bar
|
65 |
with st.spinner('Transcribing audio...'):
|
66 |
+
# Use return_timestamps=True for long audio files
|
67 |
+
transcription = models['transcriber'](
|
68 |
+
temp_wav_path,
|
69 |
+
return_timestamps=True,
|
70 |
+
generate_kwargs={"task": "transcribe"}
|
71 |
+
)
|
72 |
+
|
73 |
+
# Extract full text from chunks
|
74 |
+
if isinstance(transcription, dict):
|
75 |
+
results['transcription'] = transcription['text']
|
76 |
+
else:
|
77 |
+
# Combine chunks maintaining order
|
78 |
+
results['transcription'] = ' '.join([chunk['text'] for chunk in transcription])
|
79 |
|
80 |
+
# Summarization with chunking for long text
|
81 |
with st.spinner('Generating summary...'):
|
|
|
82 |
text = results['transcription']
|
|
|
|
|
|
|
83 |
|
84 |
+
# Split long text into chunks of ~1000 words for summarization
|
85 |
+
words = text.split()
|
86 |
+
chunk_size = 1000
|
87 |
+
chunks = [' '.join(words[i:i + chunk_size])
|
88 |
+
for i in range(0, len(words), chunk_size)]
|
89 |
+
|
90 |
+
# Summarize each chunk
|
91 |
+
summaries = []
|
92 |
+
for i, chunk in enumerate(chunks):
|
93 |
+
st.progress((i + 1) / len(chunks))
|
94 |
+
|
95 |
+
summary = models['summarizer'](
|
96 |
+
chunk,
|
97 |
+
max_length=200,
|
98 |
+
min_length=50,
|
99 |
+
truncation=True
|
100 |
+
)
|
101 |
+
summaries.append(summary[0]['summary_text'])
|
102 |
+
|
103 |
+
# Combine summaries
|
104 |
+
combined_summary = ' '.join(summaries)
|
105 |
+
|
106 |
+
# Final summarization if multiple chunks exist
|
107 |
+
if len(summaries) > 1:
|
108 |
+
combined_summary = models['summarizer'](
|
109 |
+
combined_summary,
|
110 |
+
max_length=200,
|
111 |
+
min_length=50,
|
112 |
+
truncation=True
|
113 |
+
)[0]['summary_text']
|
114 |
+
|
115 |
+
results['summary'] = combined_summary
|
116 |
|
117 |
# Clean up summary
|
118 |
if not results['summary'].endswith((".", "!", "?")):
|
|
|
148 |
|
149 |
# File uploader with clear instructions
|
150 |
st.write("Upload an audio file of your lecture (supported formats: WAV, MP3, M4A, FLAC)")
|
151 |
+
st.write("Note: Processing long audio files may take several minutes.")
|
152 |
uploaded_file = st.file_uploader("Choose a file", type=["wav", "mp3", "m4a", "flac"])
|
153 |
|
154 |
if uploaded_file is not None:
|