Cryptic committed on
Commit
cb9426c
·
1 Parent(s): 5a5050b
Files changed (2) hide show
  1. app.py +140 -28
  2. requirements.txt +6 -5
app.py CHANGED
@@ -1,38 +1,150 @@
1
  import streamlit as st
2
- from transformers import pipeline
 
 
3
  import numpy as np
 
 
4
  import soundfile as sf
5
 
6
- # Load models optimized for CPU
7
- transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en", device=-1)
8
- summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=-1)
9
- question_generator = pipeline("text2text-generation", model="google/t5-efficient-tiny", device=-1)
10
 
11
- # Streamlit UI
12
- st.title("Curate AI - Audio Transcription and Summarization")
 
13
 
14
- uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3", "m4a"])
15
- if uploaded_file is not None:
16
- st.audio(uploaded_file, format='audio/wav')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- # Read the audio file into a NumPy array
19
- audio_data, sample_rate = sf.read(uploaded_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- # Transcribing the audio
22
- st.write("Transcribing the audio...")
23
- lecture_text = transcriber(audio_data)["text"]
24
- st.write("Transcription: ", lecture_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- # Summarization
27
- st.write("Summarizing the transcription...")
28
- num_words = len(lecture_text.split())
29
- max_length = min(num_words, 1024) # Max input for BART is 1024 tokens
30
- summary = summarizer(lecture_text, max_length=1024, min_length=int(max_length * 0.1), truncation=True)
31
- st.write("Summary: ", summary[0]['summary_text'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- # Question Generation
34
- context = f"Based on the following lecture summary: {summary[0]['summary_text']}, generate some relevant practice questions."
35
- st.write("Generating questions...")
36
- questions = question_generator(context, max_new_tokens=50)
37
- for question in questions:
38
- st.write(question["generated_text"])
 
1
  import streamlit as st
2
+ import tempfile
3
+ import os
4
+ import librosa
5
  import numpy as np
6
+ from transformers import pipeline
7
+ import torch
8
  import soundfile as sf
9
 
10
# ---- Page setup ----
st.set_page_config(page_title="Audio Processing App", layout="wide")
st.title("Audio Lecture Processing App")

# Session-state flag: have the ML pipelines been loaded for this session?
if 'models_loaded' not in st.session_state:
    st.session_state['models_loaded'] = False
17
 
18
@st.cache_resource
def load_models():
    """Build and cache the ASR and summarization pipelines.

    Returns:
        tuple: ``(models, None)`` on success, where ``models`` maps
        ``'transcriber'`` and ``'summarizer'`` to transformers pipelines,
        or ``(None, error_message)`` if construction failed.
    """
    try:
        # Prefer the first GPU when available; -1 selects CPU in transformers.
        device = 0 if torch.cuda.is_available() else -1

        transcriber = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny.en",
            device=device,
        )
        summarizer = pipeline(
            "summarization",
            model="sshleifer/distilbart-cnn-12-6",
            device=device,
        )
        return {'transcriber': transcriber, 'summarizer': summarizer}, None
    except Exception as e:
        return None, f"Error loading models: {str(e)}"
36
 
37
def load_and_convert_audio(audio_path):
    """Load any supported audio file and write it out as a 16 kHz WAV.

    Args:
        audio_path: Path to the source audio file (wav/mp3/m4a/flac/...).

    Returns:
        str: Path to a temporary WAV file. The caller owns the file and
        must delete it when done.

    Raises:
        Exception: Wrapping the underlying error if loading or
        conversion fails; the original exception is chained as __cause__.
    """
    try:
        # librosa handles many container formats and resamples to the
        # 16 kHz rate that Whisper models expect.
        audio_data, sample_rate = librosa.load(audio_path, sr=16000)
        audio_data = audio_data.astype(np.float32)

        # Create the temp file and close its descriptor *before* writing:
        # soundfile re-opens the path, which fails on platforms that lock
        # open files (Windows) if the handle were still held.
        fd, temp_path = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
        sf.write(temp_path, audio_data, sample_rate, format='WAV')
        return temp_path
    except Exception as e:
        # Chain the cause so the full traceback survives the re-raise.
        raise Exception(f"Error converting audio: {str(e)}") from e
52
 
53
def process_audio(audio_path, models):
    """Transcribe and summarize an audio file, reporting progress in the UI.

    Args:
        audio_path: Path to the uploaded audio file on disk.
        models: Dict with 'transcriber' and 'summarizer' pipelines
            as produced by ``load_models``.

    Returns:
        dict with 'transcription' and 'summary' keys on success,
        or ``None`` if processing failed (the error is shown via st.error).
    """
    results = {}
    temp_wav_path = None

    try:
        # Convert audio to a 16 kHz WAV the ASR model can consume.
        with st.spinner('Converting audio format...'):
            temp_wav_path = load_and_convert_audio(audio_path)

        # Transcription
        with st.spinner('Transcribing audio...'):
            results['transcription'] = models['transcriber'](temp_wav_path)["text"]

        # Summarization
        with st.spinner('Generating summary...'):
            text = results['transcription']
            num_words = len(text.split())
            if num_words == 0:
                # Nothing was transcribed; calling the summarizer with
                # max_length=0 would raise, so return an empty summary.
                results['summary'] = ""
                return results

            # BART accepts at most 1024 input tokens; word count is a rough
            # proxy, so leave headroom with a 0.75 factor. Floor at 8 so
            # max_length/min_length stay valid for very short transcripts.
            max_length = max(8, int(min(num_words, 1024) * 0.75))
            summary = models['summarizer'](
                text,
                max_length=max_length,
                min_length=int(max_length * 0.1),
                truncation=True,
            )
            results['summary'] = summary[0]['summary_text']

            # Trim a trailing partial sentence from the generated summary.
            if not results['summary'].endswith((".", "!", "?")):
                last_period_index = results['summary'].rfind(".")
                if last_period_index != -1:
                    results['summary'] = results['summary'][:last_period_index + 1]

    except Exception as e:
        st.error(f"Error processing audio: {str(e)}")
        return None

    finally:
        # Best-effort removal of the intermediate WAV file; only swallow
        # filesystem errors, never KeyboardInterrupt/SystemExit.
        if temp_wav_path and os.path.exists(temp_wav_path):
            try:
                os.unlink(temp_wav_path)
            except OSError:
                pass

    return results
102
 
103
# ---- Main app ----
def main():
    """Drive the Streamlit UI: load models, accept an upload, show results."""
    # Load models once per session; @st.cache_resource also dedupes the
    # heavy pipeline construction across Streamlit reruns.
    if not st.session_state.models_loaded:
        with st.spinner('Loading models... This may take a few minutes...'):
            models, error = load_models()
            if error:
                st.error(error)
                return
            st.session_state.models_loaded = True
            st.session_state.models = models

    # File uploader with clear instructions
    st.write("Upload an audio file of your lecture (supported formats: WAV, MP3, M4A, FLAC)")
    uploaded_file = st.file_uploader("Choose a file", type=["wav", "mp3", "m4a", "flac"])

    if uploaded_file is None:
        return

    # Persist the upload to disk so librosa/ffmpeg can read it by path.
    # os.path.splitext yields a correct (possibly empty) extension even for
    # dot-less filenames, unlike naive string splitting on '.'.
    suffix = os.path.splitext(uploaded_file.name)[1] or ".wav"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio_file:
        temp_audio_file.write(uploaded_file.getbuffer())
        temp_audio_path = temp_audio_file.name

    try:
        results = process_audio(temp_audio_path, st.session_state.models)

        if results:
            # Display results in organized sections
            st.subheader("📝 Transcription")
            with st.expander("Show full transcription"):
                st.write(results['transcription'])

            st.subheader("📌 Summary")
            st.write(results['summary'])

    except Exception as e:
        st.error(f"An unexpected error occurred: {str(e)}")

    finally:
        # Always remove the uploaded temp copy; only swallow filesystem
        # errors, never KeyboardInterrupt/SystemExit.
        if os.path.exists(temp_audio_path):
            try:
                os.unlink(temp_audio_path)
            except OSError:
                pass


if __name__ == "__main__":
    main()
 
 
 
 
requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
- streamlit==1.23.0
2
- transformers==4.30.2
3
- torch==2.0.1
4
- soundfile==0.12.1
5
- numpy==1.23.5
 
 
1
+ streamlit
2
+ transformers
3
+ torch
4
+ soundfile
5
+ numpy
6
+ librosa