Cryptic committed on
Commit
cb9426c
·
1 Parent(s): 5a5050b
Files changed (2) hide show
  1. app.py +140 -28
  2. requirements.txt +6 -5
app.py CHANGED
@@ -1,38 +1,150 @@
1
  import streamlit as st
2
- from transformers import pipeline
 
 
3
  import numpy as np
 
 
4
  import soundfile as sf
5
 
6
- # Load models optimized for CPU
7
- transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en", device=-1)
8
- summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=-1)
9
- question_generator = pipeline("text2text-generation", model="google/t5-efficient-tiny", device=-1)
10
 
11
- # Streamlit UI
12
- st.title("Curate AI - Audio Transcription and Summarization")
 
13
 
14
- uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3", "m4a"])
15
- if uploaded_file is not None:
16
- st.audio(uploaded_file, format='audio/wav')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- # Read the audio file into a NumPy array
19
- audio_data, sample_rate = sf.read(uploaded_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- # Transcribing the audio
22
- st.write("Transcribing the audio...")
23
- lecture_text = transcriber(audio_data)["text"]
24
- st.write("Transcription: ", lecture_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- # Summarization
27
- st.write("Summarizing the transcription...")
28
- num_words = len(lecture_text.split())
29
- max_length = min(num_words, 1024) # Max input for BART is 1024 tokens
30
- summary = summarizer(lecture_text, max_length=1024, min_length=int(max_length * 0.1), truncation=True)
31
- st.write("Summary: ", summary[0]['summary_text'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- # Question Generation
34
- context = f"Based on the following lecture summary: {summary[0]['summary_text']}, generate some relevant practice questions."
35
- st.write("Generating questions...")
36
- questions = question_generator(context, max_new_tokens=50)
37
- for question in questions:
38
- st.write(question["generated_text"])
 
1
  import streamlit as st
2
+ import tempfile
3
+ import os
4
+ import librosa
5
  import numpy as np
6
+ from transformers import pipeline
7
+ import torch
8
  import soundfile as sf
9
 
10
# ---- Page setup ----
st.set_page_config(page_title="Audio Processing App", layout="wide")
st.title("Audio Lecture Processing App")

# Session-state flag: have the ML pipelines been loaded for this session?
if 'models_loaded' not in st.session_state:
    st.session_state['models_loaded'] = False
17
 
18
@st.cache_resource
def load_models():
    """Build and cache the ASR and summarization pipelines.

    Returns:
        tuple: ``(models, None)`` on success, where ``models`` maps
        ``'transcriber'`` and ``'summarizer'`` to transformers pipelines,
        or ``(None, error_message)`` if construction failed.
    """
    try:
        # Prefer the first GPU when available; -1 selects CPU in transformers.
        device = 0 if torch.cuda.is_available() else -1

        transcriber = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny.en",
            device=device,
        )
        summarizer = pipeline(
            "summarization",
            model="sshleifer/distilbart-cnn-12-6",
            device=device,
        )
        return {'transcriber': transcriber, 'summarizer': summarizer}, None
    except Exception as e:
        return None, f"Error loading models: {str(e)}"
36
 
37
def load_and_convert_audio(audio_path):
    """Load any supported audio file and write it out as a 16 kHz WAV.

    Args:
        audio_path: Path to the source audio file (wav/mp3/m4a/flac/...).

    Returns:
        str: Path to a temporary WAV file. The caller owns the file and
        must delete it when done.

    Raises:
        Exception: Wrapping the underlying error if loading or
        conversion fails; the original exception is chained as __cause__.
    """
    try:
        # librosa handles many container formats and resamples to the
        # 16 kHz rate that Whisper models expect.
        audio_data, sample_rate = librosa.load(audio_path, sr=16000)
        audio_data = audio_data.astype(np.float32)

        # Create the temp file and close its descriptor *before* writing:
        # soundfile re-opens the path, which fails on platforms that lock
        # open files (Windows) if the handle were still held.
        fd, temp_path = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
        sf.write(temp_path, audio_data, sample_rate, format='WAV')
        return temp_path
    except Exception as e:
        # Chain the cause so the full traceback survives the re-raise.
        raise Exception(f"Error converting audio: {str(e)}") from e
52
 
53
def process_audio(audio_path, models):
    """Transcribe and summarize an audio file, reporting progress in the UI.

    Args:
        audio_path: Path to the uploaded audio file on disk.
        models: Dict with 'transcriber' and 'summarizer' pipelines
            as produced by ``load_models``.

    Returns:
        dict with 'transcription' and 'summary' keys on success,
        or ``None`` if processing failed (the error is shown via st.error).
    """
    results = {}
    temp_wav_path = None

    try:
        # Convert audio to a 16 kHz WAV the ASR model can consume.
        with st.spinner('Converting audio format...'):
            temp_wav_path = load_and_convert_audio(audio_path)

        # Transcription
        with st.spinner('Transcribing audio...'):
            results['transcription'] = models['transcriber'](temp_wav_path)["text"]

        # Summarization
        with st.spinner('Generating summary...'):
            text = results['transcription']
            num_words = len(text.split())
            if num_words == 0:
                # Nothing was transcribed; calling the summarizer with
                # max_length=0 would raise, so return an empty summary.
                results['summary'] = ""
                return results

            # BART accepts at most 1024 input tokens; word count is a rough
            # proxy, so leave headroom with a 0.75 factor. Floor at 8 so
            # max_length/min_length stay valid for very short transcripts.
            max_length = max(8, int(min(num_words, 1024) * 0.75))
            summary = models['summarizer'](
                text,
                max_length=max_length,
                min_length=int(max_length * 0.1),
                truncation=True,
            )
            results['summary'] = summary[0]['summary_text']

            # Trim a trailing partial sentence from the generated summary.
            if not results['summary'].endswith((".", "!", "?")):
                last_period_index = results['summary'].rfind(".")
                if last_period_index != -1:
                    results['summary'] = results['summary'][:last_period_index + 1]

    except Exception as e:
        st.error(f"Error processing audio: {str(e)}")
        return None

    finally:
        # Best-effort removal of the intermediate WAV file; only swallow
        # filesystem errors, never KeyboardInterrupt/SystemExit.
        if temp_wav_path and os.path.exists(temp_wav_path):
            try:
                os.unlink(temp_wav_path)
            except OSError:
                pass

    return results
102
 
103
# ---- Main app ----
def main():
    """Drive the Streamlit UI: load models, accept an upload, show results."""
    # Load models once per session; @st.cache_resource also dedupes the
    # heavy pipeline construction across Streamlit reruns.
    if not st.session_state.models_loaded:
        with st.spinner('Loading models... This may take a few minutes...'):
            models, error = load_models()
            if error:
                st.error(error)
                return
            st.session_state.models_loaded = True
            st.session_state.models = models

    # File uploader with clear instructions
    st.write("Upload an audio file of your lecture (supported formats: WAV, MP3, M4A, FLAC)")
    uploaded_file = st.file_uploader("Choose a file", type=["wav", "mp3", "m4a", "flac"])

    if uploaded_file is None:
        return

    # Persist the upload to disk so librosa/ffmpeg can read it by path.
    # os.path.splitext yields a correct (possibly empty) extension even for
    # dot-less filenames, unlike naive string splitting on '.'.
    suffix = os.path.splitext(uploaded_file.name)[1] or ".wav"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio_file:
        temp_audio_file.write(uploaded_file.getbuffer())
        temp_audio_path = temp_audio_file.name

    try:
        results = process_audio(temp_audio_path, st.session_state.models)

        if results:
            # Display results in organized sections
            st.subheader("📝 Transcription")
            with st.expander("Show full transcription"):
                st.write(results['transcription'])

            st.subheader("📌 Summary")
            st.write(results['summary'])

    except Exception as e:
        st.error(f"An unexpected error occurred: {str(e)}")

    finally:
        # Always remove the uploaded temp copy; only swallow filesystem
        # errors, never KeyboardInterrupt/SystemExit.
        if os.path.exists(temp_audio_path):
            try:
                os.unlink(temp_audio_path)
            except OSError:
                pass


if __name__ == "__main__":
    main()
 
 
 
 
requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
- streamlit==1.23.0
2
- transformers==4.30.2
3
- torch==2.0.1
4
- soundfile==0.12.1
5
- numpy==1.23.5
 
 
1
+ streamlit
2
+ transformers
3
+ torch
4
+ soundfile
5
+ numpy
6
+ librosa