Spaces:

jaisun2004
/

atmasessionsummarizer

Sleeping

App Files Files Community

jaisun2004 committed on Apr 5

Commit

21c5c47

verified ·

1 Parent(s): abc0e8f

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -30

app.py CHANGED Viewed

@@ -5,10 +5,10 @@ from pydub import AudioSegment
 import os
 # Page config
-st.set_page_config(page_title="Atma.ai - Advanced Session Summarizer", layout="centered")
-st.title("🧠 Atma.ai – Advanced Mental Health Session Summarizer")
-st.markdown("Upload a recorded therapy session to get a structured summary and emotional tone analysis. Now enhanced with dialogue-aware summarization!")
 # Upload audio
 uploaded_file = st.file_uploader("🎙️ Upload audio file", type=["wav", "mp3", "m4a"])
@@ -22,30 +22,41 @@ if uploaded_file:
     audio = audio.set_channels(1).set_frame_rate(16000)
     audio.export(audio_path, format="wav")
-    # Transcribe
-    st.info("🔄 Transcribing with Whisper...")
-    asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")
-    result = asr(audio_path, return_timestamps=True)
-    transcript = result["text"]
-    st.subheader("📝 Transcript")
-    st.markdown(transcript)
-    # Dialogue-aware summarization using SAMSum-tuned model
-    st.info("📋 Summarizing conversation contextually...")
-    summarizer = pipeline("summarization", model="philschmid/bart-large-cnn-samsum")
-    summary = summarizer(transcript, max_length=256, min_length=60, do_sample=False)
-    st.subheader("📌 Summary")
-    st.write(summary[0]["summary_text"])
-    # Emotion tagging
-    st.info("🎭 Extracting emotional tones...")
-    emotion_model = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
-    emotion_scores = emotion_model(transcript)
-    st.subheader("💬 Emotional Insights (Overall)")
-    for emo in emotion_scores[0]:
-        st.write(f"{emo['label']}: {round(emo['score']*100, 2)}%")
-    os.remove(audio_path)

 import os
 # Page config
+st.set_page_config(page_title="Atma.ai - Mixed Language Session Summarizer", layout="centered")
+st.title("🧠 Atma.ai – Mixed Language Session Summarizer")
+st.markdown("Upload a therapy session audio file in Tamil-English mix to get a clean transcript, contextual summary, and emotional analysis.")
 # Upload audio
 uploaded_file = st.file_uploader("🎙️ Upload audio file", type=["wav", "mp3", "m4a"])
     audio = audio.set_channels(1).set_frame_rate(16000)
     audio.export(audio_path, format="wav")
+    # Transcribe with explicit language forcing
+    st.info("🔄 Transcribing with Whisper (mixed-language support)...")
+    try:
+        asr = pipeline("automatic-speech-recognition", model="openai/whisper-large")
+        result = asr(audio_path, return_timestamps=True, generate_kwargs={"language": "<|en|>"})
+        transcript = result.get("text", "")
+        if not transcript:
+            st.error("❌ Could not generate a transcript. Please try a different audio.")
+        else:
+            st.subheader("📝 Transcript")
+            st.markdown(transcript)
+            # Summarize
+            st.info("📋 Summarizing conversation...")
+            summarizer = pipeline("summarization", model="philschmid/bart-large-cnn-samsum")
+            try:
+                summary = summarizer(transcript, max_length=256, min_length=60, do_sample=False)
+                st.subheader("📌 Summary")
+                st.write(summary[0]["summary_text"])
+            except Exception as e:
+                st.error(f"⚠️ Could not summarize: {e}")
+            # Emotion tagging
+            st.info("🎭 Extracting emotional tones...")
+            try:
+                emotion_model = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
+                emotion_scores = emotion_model(transcript)
+                st.subheader("💬 Emotional Insights (Overall)")
+                for emo in emotion_scores[0]:
+                    st.write(f"{emo['label']}: {round(emo['score']*100, 2)}%")
+            except Exception as e:
+                st.warning(f"⚠️ Emotion detection skipped due to error: {e}")
+    except Exception as err:
+        st.error(f"❌ Transcription failed: {err}")
+    finally:
+        if os.path.exists(audio_path):
+            os.remove(audio_path)