# Source: Hugging Face Space "app.py" by jaisun2004 (commit abc0e8f, 1.91 kB).
# The lines above this comment were web-page residue from the HF "raw" view,
# converted to a comment so the file is valid Python.
"""Atma.ai – Streamlit app: transcribe an uploaded therapy session with Whisper,
produce a dialogue-aware summary (SAMSum-tuned BART), and tag overall emotions.
"""
import os

import streamlit as st
from pydub import AudioSegment
from transformers import pipeline

# Page config
# NOTE(review): several label strings below look mojibake-encoded (e.g. "πŸŽ™οΈ");
# kept byte-identical here — confirm intended emoji against the original commit.
st.set_page_config(page_title="Atma.ai - Advanced Session Summarizer", layout="centered")
st.title("🧠 Atma.ai – Advanced Mental Health Session Summarizer")
st.markdown("Upload a recorded therapy session to get a structured summary and emotional tone analysis. Now enhanced with dialogue-aware summarization!")


@st.cache_resource(show_spinner=False)
def _load_pipelines():
    """Load the three HF pipelines once per process.

    Without caching, every Streamlit rerun (each widget interaction) would
    re-download/re-instantiate all three models, which is very slow.
    Returns (asr, summarizer, emotion_model).
    """
    asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")
    summarizer = pipeline("summarization", model="philschmid/bart-large-cnn-samsum")
    emotion_model = pipeline(
        "text-classification",
        model="j-hartmann/emotion-english-distilroberta-base",
        return_all_scores=True,
    )
    return asr, summarizer, emotion_model


# Upload audio
uploaded_file = st.file_uploader("πŸŽ™οΈ Upload audio file", type=["wav", "mp3", "m4a"])
if uploaded_file:
    st.audio(uploaded_file)

    # Convert audio to mono 16 kHz WAV — the sample rate Whisper expects.
    audio_path = "temp_audio.wav"
    audio = AudioSegment.from_file(uploaded_file)
    audio = audio.set_channels(1).set_frame_rate(16000)
    audio.export(audio_path, format="wav")

    try:
        asr, summarizer, emotion_model = _load_pipelines()

        # Transcribe
        st.info("πŸ”„ Transcribing with Whisper...")
        result = asr(audio_path, return_timestamps=True)
        transcript = result["text"]
        st.subheader("πŸ“ Transcript")
        st.markdown(transcript)

        # Dialogue-aware summarization using SAMSum-tuned model.
        st.info("πŸ“‹ Summarizing conversation contextually...")
        # truncation=True keeps long transcripts from exceeding BART's
        # 1024-token input limit, which would otherwise raise at runtime.
        summary = summarizer(
            transcript,
            max_length=256,
            min_length=60,
            do_sample=False,
            truncation=True,
        )
        st.subheader("πŸ“Œ Summary")
        st.write(summary[0]["summary_text"])

        # Emotion tagging over the whole transcript (single-sequence scores).
        st.info("🎭 Extracting emotional tones...")
        emotion_scores = emotion_model(transcript, truncation=True)
        st.subheader("πŸ’¬ Emotional Insights (Overall)")
        for emo in emotion_scores[0]:
            st.write(f"{emo['label']}: {round(emo['score']*100, 2)}%")
    finally:
        # Always clean up the temp WAV, even if a pipeline step fails.
        os.remove(audio_path)