import streamlit as st
from transformers import pipeline
from pydub import AudioSegment
import os

# Page config
st.set_page_config(page_title="Atma.ai - Advanced Session Summarizer", layout="centered")

st.title("🧠 Atma.ai – Advanced Mental Health Session Summarizer")
st.markdown("Upload a recorded therapy session to get a structured summary and emotional tone analysis. Now enhanced with dialogue-aware summarization!")

# Upload audio
uploaded_file = st.file_uploader("🎙️ Upload audio file", type=["wav", "mp3", "m4a"])

if uploaded_file:
    st.audio(uploaded_file)

    # Convert audio to required format
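    # Note: pydub relies on ffmpeg being installed on the system to decode mp3/m4a uploads;
    # mono 16 kHz audio matches the sampling rate Whisper expects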
    audio_path = "temp_audio.wav"
    audio = AudioSegment.from_file(uploaded_file)
    audio = audio.set_channels(1).set_frame_rate(16000)
    audio.export(audio_path, format="wav")

    # Transcribe
    st.info("πŸ”„ Transcribing with Whisper...")
    asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")
    result = asr(audio_path, return_timestamps=True)
    transcript = result["text"]

    st.subheader("πŸ“ Transcript")
    st.markdown(transcript)

    # Dialogue-aware summarization using SAMSum-tuned model
    st.info("πŸ“‹ Summarizing conversation contextually...")
    summarizer = pipeline("summarization", model="philschmid/bart-large-cnn-samsum")
    summary = summarizer(transcript, max_length=256, min_length=60, do_sample=False)

    st.subheader("πŸ“Œ Summary")
    st.write(summary[0]["summary_text"])

    # Emotion tagging
    st.info("🎭 Extracting emotional tones...")
    emotion_model = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
    # truncation=True trims the transcript to the classifier's maximum input length
    emotion_scores = emotion_model(transcript, truncation=True)

    st.subheader("πŸ’¬ Emotional Insights (Overall)")
    for emo in emotion_scores[0]:
        st.write(f"{emo['label']}: {round(emo['score']*100, 2)}%")

    os.remove(audio_path)
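
# ---------------------------------------------------------------------------
# Possible refinement (not part of the original script): each upload above
# builds all three Hugging Face pipelines from scratch, so the model weights
# are reloaded on every run. Below is a minimal sketch of loading them once
# per process, assuming Streamlit >= 1.18 (which provides st.cache_resource);
# the helper name load_pipelines is hypothetical.
# ---------------------------------------------------------------------------
@st.cache_resource
def load_pipelines():
    """Load the ASR, summarization, and emotion pipelines a single time."""
    asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-small")
    summarizer_pipe = pipeline("summarization", model="philschmid/bart-large-cnn-samsum")
    emotion_pipe = pipeline(
        "text-classification",
        model="j-hartmann/emotion-english-distilroberta-base",
        return_all_scores=True,
    )
    return asr_pipe, summarizer_pipe, emotion_pipe

# With the helper in place, the three pipeline(...) calls inside the upload
# handler would be replaced by:
#     asr, summarizer, emotion_model = load_pipelines()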