Spaces:
Sleeping
Sleeping
File size: 1,905 Bytes
644d52a db865fa abc0e8f 644d52a abc0e8f db865fa 644d52a abc0e8f db865fa 644d52a abc0e8f db865fa 644d52a abc0e8f db865fa 644d52a db865fa 644d52a abc0e8f db865fa abc0e8f 644d52a abc0e8f db865fa 644d52a abc0e8f 644d52a abc0e8f 644d52a abc0e8f 644d52a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import streamlit as st
from transformers import pipeline
from pydub import AudioSegment
import os
# Streamlit page for Atma.ai: upload a therapy-session recording, transcribe it
# with Whisper, produce a dialogue-aware summary, and tag overall emotional tone.

# Page config
st.set_page_config(page_title="Atma.ai - Advanced Session Summarizer", layout="centered")
st.title("π§ Atma.ai β Advanced Mental Health Session Summarizer")
st.markdown("Upload a recorded therapy session to get a structured summary and emotional tone analysis. Now enhanced with dialogue-aware summarization!")

# Upload audio
uploaded_file = st.file_uploader("ποΈ Upload audio file", type=["wav", "mp3", "m4a"])

if uploaded_file:
    st.audio(uploaded_file)

    # Convert audio to required format: mono, 16 kHz WAV (what Whisper expects).
    audio_path = "temp_audio.wav"
    audio = AudioSegment.from_file(uploaded_file)
    audio = audio.set_channels(1).set_frame_rate(16000)
    audio.export(audio_path, format="wav")

    try:
        # Transcribe
        st.info("π Transcribing with Whisper...")
        asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")
        # return_timestamps=True lets the pipeline process audio longer than
        # Whisper's 30-second window in chunks.
        result = asr(audio_path, return_timestamps=True)
        transcript = result["text"]

        st.subheader("π Transcript")
        st.markdown(transcript)

        # Dialogue-aware summarization using SAMSum-tuned model
        st.info("π Summarizing conversation contextually...")
        summarizer = pipeline("summarization", model="philschmid/bart-large-cnn-samsum")
        # NOTE(review): very long transcripts exceed the model's input limit and
        # get truncated by the tokenizer — consider chunked summarization.
        summary = summarizer(transcript, max_length=256, min_length=60, do_sample=False)
        st.subheader("π Summary")
        st.write(summary[0]["summary_text"])

        # Emotion tagging: per-label scores over the whole transcript.
        st.info("π Extracting emotional tones...")
        emotion_model = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
        emotion_scores = emotion_model(transcript)
        st.subheader("π¬ Emotional Insights (Overall)")
        for emo in emotion_scores[0]:
            st.write(f"{emo['label']}: {round(emo['score']*100, 2)}%")
    finally:
        # Always delete the temp WAV, even when a model step raises
        # (the original removed it only on the success path, leaking the file).
        os.remove(audio_path)
|