File size: 2,630 Bytes
644d52a
db865fa
 
 
 
 
abc0e8f
21c5c47
644d52a
21c5c47
 
db865fa
644d52a
abc0e8f
db865fa
 
 
644d52a
abc0e8f
db865fa
 
 
 
 
21c5c47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63

import os
import tempfile

import streamlit as st
from pydub import AudioSegment
from transformers import pipeline

# Streamlit app: upload a session recording, transcribe it with Whisper,
# then summarize the transcript and score its emotional tone.

# Page config
st.set_page_config(page_title="Atma.ai - Mixed Language Session Summarizer", layout="centered")

st.title("🧠 Atma.ai – Mixed Language Session Summarizer")
st.markdown("Upload a therapy session audio file in Tamil-English mix to get a clean transcript, contextual summary, and emotional analysis.")

# Upload audio
uploaded_file = st.file_uploader("🎙️ Upload audio file", type=["wav", "mp3", "m4a"])

if uploaded_file:
    st.audio(uploaded_file)

    # Each run gets its own scratch file: a fixed name in the working
    # directory would be shared (and deleted) across concurrent sessions.
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    # Tracks which step is running so the error message names the real culprit.
    stage = "Audio conversion"
    try:
        # Convert audio to mono 16 kHz WAV before handing it to the ASR model.
        # Rewind defensively in case the preview above consumed the stream.
        uploaded_file.seek(0)
        audio = AudioSegment.from_file(uploaded_file)
        audio = audio.set_channels(1).set_frame_rate(16000)
        # export() returns the open output file; close it so the handle is not
        # leaked and the file can be removed in the `finally` below.
        audio.export(audio_path, format="wav").close()

        # Transcribe with explicit language forcing
        # NOTE(review): "<|en|>" pins Whisper to English decoding; confirm this
        # is intended for Tamil-English recordings.
        # NOTE(review): the pipelines below are rebuilt on every script run;
        # consider caching them if the deployed Streamlit version supports it.
        stage = "Transcription"
        st.info("🔄 Transcribing with Whisper (mixed-language support)...")
        asr = pipeline("automatic-speech-recognition", model="openai/whisper-large")
        result = asr(audio_path, return_timestamps=True, generate_kwargs={"language": "<|en|>"})
        # Treat a whitespace-only result the same as no transcript at all.
        transcript = (result.get("text") or "").strip()

        if not transcript:
            st.error("❌ Could not generate a transcript. Please try a different audio.")
        else:
            st.subheader("📝 Transcript")
            st.markdown(transcript)

            # Summarize
            st.info("📋 Summarizing conversation...")
            try:
                summarizer = pipeline("summarization", model="philschmid/bart-large-cnn-samsum")
                # truncation=True keeps long sessions within the model's input
                # limit instead of failing; only the leading part is summarized.
                summary = summarizer(
                    transcript,
                    max_length=256,
                    min_length=60,
                    do_sample=False,
                    truncation=True,
                )
                st.subheader("📌 Summary")
                st.write(summary[0]["summary_text"])
            except Exception as e:
                st.error(f"⚠️ Could not summarize: {e}")

            # Emotion tagging
            st.info("🎭 Extracting emotional tones...")
            try:
                emotion_model = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
                # Same reasoning as above: truncate rather than fail on long text.
                emotion_scores = emotion_model(transcript, truncation=True)
                st.subheader("💬 Emotional Insights (Overall)")
                for emo in emotion_scores[0]:
                    st.write(f"{emo['label']}: {round(emo['score']*100, 2)}%")
            except Exception as e:
                st.warning(f"⚠️ Emotion detection skipped due to error: {e}")
    except Exception as err:
        # Top-level UI boundary: report the failure instead of showing a traceback.
        st.error(f"❌ {stage} failed: {err}")
    finally:
        # Always remove the scratch file, whether or not processing succeeded.
        if os.path.exists(audio_path):
            os.remove(audio_path)