# Atma.ai — Streamlit app: transcribe, summarize, and emotion-tag a therapy-session audio file.
import os
import tempfile

import streamlit as st
from pydub import AudioSegment
from transformers import pipeline

# ---------------------------------------------------------------------------
# Page configuration
# ---------------------------------------------------------------------------
st.set_page_config(
    page_title="Atma.ai - Mixed Language Session Summarizer",
    layout="centered",
)
st.title("Atma.ai — Mixed Language Session Summarizer")
st.markdown(
    "Upload a therapy session audio file in Tamil-English mix to get a clean "
    "transcript, contextual summary, and emotional analysis."
)


# ---------------------------------------------------------------------------
# Model loading — cached so the (very large) pipelines are constructed once
# per server process instead of on every Streamlit rerun.
# ---------------------------------------------------------------------------
@st.cache_resource(show_spinner=False)
def _load_asr():
    """Return the Whisper speech-recognition pipeline (loaded once)."""
    return pipeline("automatic-speech-recognition", model="openai/whisper-large")


@st.cache_resource(show_spinner=False)
def _load_summarizer():
    """Return the dialogue-summarization pipeline (loaded once)."""
    return pipeline("summarization", model="philschmid/bart-large-cnn-samsum")


@st.cache_resource(show_spinner=False)
def _load_emotion_model():
    """Return the emotion classifier with per-label scores (loaded once)."""
    # NOTE(review): return_all_scores is deprecated in recent transformers in
    # favor of top_k=None; kept because the output shape downstream relies on it.
    return pipeline(
        "text-classification",
        model="j-hartmann/emotion-english-distilroberta-base",
        return_all_scores=True,
    )


# ---------------------------------------------------------------------------
# Upload + processing
# ---------------------------------------------------------------------------
uploaded_file = st.file_uploader("Upload audio file", type=["wav", "mp3", "m4a"])

if uploaded_file:
    st.audio(uploaded_file)

    # Convert to mono 16 kHz WAV — the input format Whisper expects.
    # A per-session temp file avoids collisions between concurrent users
    # (a fixed "temp_audio.wav" name is shared by every session).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        audio_path = tmp.name
    audio = AudioSegment.from_file(uploaded_file)
    audio = audio.set_channels(1).set_frame_rate(16000)
    audio.export(audio_path, format="wav")

    st.info("Transcribing with Whisper (mixed-language support)...")
    try:
        asr = _load_asr()
        # NOTE(review): forcing language "<|en|>" biases Whisper toward English
        # on a Tamil-English mix; consider dropping the kwarg to auto-detect.
        result = asr(
            audio_path,
            return_timestamps=True,
            generate_kwargs={"language": "<|en|>"},
        )
        transcript = result.get("text", "")

        if not transcript:
            st.error("Could not generate a transcript. Please try a different audio.")
        else:
            st.subheader("Transcript")
            st.markdown(transcript)

            # -- Summarization ------------------------------------------------
            st.info("Summarizing conversation...")
            summarizer = _load_summarizer()
            try:
                # truncation=True keeps transcripts longer than BART's
                # 1024-token window from raising instead of summarizing.
                summary = summarizer(
                    transcript,
                    max_length=256,
                    min_length=60,
                    do_sample=False,
                    truncation=True,
                )
                st.subheader("Summary")
                st.write(summary[0]["summary_text"])
            except Exception as e:
                st.error(f"Could not summarize: {e}")

            # -- Emotion tagging ---------------------------------------------
            st.info("Extracting emotional tones...")
            try:
                emotion_model = _load_emotion_model()
                # truncation=True guards against inputs beyond the
                # classifier's 512-token limit.
                emotion_scores = emotion_model(transcript, truncation=True)
                st.subheader("Emotional Insights (Overall)")
                for emo in emotion_scores[0]:
                    st.write(f"{emo['label']}: {round(emo['score'] * 100, 2)}%")
            except Exception as e:
                st.warning(f"Emotion detection skipped due to error: {e}")
    except Exception as err:
        st.error(f"Transcription failed: {err}")
    finally:
        # Always remove the temp WAV, even when transcription fails.
        if os.path.exists(audio_path):
            os.remove(audio_path)