import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf
import torch
import io
import csv
from datetime import datetime
import os
from PIL import Image

# Set up translation and summarization models
translation_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
translation_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
device = 0 if torch.cuda.is_available() else -1

# Persistent state
feedback_records = []
feedback_words = []

# Per-language ASR model and NLLB source-language code (None = no translation needed)
lang_map = {
    "Hindi": ("theainerd/Wav2Vec2-large-xlsr-hindi", "hin_Deva"),
    "Telugu": ("anuragshas/wav2vec2-large-xlsr-53-telugu", "tel_Telu"),
    "Tamil": ("Harveenchadha/vakyansh-wav2vec2-tamil-tam-250", "tam_Taml"),
    "Kannada": ("vasista22/whisper-kannada-medium", "kan_Knda"),
    "English": ("openai/whisper-base.en", None)
}


def translate(text, src_lang, tgt_lang="eng_Latn"):
    """Translate text into English with NLLB-200."""
    translation_pipeline = pipeline(
        "translation",
        model=translation_model,
        tokenizer=translation_tokenizer,
        src_lang=src_lang,
        tgt_lang=tgt_lang,
        max_length=400,
        device=device
    )
    return translation_pipeline(text)[0]["translation_text"]


def process_feedback(audio_np, language, sample_file):
    # Fall back to the selected sample file when no audio was uploaded.
    # sf.read returns (data, sample_rate); store it in the same
    # (sample_rate, data) order that gr.Audio(type="numpy") produces.
    if audio_np is None and sample_file:
        data, sr = sf.read(sample_file)
        audio_np = (sr, data)
    if audio_np is None:
        return "No audio provided", None

    model_name, src_lang = lang_map[language]
    transcriber = pipeline("automatic-speech-recognition", model=model_name, device=device)

    # The ASR pipeline expects a mono float32 waveform plus its sampling rate.
    sr, data = audio_np
    data = data.astype(np.float32)
    if data.ndim > 1:
        data = data.mean(axis=1)
    if np.abs(data).max() > 1.0:  # int16 samples from the microphone/upload widget
        data /= 32768.0
    transcription = transcriber({"sampling_rate": sr, "raw": data})["text"]

    if language != "English":
        transcription = translate(transcription, src_lang=src_lang)

    summary = summarizer(transcription, max_length=60, min_length=10, do_sample=False)[0]["summary_text"]

    feedback_records.append({
        "timestamp": datetime.utcnow().isoformat(),
        "language": language,
        "transcription": transcription,
        "summary": summary
    })
    feedback_words.extend(summary.lower().split())

    wordcloud_img = generate_wordcloud(feedback_words)
    return summary, wordcloud_img


def generate_wordcloud(words):
    """Render the accumulated feedback words as a word-cloud PIL image."""
    wordcloud = WordCloud(width=600, height=300, background_color="white").generate(" ".join(words))
    fig, ax = plt.subplots(figsize=(6, 3))
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")
    buf = io.BytesIO()
    fig.savefig(buf, format="png")
    plt.close(fig)
    buf.seek(0)
    return Image.open(buf)


def export_to_csv():
    """Write all collected feedback records to a CSV file and return its path."""
    filename = "feedback_export.csv"
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=["timestamp", "language", "transcription", "summary"])
        writer.writeheader()
        for row in feedback_records:
            writer.writerow(row)
    return filename


demo = gr.Blocks()

with demo:
    gr.Markdown("# 🎙️ VoicePulse Multilingual Feedback Collector")
    gr.Markdown(
        "🗣️ **VoicePulse** lets you upload or speak feedback in your language — Telugu, Hindi, Tamil, Kannada, or English.\n"
        "It transcribes, translates, and summarizes the feedback, building a live word cloud to show what people care about."
    )
    with gr.Row():
        gr.Markdown("🔴 **Select the language of the uploaded file**")
    with gr.Row():
        audio_input = gr.Audio(type="numpy", label="📤 Upload your feedback audio")
        lang_dropdown = gr.Dropdown(
            label="🌐 Language",
            choices=["English", "Hindi", "Telugu", "Tamil", "Kannada"],
            value="English"
        )
    with gr.Row():
        sample_selector = gr.Dropdown(
            label="🎧 Try with sample audio (optional)",
            choices=[
                "files/telugu.mp3",
                "files/hindi.mp3",
                "files/tamil.mp3",
                "files/english.mp3"
            ],
            value=None
        )
    with gr.Row():
        submit_btn = gr.Button("✅ Process Feedback")
    with gr.Row():
        summary_out = gr.Textbox(label="📝 Summarized Feedback")
        wordcloud_out = gr.Image(type="pil", label="☁️ Word Cloud of All Feedback")
    with gr.Row():
        export_btn = gr.Button("📁 Export Feedback to CSV")
        csv_file_output = gr.File(label="📄 Download CSV")

    submit_btn.click(
        process_feedback,
        inputs=[audio_input, lang_dropdown, sample_selector],
        outputs=[summary_out, wordcloud_out]
    )
    export_btn.click(export_to_csv, inputs=[], outputs=csv_file_output)

demo.launch()