# VoicePulse - multilingual voice feedback collector (Gradio app for Hugging Face Spaces).
import csv
import io
import os
from datetime import datetime, timezone

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf
import torch
from PIL import Image
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
from wordcloud import WordCloud
# Pick the inference device first so every pipeline built below can use it:
# CUDA device 0 when available, otherwise -1 (the transformers pipeline
# convention for CPU).
device = 0 if torch.cuda.is_available() else -1

# Translation model and tokenizer (NLLB-200, distilled 600M), loaded once at
# import time and reused by translate().
TRANSLATION_MODEL_NAME = "facebook/nllb-200-distilled-600M"
translation_model = AutoModelForSeq2SeqLM.from_pretrained(TRANSLATION_MODEL_NAME)
translation_tokenizer = AutoTokenizer.from_pretrained(TRANSLATION_MODEL_NAME)

# Summarization pipeline. It now runs on the same device as translation;
# previously `device` was computed after this call and never passed in, so
# summarization stayed on CPU even when a GPU was present.
summarizer = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    device=device,
)
# In-memory state shared by all requests for the lifetime of the process.
feedback_records = []  # one dict per processed feedback clip
feedback_words = []  # lower-cased summary words that feed the word cloud

# UI language name -> (speech-recognition model id, translation source code).
# English carries no language code because its transcripts are not translated.
lang_map = dict(
    Hindi=("theainerd/Wav2Vec2-large-xlsr-hindi", "hin_Deva"),
    Telugu=("anuragshas/wav2vec2-large-xlsr-53-telugu", "tel_Telu"),
    Tamil=("Harveenchadha/vakyansh-wav2vec2-tamil-tam-250", "tam_Taml"),
    Kannada=("vasista22/whisper-kannada-medium", "kan_Knda"),
    English=("openai/whisper-base.en", None),
)
# Translation pipelines already built, keyed by (src_lang, tgt_lang). They all
# wrap the same module-level model and tokenizer, so caching them only avoids
# re-running the pipeline construction on every request.
_translation_pipelines = {}


def translate(text, src_lang, tgt_lang="eng_Latn"):
    """Translate *text* from *src_lang* to *tgt_lang*.

    Args:
        text: The text to translate.
        src_lang: Source language code passed to the translation pipeline
            (e.g. "hin_Deva").
        tgt_lang: Target language code; defaults to "eng_Latn".

    Returns:
        The "translation_text" of the first pipeline result. Blank or
        whitespace-only input is returned unchanged without running the model.
    """
    # Nothing to translate; skip the model call entirely.
    if not text.strip():
        return text

    key = (src_lang, tgt_lang)
    translation_pipeline = _translation_pipelines.get(key)
    if translation_pipeline is None:
        translation_pipeline = pipeline(
            "translation",
            model=translation_model,
            tokenizer=translation_tokenizer,
            src_lang=src_lang,
            tgt_lang=tgt_lang,
            max_length=400,
            device=device,
        )
        _translation_pipelines[key] = translation_pipeline
    return translation_pipeline(text)[0]["translation_text"]
# Most recently used speech-recognition pipeline, keyed by model id. Only one
# entry is kept so memory use matches loading a single model at a time, while
# consecutive requests in the same language skip the model reload.
_transcriber_cache = {}


def _prepare_audio(samples):
    """Return *samples* as a mono float32 waveform."""
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.integer):
        # Integer PCM: scale into roughly [-1.0, 1.0].
        scale = float(np.iinfo(samples.dtype).max)
        samples = samples.astype(np.float32) / scale
    else:
        samples = samples.astype(np.float32)
    if samples.ndim > 1:
        # (frames, channels) -> average the channels down to mono.
        samples = samples.mean(axis=1)
    return samples


def _get_transcriber(model_name):
    """Return the speech-recognition pipeline for *model_name*, loading it if needed."""
    transcriber = _transcriber_cache.get(model_name)
    if transcriber is None:
        # Drop the previously cached model before loading a different one.
        _transcriber_cache.clear()
        transcriber = pipeline("automatic-speech-recognition", model=model_name)
        _transcriber_cache[model_name] = transcriber
    return transcriber


def process_feedback(audio_np, language, sample_file):
    """Transcribe, translate and summarize one piece of audio feedback.

    Args:
        audio_np: Audio from the upload component. Either a
            ``(sample_rate, samples)`` tuple (what a Gradio Audio component
            with ``type="numpy"`` delivers), a bare sample array, or None.
        language: A key of ``lang_map`` naming the spoken language.
        sample_file: Path of a bundled sample clip, used only when
            ``audio_np`` is None.

    Returns:
        A ``(summary, word_cloud_image)`` tuple. When no audio or no speech is
        available the first element is a short message and the second is None.

    Raises:
        KeyError: If ``language`` is not a key of ``lang_map``.
    """
    sample_rate = None
    if audio_np is None and sample_file:
        audio_np, sample_rate = sf.read(sample_file)
    elif isinstance(audio_np, tuple):
        sample_rate, audio_np = audio_np
    if audio_np is None:
        return "No audio provided", None

    waveform = _prepare_audio(audio_np)
    if waveform.size == 0:
        return "No audio provided", None

    model_name, src_lang = lang_map[language]

    # Passing the sampling rate lets the pipeline resample to what the model
    # expects; a bare array is assumed to already be at the model's rate.
    if sample_rate is None:
        asr_input = waveform
    else:
        asr_input = {"sampling_rate": int(sample_rate), "raw": waveform}
    transcription = _get_transcriber(model_name)(asr_input)["text"]

    # Silence or unintelligible audio: there is nothing to summarize.
    if not transcription.strip():
        return "No speech detected", None

    if language != "English":
        transcription = translate(transcription, src_lang=src_lang)

    summary = summarizer(
        transcription, max_length=60, min_length=10, do_sample=False
    )[0]["summary_text"]

    feedback_records.append({
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "language": language,
        "transcription": transcription,
        "summary": summary,
    })
    feedback_words.extend(summary.lower().split())

    # The cloud covers all feedback so far; skip it while there are no words.
    wordcloud_img = generate_wordcloud(feedback_words) if feedback_words else None
    return summary, wordcloud_img
def generate_wordcloud(words):
    """Render *words* as a word-cloud image.

    Args:
        words: Iterable of word strings; they are joined with spaces and
            handed to WordCloud.

    Returns:
        A PIL image of the rendered cloud, or None when there is nothing to
        draw (no words, or WordCloud rejects the text).
    """
    text = " ".join(words)
    if not text.strip():
        return None

    try:
        wordcloud = WordCloud(
            width=600, height=300, background_color="white"
        ).generate(text)
    except ValueError:
        # WordCloud raises ValueError when no words remain after its own
        # filtering (e.g. the text was only stop words).
        return None

    buf = io.BytesIO()
    fig, ax = plt.subplots(figsize=(6, 3))
    try:
        ax.imshow(wordcloud, interpolation="bilinear")
        ax.axis("off")
        # Save this figure explicitly rather than pyplot's "current" figure,
        # which another concurrent request may have replaced.
        fig.savefig(buf, format="png")
    finally:
        # Always release the figure, even if rendering fails.
        plt.close(fig)
    buf.seek(0)
    return Image.open(buf)
def export_to_csv():
    """Write every collected feedback record to a CSV file.

    The file is (re)created in the current working directory with a header
    row followed by one row per entry in ``feedback_records``.

    Returns:
        The name of the written file.
    """
    columns = ["timestamp", "language", "transcription", "summary"]
    csv_path = "feedback_export.csv"
    with open(csv_path, mode='w', newline='', encoding='utf-8') as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        out.writerows(feedback_records)
    return csv_path
# Gradio UI: audio upload (or bundled sample) plus language selection feed
# process_feedback; a second button exports all collected feedback as CSV.
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ VoicePulse Multilingual Feedback Collector")
    gr.Markdown(
        "🗣️ **VoicePulse** lets you upload or speak feedback in your language — Telugu, Hindi, Tamil, Kannada, or English.\n"
        "It transcribes, translates, and summarizes the feedback, building a live word cloud to show what people care about."
    )
    with gr.Row():
        gr.Markdown("🔴 **Select language of the uploaded file**")
    with gr.Row():
        audio_input = gr.Audio(type="numpy", label="🎤 Upload your feedback audio")
        lang_dropdown = gr.Dropdown(
            label="🌐 Language",
            choices=["English", "Hindi", "Telugu", "Tamil", "Kannada"],
            value="English",
        )
    with gr.Row():
        sample_selector = gr.Dropdown(
            label="🎧 Try with sample audio (optional)",
            choices=[
                "files/telugu.mp3",
                "files/hindi.mp3",
                "files/tamil.mp3",
                "files/english.mp3",
            ],
            value=None,
        )
    with gr.Row():
        submit_btn = gr.Button("✅ Process Feedback")
    with gr.Row():
        summary_out = gr.Textbox(label="📝 Summarized Feedback")
        wordcloud_out = gr.Image(type="pil", label="☁️ Word Cloud of All Feedback")
    with gr.Row():
        export_btn = gr.Button("📁 Export Feedback to CSV")
        csv_file_output = gr.File(label="📄 Download CSV")

    # Event wiring must happen inside the Blocks context.
    submit_btn.click(
        process_feedback,
        inputs=[audio_input, lang_dropdown, sample_selector],
        outputs=[summary_out, wordcloud_out],
    )
    export_btn.click(export_to_csv, inputs=[], outputs=csv_file_output)

# Start the server only when run as a script, so importing this module
# (e.g. for tests or reload tooling) does not launch it.
if __name__ == "__main__":
    demo.launch()