Spaces:
Sleeping
Sleeping
File size: 4,707 Bytes
060e0ca 4b90f98 060e0ca 4b90f98 060e0ca 4b90f98 9dd2418 060e0ca 4b90f98 060e0ca 4b90f98 060e0ca 4b90f98 060e0ca 4b90f98 060e0ca 4b90f98 959435e 4b90f98 060e0ca 4b90f98 060e0ca 4b90f98 060e0ca 4b90f98 060e0ca 4b90f98 060e0ca 4b90f98 060e0ca 4b90f98 060e0ca 4b90f98 060e0ca 9dd2418 060e0ca 4b90f98 060e0ca 4b90f98 060e0ca 4b90f98 060e0ca 4b90f98 060e0ca 4b90f98 060e0ca 4b90f98 959435e 060e0ca 4b90f98 060e0ca 959435e 060e0ca |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf
import torch
import io
import csv
from datetime import datetime
import os
from PIL import Image
# Pick the inference device before building any pipeline so all of them can
# share it: 0 selects the first CUDA GPU, -1 keeps inference on the CPU.
device = 0 if torch.cuda.is_available() else -1

# Translation model/tokenizer and the summarization pipeline, loaded once at
# import time. The summarizer is given the same device the translation
# pipeline uses, so both run on the GPU when one is available.
translation_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
translation_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device)

# In-memory state accumulated across requests:
#   feedback_records - one dict per processed submission (exported to CSV)
#   feedback_words   - lower-cased summary words feeding the word cloud
feedback_records = []
feedback_words = []

# UI language name -> (speech-recognition checkpoint, translation source code).
# English maps to None because its transcription is never translated.
lang_map = {
    "Hindi": ("theainerd/Wav2Vec2-large-xlsr-hindi", "hin_Deva"),
    "Telugu": ("anuragshas/wav2vec2-large-xlsr-53-telugu", "tel_Telu"),
    "Tamil": ("Harveenchadha/vakyansh-wav2vec2-tamil-tam-250", "tam_Taml"),
    "Kannada": ("vasista22/whisper-kannada-medium", "kan_Knda"),
    "English": ("openai/whisper-base.en", None),
}
# Translation pipelines keyed by (src_lang, tgt_lang). They all wrap the same
# module-level model and tokenizer, so caching them only avoids rebuilding the
# pipeline wrapper on every request.
_translation_pipelines = {}


def translate(text, src_lang, tgt_lang="eng_Latn"):
    """Translate *text* from *src_lang* into *tgt_lang*.

    Args:
        text: The text to translate.
        src_lang: Source language code passed to the translation pipeline
            (e.g. "hin_Deva").
        tgt_lang: Target language code; defaults to "eng_Latn".

    Returns:
        The translated string, or "" when *text* is empty or only whitespace
        (the model is not invoked in that case).
    """
    # Nothing to translate: skip the model rather than feed it blank input.
    if not text or not text.strip():
        return ""

    cache_key = (src_lang, tgt_lang)
    translator = _translation_pipelines.get(cache_key)
    if translator is None:
        translator = pipeline(
            "translation",
            model=translation_model,
            tokenizer=translation_tokenizer,
            src_lang=src_lang,
            tgt_lang=tgt_lang,
            max_length=400,
            device=device,
        )
        _translation_pipelines[cache_key] = translator

    return translator(text)[0]["translation_text"]
def _to_mono_float32(samples):
    """Return *samples* as a 1-D float32 waveform.

    Integer PCM is scaled by its dtype maximum so values land in roughly
    [-1, 1]; multi-channel audio is averaged down to one channel.
    """
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.integer):
        scale = float(np.iinfo(samples.dtype).max)
        samples = samples.astype(np.float32) / scale
    else:
        samples = samples.astype(np.float32)
    if samples.ndim > 1:
        # assumes a (frames, channels) layout - TODO confirm for all inputs
        samples = samples.mean(axis=1)
    return samples


def process_feedback(audio_np, language, sample_file):
    """Transcribe, translate, and summarize one piece of audio feedback.

    Args:
        audio_np: Audio from the UI. Either a ``(sample_rate, samples)``
            tuple, a bare sample array, or None when nothing was provided.
        language: Key into ``lang_map`` selecting the recognition model and
            the translation source language.
        sample_file: Optional path to a sample audio file, used only when
            *audio_np* is None.

    Returns:
        A ``(summary, wordcloud_image)`` tuple. When there is no usable audio
        or no speech is recognized, the first element is a short message and
        the second is None.

    Raises:
        KeyError: If *language* is not a key of ``lang_map``.
    """
    sample_rate = None
    if audio_np is None and sample_file:
        # soundfile returns (data, samplerate); keep the rate for the model.
        audio_np, sample_rate = sf.read(sample_file)
    elif isinstance(audio_np, tuple):
        # The numpy-typed audio component yields (sample_rate, data).
        sample_rate, audio_np = audio_np

    if audio_np is None:
        return "No audio provided", None

    samples = _to_mono_float32(audio_np)
    if samples.size == 0:
        return "No audio provided", None

    model_name, src_lang = lang_map[language]
    transcriber = pipeline("automatic-speech-recognition", model=model_name)

    # Pass the sampling rate along when it is known so the pipeline can
    # resample to what the model expects; a bare array is assumed to already
    # be at the model's rate.
    if sample_rate:
        asr_input = {"raw": samples, "sampling_rate": int(sample_rate)}
    else:
        asr_input = samples
    transcription = transcriber(asr_input)["text"].strip()

    if language != "English":
        transcription = translate(transcription, src_lang=src_lang)

    # Nothing recognizable was said: do not summarize or record an empty entry.
    if not transcription or not transcription.strip():
        return "No speech detected", None

    summary = summarizer(
        transcription, max_length=60, min_length=10, do_sample=False
    )[0]["summary_text"]

    feedback_records.append({
        "timestamp": datetime.utcnow().isoformat(),
        "language": language,
        "transcription": transcription,
        "summary": summary,
    })
    feedback_words.extend(summary.lower().split())

    # The word cloud cannot be drawn without at least one word.
    wordcloud_img = generate_wordcloud(feedback_words) if feedback_words else None
    return summary, wordcloud_img
def generate_wordcloud(words):
    """Render *words* as a word-cloud image.

    Args:
        words: Iterable of word strings; they are joined with spaces and
            handed to WordCloud.

    Returns:
        A PIL image of the rendered cloud, or None when *words* contains no
        non-blank text (there is nothing to draw).
    """
    text = " ".join(words)
    if not text.strip():
        return None

    cloud = WordCloud(width=600, height=300, background_color="white").generate(text)

    fig, ax = plt.subplots(figsize=(6, 3))
    try:
        ax.imshow(cloud, interpolation="bilinear")
        ax.axis("off")
        buf = io.BytesIO()
        # Save through the figure object rather than pyplot's global
        # "current figure", which another request may have changed.
        fig.savefig(buf, format="png")
    finally:
        # Always release the figure, even if rendering fails.
        plt.close(fig)

    buf.seek(0)
    return Image.open(buf)
def export_to_csv():
    """Write all collected feedback records to a CSV file.

    The file is overwritten on each call and contains a header row followed
    by one row per entry in ``feedback_records``.

    Returns:
        The name of the CSV file that was written.
    """
    columns = ["timestamp", "language", "transcription", "summary"]
    csv_path = "feedback_export.csv"
    with open(csv_path, mode="w", newline="", encoding="utf-8") as handle:
        csv_writer = csv.DictWriter(handle, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(feedback_records)
    return csv_path
# Gradio UI: audio + language inputs, a summary/word-cloud output row, and a
# CSV export row. Event handlers are wired inside the Blocks context.
# NOTE(review): several labels below contain mis-decoded characters (likely
# emoji and a dash); they are kept as found because the originals cannot be
# recovered unambiguously - restore them from the original file.
demo = gr.Blocks()
with demo:
    gr.Markdown("# ποΈ VoicePulse Multilingual Feedback Collector")
    gr.Markdown(
        "π£οΈ **VoicePulse** lets you upload or speak feedback in your language β Telugu, Hindi, Tamil, Kannada, or English.\n"
        "It transcribes, translates, and summarizes the feedback, building a live word cloud to show what people care about."
    )
    with gr.Row():
        gr.Markdown("π΄ **Select language of the uploaded file**")
    with gr.Row():
        audio_input = gr.Audio(type="numpy", label="π€ Upload your feedback audio")
        lang_dropdown = gr.Dropdown(
            label="π Language",
            choices=["English", "Hindi", "Telugu", "Tamil", "Kannada"],
            value="English",
        )
    with gr.Row():
        sample_selector = gr.Dropdown(
            label="π§ Try with sample audio (optional)",
            choices=[
                "files/telugu.mp3",
                "files/hindi.mp3",
                "files/tamil.mp3",
                "files/english.mp3",
            ],
            value=None,
        )
    with gr.Row():
        # The label was split across two lines (an unterminated string); it
        # is rejoined here with the check-mark its bytes decode to.
        submit_btn = gr.Button("✅ Process Feedback")
    with gr.Row():
        summary_out = gr.Textbox(label="π Summarized Feedback")
        wordcloud_out = gr.Image(type="pil", label="βοΈ Word Cloud of All Feedback")
    with gr.Row():
        export_btn = gr.Button("π Export Feedback to CSV")
        csv_file_output = gr.File(label="π Download CSV")

    submit_btn.click(
        process_feedback,
        inputs=[audio_input, lang_dropdown, sample_selector],
        outputs=[summary_out, wordcloud_out],
    )
    export_btn.click(export_to_csv, inputs=[], outputs=csv_file_output)

demo.launch()
|