Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,45 +1,33 @@
|
|
1 |
-
# voicepulse.py (multilingual feedback transcriber and word cloud generator with export)
|
2 |
|
3 |
import gradio as gr
|
4 |
-
import numpy as np
|
5 |
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
|
6 |
-
import matplotlib.pyplot as plt
|
7 |
from wordcloud import WordCloud
|
8 |
-
import
|
9 |
-
import
|
|
|
10 |
import torch
|
|
|
11 |
import csv
|
12 |
-
from
|
13 |
-
|
14 |
-
nltk.download("stopwords")
|
15 |
-
stop_words = set(nltk.corpus.stopwords.words("english"))
|
16 |
|
17 |
-
#
|
18 |
translation_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
|
19 |
translation_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
|
|
|
20 |
device = 0 if torch.cuda.is_available() else -1
|
21 |
|
22 |
-
#
|
|
|
23 |
feedback_words = []
|
24 |
-
all_feedback = []
|
25 |
-
|
26 |
-
# Sample audios
|
27 |
-
sample_texts = {
|
28 |
-
"Telugu Sample": "మీ సేవలు చాలా బాగున్నాయి. మేము చాలా సంతృప్తిగా ఉన్నాము.",
|
29 |
-
"Hindi Sample": "आपकी सेवा बहुत अच्छी थी और हम संतुष्ट हैं।",
|
30 |
-
"Tamil Sample": "உங்கள் சேவை மிகவும் சிறந்ததாக இருந்தது. நாங்கள் திருப்தி அடைந்தோம்.",
|
31 |
-
"English Sample": "Your support team was helpful and responsive."
|
32 |
-
}
|
33 |
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
short_audio.export("sample.mp3", format="mp3")
|
42 |
-
return "sample.mp3"
|
43 |
|
44 |
def translate(text, src_lang, tgt_lang="eng_Latn"):
|
45 |
translation_pipeline = pipeline(
|
@@ -51,90 +39,76 @@ def translate(text, src_lang, tgt_lang="eng_Latn"):
|
|
51 |
max_length=400,
|
52 |
device=device
|
53 |
)
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
if language == "English":
|
63 |
-
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
|
64 |
-
return transcriber({"sampling_rate": sr, "raw": y})["text"]
|
65 |
-
|
66 |
-
model_map = {
|
67 |
-
"Hindi": ("theainerd/Wav2Vec2-large-xlsr-hindi", "hin_Deva"),
|
68 |
-
"Telugu": ("anuragshas/wav2vec2-large-xlsr-53-telugu", "tel_Telu"),
|
69 |
-
"Tamil": ("Harveenchadha/vakyansh-wav2vec2-tamil-tam-250", "tam_Taml"),
|
70 |
-
"Kannada": ("vasista22/whisper-kannada-medium", "kan_Knda")
|
71 |
-
}
|
72 |
-
|
73 |
-
model_name, src_lang = model_map[language]
|
74 |
transcriber = pipeline("automatic-speech-recognition", model=model_name)
|
75 |
-
|
76 |
-
|
|
|
|
|
77 |
|
78 |
-
|
79 |
-
|
80 |
|
81 |
-
#
|
82 |
-
|
83 |
-
|
|
|
|
|
|
|
|
|
84 |
|
85 |
-
#
|
86 |
-
|
|
|
87 |
|
88 |
-
|
89 |
-
words = [w for w in summary.lower().split() if w.isalpha() and w not in stop_words]
|
90 |
-
feedback_words.extend(words)
|
91 |
|
92 |
-
|
93 |
-
|
|
|
|
|
|
|
94 |
buf = io.BytesIO()
|
95 |
-
plt.imshow(wc, interpolation="bilinear")
|
96 |
-
plt.axis("off")
|
97 |
plt.savefig(buf, format="png")
|
|
|
98 |
buf.seek(0)
|
99 |
-
|
100 |
-
|
101 |
-
return summary, image
|
102 |
|
103 |
def export_to_csv():
|
104 |
-
|
105 |
-
|
106 |
-
writer = csv.DictWriter(
|
107 |
writer.writeheader()
|
108 |
-
for row in
|
109 |
writer.writerow(row)
|
110 |
-
return
|
111 |
-
|
112 |
|
113 |
demo = gr.Blocks()
|
114 |
|
115 |
with demo:
|
116 |
gr.Markdown("# 🎙️ VoicePulse Multilingual Feedback Collector")
|
117 |
-
gr.Markdown(
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
""")
|
122 |
|
123 |
with gr.Row():
|
124 |
-
|
125 |
-
lang_dropdown = gr.Dropdown(label="🌐 Language", choices=["English", "Hindi", "Telugu", "Tamil", "Kannada"], value="English")
|
126 |
|
127 |
with gr.Row():
|
128 |
-
|
129 |
-
|
|
|
130 |
with gr.Row():
|
131 |
-
gr.
|
132 |
-
|
133 |
-
sample_btn_hi = gr.Button("🔉 Hindi Sample")
|
134 |
-
sample_btn_ta = gr.Button("🔉 Tamil Sample")
|
135 |
-
sample_btn_en = gr.Button("🔉 English Sample")
|
136 |
-
sample_audio = gr.Audio(label="🔊 Sample Audio Output (Preview)")
|
137 |
-
|
138 |
with gr.Row():
|
139 |
summary_out = gr.Textbox(label="📝 Summarized Feedback")
|
140 |
wordcloud_out = gr.Image(type="pil", label="☁️ Word Cloud of All Feedback")
|
@@ -145,9 +119,5 @@ with demo:
|
|
145 |
|
146 |
submit_btn.click(process_feedback, inputs=[audio_input, lang_dropdown], outputs=[summary_out, wordcloud_out])
|
147 |
export_btn.click(export_to_csv, inputs=[], outputs=csv_file_output)
|
148 |
-
sample_btn_te.click(lambda: generate_sample_audio(sample_texts["Telugu Sample"], 'te'), inputs=[], outputs=sample_audio)
|
149 |
-
sample_btn_hi.click(lambda: generate_sample_audio(sample_texts["Hindi Sample"], 'hi'), inputs=[], outputs=sample_audio)
|
150 |
-
sample_btn_ta.click(lambda: generate_sample_audio(sample_texts["Tamil Sample"], 'ta'), inputs=[], outputs=sample_audio)
|
151 |
-
sample_btn_en.click(lambda: generate_sample_audio(sample_texts["English Sample"], 'en'), inputs=[], outputs=sample_audio)
|
152 |
|
153 |
demo.launch()
|
|
|
|
|
1 |
|
2 |
import gradio as gr
|
|
|
3 |
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
|
|
|
4 |
from wordcloud import WordCloud
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
import numpy as np
|
7 |
+
import soundfile as sf
|
8 |
import torch
|
9 |
+
import io
|
10 |
import csv
|
11 |
+
from datetime import datetime
|
12 |
+
import os
|
|
|
|
|
13 |
|
14 |
+
# Choose the inference device once, before any pipeline is built, so every
# model in this module is placed consistently: CUDA device 0 when a GPU is
# visible, otherwise -1 (the transformers convention for CPU).
device = 0 if torch.cuda.is_available() else -1

# Translation model and tokenizer (NLLB-200, distilled 600M); translate()
# wraps these in a translation pipeline.
translation_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
translation_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")

# Summarization pipeline, loaded once at import time and reused for every
# request. It is given the same device as the translation pipeline so it does
# not silently stay on the CPU when a GPU is available.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device)
|
19 |
|
20 |
+
# In-memory state shared across requests (not persisted; lost when the app restarts)
|
21 |
+
feedback_records = []
|
22 |
feedback_words = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
+
# Maps each UI language name to (speech-recognition model id, source-language
# code handed to translate()). English has no source-language code (None)
# because process_feedback skips translation for it.
lang_map = {
|
25 |
+
"Hindi": ("theainerd/Wav2Vec2-large-xlsr-hindi", "hin_Deva"),
|
26 |
+
"Telugu": ("anuragshas/wav2vec2-large-xlsr-53-telugu", "tel_Telu"),
|
27 |
+
"Tamil": ("Harveenchadha/vakyansh-wav2vec2-tamil-tam-250", "tam_Taml"),
|
28 |
+
"Kannada": ("vasista22/whisper-kannada-medium", "kan_Knda"),
|
29 |
+
"English": ("openai/whisper-base.en", None)
|
30 |
+
}
|
|
|
|
|
31 |
|
32 |
def translate(text, src_lang, tgt_lang="eng_Latn"):
|
33 |
translation_pipeline = pipeline(
|
|
|
39 |
max_length=400,
|
40 |
device=device
|
41 |
)
|
42 |
+
return translation_pipeline(text)[0]["translation_text"]
|
43 |
+
|
44 |
+
def _prepare_audio(audio_np):
    """Convert a ``(sample_rate, samples)`` pair into ASR pipeline input.

    Returns a ``{"sampling_rate": ..., "raw": ...}`` dict holding mono float32
    samples scaled into [-1, 1], or ``None`` when the clip contains no samples.
    """
    sample_rate, samples = audio_np
    samples = np.asarray(samples)
    if samples.size == 0:
        return None
    if samples.ndim > 1:
        # Multi-channel clips are laid out as (samples, channels); the ASR
        # pipeline only accepts a single channel, so average down to mono.
        samples = samples.mean(axis=1)
    samples = samples.astype(np.float32)
    peak = np.abs(samples).max()
    if peak > 0:
        # Peak-normalise so integer PCM does not reach the model as huge floats.
        samples = samples / peak
    return {"sampling_rate": sample_rate, "raw": samples}


def process_feedback(audio_np, language):
    """Transcribe, translate (if needed) and summarize one piece of feedback.

    Args:
        audio_np: Value produced by the ``gr.Audio(type="numpy")`` input: a
            ``(sample_rate, samples)`` tuple, or ``None`` when nothing was
            provided.
        language: Key of ``lang_map`` selecting the speech-recognition model
            and the translation source language.

    Returns:
        ``(summary, wordcloud_image)`` on success. When there is no usable
        audio or nothing was transcribed, a short message and ``None``.

    Side effects:
        Appends a record to ``feedback_records`` and the summary's words to
        ``feedback_words``.
    """
    if audio_np is None:
        return "No audio provided", None

    # The ASR pipeline does not accept the raw (rate, array) tuple; it needs a
    # dict carrying the sampling rate alongside the waveform.
    asr_input = _prepare_audio(audio_np)
    if asr_input is None:
        return "No audio provided", None

    # Transcribe
    model_name, src_lang = lang_map[language]
    transcriber = pipeline("automatic-speech-recognition", model=model_name)
    transcription = transcriber(asr_input)["text"]

    # Nothing recognisable was said: do not feed empty text to the
    # translation/summarization models or store an empty record.
    if not transcription.strip():
        return "No speech detected", None

    if language != "English":
        transcription = translate(transcription, src_lang=src_lang)

    # Summarize (truncate over-long transcripts to the model's input limit
    # instead of failing on them)
    summary = summarizer(
        transcription,
        max_length=60,
        min_length=10,
        do_sample=False,
        truncation=True,
    )[0]["summary_text"]

    # Save record
    feedback_records.append({
        "timestamp": datetime.utcnow().isoformat(),
        "language": language,
        "transcription": transcription,
        "summary": summary
    })

    # Word cloud is built from the words of every summary seen so far
    feedback_words.extend(summary.lower().split())
    wordcloud_img = generate_wordcloud(feedback_words)

    return summary, wordcloud_img
|
|
|
|
|
72 |
|
73 |
+
def generate_wordcloud(words):
    """Render the given words as a word-cloud image.

    Args:
        words: Iterable of word strings; they are joined with single spaces
            before being handed to ``WordCloud``.

    Returns:
        A 600x300 PIL image of the word cloud, or ``None`` when there are no
        words to draw.
    """
    text = " ".join(words)
    if not text.strip():
        return None
    try:
        cloud = WordCloud(width=600, height=300, background_color="white").generate(text)
    except ValueError:
        # WordCloud raises ValueError when filtering leaves no words to plot.
        return None
    # Return a PIL image directly: the output component is declared with
    # type="pil" and cannot display a raw BytesIO buffer. Rendering through
    # WordCloud itself also avoids pyplot's shared "current figure" state.
    return cloud.to_image()
|
|
|
|
|
83 |
|
84 |
def export_to_csv():
    """Write every collected feedback record to a CSV file and return its path.

    The file is rewritten from scratch on each call, with a header row and the
    columns ``timestamp``, ``language``, ``transcription`` and ``summary``.
    """
    csv_path = "feedback_export.csv"
    columns = ["timestamp", "language", "transcription", "summary"]
    with open(csv_path, mode='w', newline='', encoding='utf-8') as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        out.writerows(feedback_records)
    return csv_path
|
|
|
92 |
|
93 |
demo = gr.Blocks()
|
94 |
|
95 |
with demo:
|
96 |
gr.Markdown("# 🎙️ VoicePulse Multilingual Feedback Collector")
|
97 |
+
gr.Markdown(
|
98 |
+
"🗣️ **VoicePulse** lets you upload or speak feedback in your language — Telugu, Hindi, Tamil, Kannada, or English.\n"
|
99 |
+
"It transcribes, translates, and summarizes the feedback, building a live word cloud to show what people care about."
|
100 |
+
)
|
|
|
101 |
|
102 |
with gr.Row():
|
103 |
+
gr.Markdown("🔴 **Select language of the uploaded file**")
|
|
|
104 |
|
105 |
with gr.Row():
|
106 |
+
audio_input = gr.Audio(type="numpy", label="📤 Upload your feedback audio")
|
107 |
+
lang_dropdown = gr.Dropdown(label="🌐 Language", choices=["English", "Hindi", "Telugu", "Tamil", "Kannada"], value="English")
|
108 |
+
|
109 |
with gr.Row():
|
110 |
+
submit_btn = gr.Button("✅ Process Feedback")
|
111 |
+
|
|
|
|
|
|
|
|
|
|
|
112 |
with gr.Row():
|
113 |
summary_out = gr.Textbox(label="📝 Summarized Feedback")
|
114 |
wordcloud_out = gr.Image(type="pil", label="☁️ Word Cloud of All Feedback")
|
|
|
119 |
|
120 |
submit_btn.click(process_feedback, inputs=[audio_input, lang_dropdown], outputs=[summary_out, wordcloud_out])
|
121 |
export_btn.click(export_to_csv, inputs=[], outputs=csv_file_output)
|
|
|
|
|
|
|
|
|
122 |
|
123 |
demo.launch()
|