vasuans committed
Commit 4b90f98 · verified · 1 Parent(s): 51fee1e

Update app.py

Files changed (1):
  1. app.py +64 -94
app.py CHANGED
@@ -1,45 +1,33 @@
-# voicepulse.py (multilingual feedback transcriber and word cloud generator with export)
 
 import gradio as gr
-import numpy as np
 from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
-import matplotlib.pyplot as plt
 from wordcloud import WordCloud
-import nltk
-import io
+import matplotlib.pyplot as plt
+import numpy as np
+import soundfile as sf
 import torch
+import io
 import csv
-from gtts import gTTS
-
-nltk.download("stopwords")
-stop_words = set(nltk.corpus.stopwords.words("english"))
+from datetime import datetime
+import os
 
-# Translation model for multilingual -> English
+# Set up translation and summarization models
 translation_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
 translation_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
+summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
 device = 0 if torch.cuda.is_available() else -1
 
-# In-memory feedback word list and archive
+# Persistent state
+feedback_records = []
 feedback_words = []
-all_feedback = []
-
-# Sample audios
-sample_texts = {
-    "Telugu Sample": "మీ సేవలు చాలా బాగున్నాయి. మేము చాలా సంతృప్తిగా ఉన్నాము.",
-    "Hindi Sample": "आपकी सेवा बहुत अच्छी थी और हम संतुष्ट हैं।",
-    "Tamil Sample": "உங்கள் சேவை மிகவும் சிறந்ததாக இருந்தது. நாங்கள் திருப்தி அடைந்தோம்.",
-    "English Sample": "Your support team was helpful and responsive."
-}
 
-def generate_sample_audio(text, lang_code):
-    tts = gTTS(text, lang=lang_code)
-    tts.save("sample_full.mp3")
-
-    from pydub import AudioSegment
-    full_audio = AudioSegment.from_mp3("sample_full.mp3")
-    short_audio = full_audio[:3000] # first 3 seconds
-    short_audio.export("sample.mp3", format="mp3")
-    return "sample.mp3"
+lang_map = {
+    "Hindi": ("theainerd/Wav2Vec2-large-xlsr-hindi", "hin_Deva"),
+    "Telugu": ("anuragshas/wav2vec2-large-xlsr-53-telugu", "tel_Telu"),
+    "Tamil": ("Harveenchadha/vakyansh-wav2vec2-tamil-tam-250", "tam_Taml"),
+    "Kannada": ("vasista22/whisper-kannada-medium", "kan_Knda"),
+    "English": ("openai/whisper-base.en", None)
+}
 
 def translate(text, src_lang, tgt_lang="eng_Latn"):
     translation_pipeline = pipeline(
@@ -51,90 +39,76 @@ def translate(text, src_lang, tgt_lang="eng_Latn"):
         max_length=400,
         device=device
     )
-    result = translation_pipeline(text)
-    return result[0]['translation_text']
-
-def get_transcription(audio, language):
-    sr, y = audio
-    y = y.astype(np.float32)
-    y /= np.max(np.abs(y))
-
-    if language == "English":
-        transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
-        return transcriber({"sampling_rate": sr, "raw": y})["text"]
-
-    model_map = {
-        "Hindi": ("theainerd/Wav2Vec2-large-xlsr-hindi", "hin_Deva"),
-        "Telugu": ("anuragshas/wav2vec2-large-xlsr-53-telugu", "tel_Telu"),
-        "Tamil": ("Harveenchadha/vakyansh-wav2vec2-tamil-tam-250", "tam_Taml"),
-        "Kannada": ("vasista22/whisper-kannada-medium", "kan_Knda")
-    }
-
-    model_name, src_lang = model_map[language]
+    return translation_pipeline(text)[0]["translation_text"]
+
+def process_feedback(audio_np, language):
+    if audio_np is None:
+        return "No audio provided", None
+
+    # Transcribe
+    model_name, src_lang = lang_map[language]
     transcriber = pipeline("automatic-speech-recognition", model=model_name)
-    text = transcriber({"sampling_rate": sr, "raw": y})["text"]
-    return translate(text, src_lang)
+    transcription = transcriber(audio_np)["text"]
+
+    if language != "English":
+        transcription = translate(transcription, src_lang=src_lang)
 
-def process_feedback(audio, language):
-    transcription = get_transcription(audio, language)
+    # Summarize
+    summary = summarizer(transcription, max_length=60, min_length=10, do_sample=False)[0]["summary_text"]
 
-    # Use summarization to extract core feedback idea
-    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
-    summary = summarizer(transcription, max_length=60, min_length=10, do_sample=False)[0]['summary_text']
+    # Save record
+    feedback_records.append({
+        "timestamp": datetime.utcnow().isoformat(),
+        "language": language,
+        "transcription": transcription,
+        "summary": summary
+    })
 
-    # Save for download
-    all_feedback.append({"Language": language, "Transcription": transcription, "Summary": summary})
+    # Word cloud
+    feedback_words.extend(summary.lower().split())
+    wordcloud_img = generate_wordcloud(feedback_words)
 
-    # Extract meaningful words from summary
-    words = [w for w in summary.lower().split() if w.isalpha() and w not in stop_words]
-    feedback_words.extend(words)
+    return summary, wordcloud_img
 
-    freq = {w: feedback_words.count(w) for w in set(feedback_words)}
-    wc = WordCloud(width=800, height=400, background_color="white").generate_from_frequencies(freq)
+def generate_wordcloud(words):
+    wordcloud = WordCloud(width=600, height=300, background_color="white").generate(" ".join(words))
+    fig, ax = plt.subplots(figsize=(6, 3))
+    ax.imshow(wordcloud, interpolation="bilinear")
+    ax.axis("off")
     buf = io.BytesIO()
-    plt.imshow(wc, interpolation="bilinear")
-    plt.axis("off")
     plt.savefig(buf, format="png")
+    plt.close(fig)
     buf.seek(0)
-    image = plt.imread(buf, format="png")
-
-    return summary, image
+    return buf
 
 def export_to_csv():
-    with open("feedback_export.csv", "w", newline="") as csvfile:
-        fieldnames = ["Language", "Transcription", "Summary"]
-        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+    filename = "feedback_export.csv"
+    with open(filename, mode='w', newline='', encoding='utf-8') as file:
+        writer = csv.DictWriter(file, fieldnames=["timestamp", "language", "transcription", "summary"])
         writer.writeheader()
-        for row in all_feedback:
+        for row in feedback_records:
             writer.writerow(row)
-    return "feedback_export.csv"
-
+    return filename
 
 demo = gr.Blocks()
 
 with demo:
     gr.Markdown("# 🎙️ VoicePulse Multilingual Feedback Collector")
-    gr.Markdown("""
-    🗣️ **VoicePulse** lets you speak feedback in your language — Telugu, Hindi, Tamil, Kannada, or English.
-    It transcribes, translates, and summarizes the feedback, building a live word cloud to show what people care about.
-    Try speaking or use sample audio buttons below!
-    """)
+    gr.Markdown(
+        "🗣️ **VoicePulse** lets you upload or speak feedback in your language — Telugu, Hindi, Tamil, Kannada, or English.\n"
+        "It transcribes, translates, and summarizes the feedback, building a live word cloud to show what people care about."
+    )
 
     with gr.Row():
-        audio_input = gr.Audio(type="numpy", label="🎤 Speak your feedback")
-        lang_dropdown = gr.Dropdown(label="🌐 Language", choices=["English", "Hindi", "Telugu", "Tamil", "Kannada"], value="English")
+        gr.Markdown("🔴 **Select language of the uploaded file**")
 
     with gr.Row():
-        submit_btn = gr.Button("Process Feedback")
-
+        audio_input = gr.Audio(type="numpy", label="📤 Upload your feedback audio")
+        lang_dropdown = gr.Dropdown(label="🌐 Language", choices=["English", "Hindi", "Telugu", "Tamil", "Kannada"], value="English")
+
     with gr.Row():
-        gr.Markdown("### 🎧 Sample Feedback (Telugu, Hindi, Tamil, English)")
-        sample_btn_te = gr.Button("🔉 Telugu Sample")
-        sample_btn_hi = gr.Button("🔉 Hindi Sample")
-        sample_btn_ta = gr.Button("🔉 Tamil Sample")
-        sample_btn_en = gr.Button("🔉 English Sample")
-        sample_audio = gr.Audio(label="🔊 Sample Audio Output (Preview)")
-
+        submit_btn = gr.Button(" Process Feedback")
+
     with gr.Row():
         summary_out = gr.Textbox(label="📝 Summarized Feedback")
         wordcloud_out = gr.Image(type="pil", label="☁️ Word Cloud of All Feedback")
@@ -145,9 +119,5 @@ with demo:
 
     submit_btn.click(process_feedback, inputs=[audio_input, lang_dropdown], outputs=[summary_out, wordcloud_out])
    export_btn.click(export_to_csv, inputs=[], outputs=csv_file_output)
-    sample_btn_te.click(lambda: generate_sample_audio(sample_texts["Telugu Sample"], 'te'), inputs=[], outputs=sample_audio)
-    sample_btn_hi.click(lambda: generate_sample_audio(sample_texts["Hindi Sample"], 'hi'), inputs=[], outputs=sample_audio)
-    sample_btn_ta.click(lambda: generate_sample_audio(sample_texts["Tamil Sample"], 'ta'), inputs=[], outputs=sample_audio)
-    sample_btn_en.click(lambda: generate_sample_audio(sample_texts["English Sample"], 'en'), inputs=[], outputs=sample_audio)
 
 demo.launch()
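
Side note on the new input handling: gr.Audio(type="numpy") passes the callback a (sample_rate, ndarray) tuple, which the removed get_transcription helper unpacked and normalized before calling the ASR pipeline, while the new process_feedback hands the value to the pipeline directly. A minimal sketch of that preprocessing, assuming the dict-based {"sampling_rate", "raw"} input format the old code used (the helper name prepare_asr_input is illustrative, not part of the commit):

    # Sketch only: mirrors the preprocessing done by the removed get_transcription.
    import numpy as np

    def prepare_asr_input(audio_np):
        # Convert Gradio's (sample_rate, ndarray) tuple into the dict format
        # accepted by a transformers ASR pipeline.
        sr, y = audio_np
        y = y.astype(np.float32)
        if y.ndim > 1:
            y = y.mean(axis=1)  # assumption: fold stereo to mono
        peak = np.max(np.abs(y))
        if peak > 0:
            y = y / peak  # peak-normalize, as the old code did
        return {"sampling_rate": sr, "raw": y}

    # e.g. inside process_feedback:
    # transcription = transcriber(prepare_asr_input(audio_np))["text"]

Similarly, since wordcloud_out is declared as gr.Image(type="pil"), the PNG buffer returned by generate_wordcloud would typically be opened with PIL.Image.open(buf) before being displayed.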