File size: 4,707 Bytes
060e0ca
 
 
 
4b90f98
 
 
060e0ca
4b90f98
060e0ca
4b90f98
 
9dd2418
060e0ca
4b90f98
060e0ca
 
4b90f98
060e0ca
 
4b90f98
 
060e0ca
 
4b90f98
 
 
 
 
 
 
060e0ca
 
 
 
 
 
 
 
 
 
 
4b90f98
 
959435e
 
 
 
4b90f98
 
 
 
060e0ca
4b90f98
 
 
 
060e0ca
4b90f98
060e0ca
4b90f98
 
 
 
 
 
060e0ca
4b90f98
 
060e0ca
4b90f98
060e0ca
4b90f98
 
 
 
 
060e0ca
 
4b90f98
060e0ca
9dd2418
060e0ca
 
4b90f98
 
 
060e0ca
4b90f98
060e0ca
4b90f98
060e0ca
 
 
 
 
4b90f98
 
 
 
060e0ca
 
4b90f98
060e0ca
 
4b90f98
 
 
959435e
 
 
 
 
 
 
 
 
 
 
 
060e0ca
4b90f98
 
060e0ca
 
 
 
 
 
 
 
959435e
060e0ca
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136

import csv
import io
import os
from datetime import datetime, timezone

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf
import torch
from PIL import Image
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from wordcloud import WordCloud

# Set up translation and summarization models
# NLLB-200 performs the Indic-language -> English translation step;
# DistilBART produces the short English summary shown in the UI.
translation_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
translation_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
# HF pipeline device convention: 0 = first CUDA GPU, -1 = CPU.
device = 0 if torch.cuda.is_available() else -1

# Persistent state
# NOTE: in-memory only — lost on restart and shared by every session of
# this process (no per-user isolation).
feedback_records = []  # dicts with keys: timestamp / language / transcription / summary
feedback_words = []    # flat list of lower-cased summary words feeding the word cloud

# UI language choice -> (ASR model checkpoint, NLLB source-language code).
# English is transcribed with Whisper and skips translation, hence None.
lang_map = {
    "Hindi": ("theainerd/Wav2Vec2-large-xlsr-hindi", "hin_Deva"),
    "Telugu": ("anuragshas/wav2vec2-large-xlsr-53-telugu", "tel_Telu"),
    "Tamil": ("Harveenchadha/vakyansh-wav2vec2-tamil-tam-250", "tam_Taml"),
    "Kannada": ("vasista22/whisper-kannada-medium", "kan_Knda"),
    "English": ("openai/whisper-base.en", None)
}

def translate(text, src_lang, tgt_lang="eng_Latn"):
    """Translate *text* with NLLB-200 from *src_lang* into *tgt_lang*.

    Args:
        text: source-language string (typically an ASR transcription).
        src_lang: NLLB source code, e.g. "hin_Deva".
        tgt_lang: NLLB target code; defaults to English.

    Returns:
        The translated string.
    """
    # Constructing a HF pipeline is expensive; the original rebuilt it on
    # every call. Cache one pipeline per language pair on the function.
    cache = getattr(translate, "_pipelines", None)
    if cache is None:
        cache = translate._pipelines = {}
    key = (src_lang, tgt_lang)
    if key not in cache:
        cache[key] = pipeline(
            "translation",
            model=translation_model,
            tokenizer=translation_tokenizer,
            src_lang=src_lang,
            tgt_lang=tgt_lang,
            max_length=400,
            device=device,
        )
    return cache[key](text)[0]["translation_text"]

# Cache of loaded ASR pipelines, keyed by model checkpoint name —
# loading a checkpoint per request would dominate latency.
_asr_pipelines = {}

def _get_transcriber(model_name):
    """Return a cached automatic-speech-recognition pipeline for *model_name*."""
    if model_name not in _asr_pipelines:
        _asr_pipelines[model_name] = pipeline(
            "automatic-speech-recognition", model=model_name, device=device
        )
    return _asr_pipelines[model_name]

def process_feedback(audio_np, language, sample_file):
    """Transcribe one feedback clip, translate it to English if needed,
    summarize it, store the record, and refresh the shared word cloud.

    Args:
        audio_np: Gradio ``type="numpy"`` audio — a (sample_rate, samples)
            tuple — or None when nothing was recorded/uploaded.
        language: key into ``lang_map`` selecting the ASR model.
        sample_file: optional path to a bundled sample clip, used as a
            fallback when *audio_np* is None.

    Returns:
        (summary_text, wordcloud_image), or ("No audio provided", None)
        when there is no audio at all.
    """
    if audio_np is None and sample_file:
        # soundfile returns (samples, sample_rate); normalize to the same
        # (rate, samples) shape Gradio uses so one conversion path suffices.
        data, rate = sf.read(sample_file)
        audio_np = (rate, data)

    if audio_np is None:
        return "No audio provided", None

    model_name, src_lang = lang_map[language]
    transcriber = _get_transcriber(model_name)

    # Gradio delivers (sample_rate, int16 samples); HF ASR pipelines expect
    # a path or {"sampling_rate": ..., "raw": float mono array}. Passing the
    # raw tuple (as the original did) is rejected by the pipeline.
    if isinstance(audio_np, tuple):
        rate, samples = audio_np
        samples = np.asarray(samples)
        if np.issubdtype(samples.dtype, np.integer):
            # Scale integer PCM into [-1.0, 1.0] floats.
            samples = samples.astype(np.float32) / np.iinfo(samples.dtype).max
        else:
            samples = samples.astype(np.float32)
        if samples.ndim > 1:
            samples = samples.mean(axis=1)  # downmix stereo to mono
        audio_input = {"sampling_rate": rate, "raw": samples}
    else:
        audio_input = audio_np

    transcription = transcriber(audio_input)["text"]

    if language != "English":
        transcription = translate(transcription, src_lang=src_lang)

    summary = summarizer(transcription, max_length=60, min_length=10, do_sample=False)[0]["summary_text"]

    feedback_records.append({
        # datetime.utcnow() is deprecated; store an explicitly UTC-aware stamp.
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "language": language,
        "transcription": transcription,
        "summary": summary
    })

    feedback_words.extend(summary.lower().split())
    wordcloud_img = generate_wordcloud(feedback_words)

    return summary, wordcloud_img

def generate_wordcloud(words):
    """Render the accumulated feedback *words* as a word-cloud PIL image.

    Args:
        words: iterable of strings; joined with spaces before rendering.

    Returns:
        PIL.Image.Image containing the rendered word cloud.
    """
    wordcloud = WordCloud(width=600, height=300, background_color="white").generate(" ".join(words))
    fig, ax = plt.subplots(figsize=(6, 3))
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")
    buf = io.BytesIO()
    # Save via the figure handle, not plt.savefig: pyplot's implicit
    # "current figure" is process-global and unreliable when Gradio
    # serves concurrent requests.
    fig.savefig(buf, format="png")
    plt.close(fig)  # release the figure so repeated calls don't leak memory
    buf.seek(0)
    return Image.open(buf)

def export_to_csv():
    """Dump every stored feedback record to a CSV file and return its path."""
    out_path = "feedback_export.csv"
    columns = ["timestamp", "language", "transcription", "summary"]
    with open(out_path, mode='w', newline='', encoding='utf-8') as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        writer.writerows(feedback_records)
    return out_path

demo = gr.Blocks()

with demo:
    gr.Markdown("# πŸŽ™οΈ VoicePulse Multilingual Feedback Collector")
    gr.Markdown(
        "πŸ—£οΈ **VoicePulse** lets you upload or speak feedback in your language β€” Telugu, Hindi, Tamil, Kannada, or English.\n"
        "It transcribes, translates, and summarizes the feedback, building a live word cloud to show what people care about."
    )

    with gr.Row():
        gr.Markdown("πŸ”΄ **Select language of the uploaded file**")

    with gr.Row():
        audio_input = gr.Audio(type="numpy", label="πŸ“€ Upload your feedback audio")
        lang_dropdown = gr.Dropdown(label="🌐 Language", choices=["English", "Hindi", "Telugu", "Tamil", "Kannada"], value="English")

    with gr.Row():
        sample_selector = gr.Dropdown(
            label="🎧 Try with sample audio (optional)",
            choices=[
                "files/telugu.mp3",
                "files/hindi.mp3",
                "files/tamil.mp3",
                "files/english.mp3"
            ],
            value=None
        )

    with gr.Row():
        submit_btn = gr.Button("βœ… Process Feedback")

    with gr.Row():
        summary_out = gr.Textbox(label="πŸ“ Summarized Feedback")
        wordcloud_out = gr.Image(type="pil", label="☁️ Word Cloud of All Feedback")

    with gr.Row():
        export_btn = gr.Button("πŸ“ Export Feedback to CSV")
        csv_file_output = gr.File(label="πŸ“„ Download CSV")

    submit_btn.click(process_feedback, inputs=[audio_input, lang_dropdown, sample_selector], outputs=[summary_out, wordcloud_out])
    export_btn.click(export_to_csv, inputs=[], outputs=csv_file_output)

demo.launch()