Update app.py
app.py
CHANGED
@@ -1,56 +1,54 @@
-# app.py
 import gradio as gr
 from transformers import pipeline
 import torch
+from TTS.api import TTS
 import tempfile
 import os
-
-import whisper
+import speech_recognition as sr
 
-# Load question
+# Load question generation model
 qg_pipeline = pipeline("text2text-generation", model="valhalla/t5-small-e2e-qg")
 
 # Load TTS model
-tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
-
-# Load Whisper STT model
-whisper_model = whisper.load_model("base")
+tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
 
-# Generate question and audio from input text
 def generate_question(text):
-
-
-
+    # Prompt for question generation
+    input_text = f"generate question: {text.strip()}"
+    question = qg_pipeline(input_text)[0]["generated_text"]
+
+    # Save spoken question as audio
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
-        tts.tts_to_file(text=
+        tts.tts_to_file(text=question, file_path=fp.name)
         audio_path = fp.name
 
-    return
+    return question, audio_path
+
+def transcribe_audio(audio_path):
+    recognizer = sr.Recognizer()
+    with sr.AudioFile(audio_path) as source:
+        audio_data = recognizer.record(source)
+    try:
+        return recognizer.recognize_google(audio_data)
+    except sr.UnknownValueError:
+        return "Sorry, I could not understand your answer."
+    except sr.RequestError:
+        return "Sorry, there was an error with the speech recognition service."
 
-
-
-    audio = whisper.load_audio(audio)
-    audio = whisper.pad_or_trim(audio)
-    mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
-    options = whisper.DecodingOptions()
-    result = whisper.decode(whisper_model, mel, options)
-    return result.text
+with gr.Blocks() as app:
+    gr.Markdown("### Enter your coursebook text below:")
 
-
-with gr.Blocks() as demo:
-    gr.Markdown("### Voice Q&A Generator")
-    with gr.Row():
-        input_text = gr.Textbox(label="Coursebook Text")
-        generate_btn = gr.Button("Generate Question")
+    course_text = gr.Textbox(lines=6, label="Coursebook Text")
 
-
-
+    generate_btn = gr.Button("Generate Question and Speak")
+    question_output = gr.Textbox(label="Generated Question")
+    audio_output = gr.Audio(label="Question Audio", type="filepath")
 
-
-
-
+    user_audio = gr.Audio(label="Your Answer", type="filepath", sources=["microphone"])
+    transcribe_btn = gr.Button("Transcribe Answer")
+    transcription_output = gr.Textbox(label="Transcribed Answer")
 
-    generate_btn.click(fn=generate_question, inputs=
-
+    generate_btn.click(fn=generate_question, inputs=course_text, outputs=[question_output, audio_output])
+    transcribe_btn.click(fn=transcribe_audio, inputs=user_audio, outputs=transcription_output)
 
-
+app.launch()
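
For anyone reproducing this Space after the change, the imports above imply roughly the following requirements.txt. This is a minimal sketch inferred from the code, not taken from the repo: the PyPI package behind `import speech_recognition` is SpeechRecognition, and `gr.Audio(sources=[...])` is Gradio 4.x syntax (3.x used the singular `source=`), so a 4.x pin is assumed.

    gradio>=4.0
    transformers
    torch
    TTS
    SpeechRecognition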
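One caveat on the question-generation call: valhalla/t5-small-e2e-qg is an end-to-end QG checkpoint that, to my knowledge, was trained with the plural prefix "generate questions:" and can emit several questions joined by a "<sep>" token, so the single-question prompt in this commit may come back with extra questions attached. A hedged post-processing sketch (the prefix and split token are my assumptions about the checkpoint, not something this commit asserts):

    def first_question(text: str) -> str:
        # Assumption: the e2e-qg checkpoint joins multiple questions with "<sep>".
        raw = qg_pipeline(f"generate questions: {text.strip()}")[0]["generated_text"]
        return raw.split("<sep>")[0].strip()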
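Also worth noting for transcribe_audio: recognizer.recognize_google() calls Google's free web speech API, so the Space needs outbound network access, and sr.AudioFile only reads WAV/AIFF/FLAC files, which matches the filepath that gr.Audio records. A quick smoke test outside Gradio ("sample.wav" is a placeholder name; this fails offline by design):

    if __name__ == "__main__":
        # Assumes a local PCM WAV recording named "sample.wav" exists.
        print(transcribe_audio("sample.wav"))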
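Finally, `os` is now imported but never used, and NamedTemporaryFile(delete=False) leaves a new .wav behind on every click. If that ever matters on a persistent Space, one possible shape is sketched below (the helper name is hypothetical, not part of this commit):

    import os
    import tempfile

    def synthesize_to_tempfile(question: str) -> str:
        # Create and close the file first so the TTS library can reopen the path.
        fd, path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        tts.tts_to_file(text=question, file_path=path)
        return path  # caller should os.remove(path) once Gradio has served it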