Update app.py
app.py
CHANGED
@@ -5,75 +5,79 @@ from TTS.api import TTS
  import tempfile
  import os
  import speech_recognition as sr

- # Load
  qg_pipeline = pipeline("text2text-generation", model="valhalla/t5-small-e2e-qg")
- # Load TTS model
  tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)

- #
- # Extract a possible answer from the text (you can improve this logic)
- last_answer = "They are Aladdin lamps" # You can auto-extract later

      with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
          tts.tts_to_file(text=question, file_path=fp.name)
          audio_path = fp.name

-     return question, audio_path

- def transcribe_and_check(audio_path):
-     global expected_answer
      recognizer = sr.Recognizer()
      with sr.AudioFile(audio_path) as source:
          audio_data = recognizer.record(source)
      try:
-         user_answer = recognizer.recognize_google(audio_data)
-     except
-         return "
-     common = user_words.intersection(expected_words)
-     score = len(common) / max(1, len(expected_words))
-
-     if score > 0.3 or any(word in user_answer for word in ["special", "lamp"]):
-         return f"✅ Good answer: '{user_answer}'"
      else:

  with gr.Blocks() as app:
-     gr.Markdown("### Interactive
-     transcribe_btn = gr.Button("Transcribe Answer")
-     transcription_output = gr.Textbox(label="🗣 Transcribed Answer")
-     feedback_output = gr.Textbox(label="🧪 Feedback")

  app.launch()
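Several of the removed lines are missing from the old version shown above, including the ones that built `user_words` and `expected_words` for the overlap score. They presumably split the transcription and the hard-coded answer into lower-cased word sets, roughly like this hypothetical sketch (not the original lines):

    # hypothetical reconstruction of the missing word-set lines (not the original code)
    user_words = set(user_answer.lower().split())
    expected_words = set(expected_answer.lower().split())
    # the surviving removed lines then score the overlap:
    common = user_words.intersection(expected_words)
    score = len(common) / max(1, len(expected_words))

The new version below drops the fixed `last_answer` and the word-overlap score in favour of a SequenceMatcher ratio against a sentence pulled from the pasted coursebook text.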
  import tempfile
  import os
  import speech_recognition as sr
+ from difflib import SequenceMatcher

+ # Load models
  qg_pipeline = pipeline("text2text-generation", model="valhalla/t5-small-e2e-qg")
  tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)

+ # Simulate QA by extracting key sentence from input text (placeholder for real QA)
+ def extract_answer(question, context):
+     for line in context.split("\n"):
+         if any(word.lower() in line.lower() for word in question.split()[:3]):
+             return line
+     return ""

+ def generate_questions(text):
+     output = qg_pipeline(f"generate questions: {text}", num_return_sequences=3)
+     questions = [q["generated_text"] for q in output]
+     return questions, text, 0  # store context and index

+ def ask_question(state):
+     questions, context, idx = state
+     if idx >= len(questions):
+         return "All questions asked.", None, state

+     question = questions[idx]
      with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
          tts.tts_to_file(text=question, file_path=fp.name)
          audio_path = fp.name

+     return question, audio_path, (questions, context, idx)
+
+ def transcribe_and_feedback(audio_path, state):
+     questions, context, idx = state
+     if idx == 0 or idx > len(questions):
+         return "Please ask a question first.", state

      recognizer = sr.Recognizer()
      with sr.AudioFile(audio_path) as source:
          audio_data = recognizer.record(source)
      try:
+         user_answer = recognizer.recognize_google(audio_data)
+     except:
+         return "Could not understand the answer.", state
+
+     # Simulate expected answer
+     question = questions[idx - 1]
+     expected = extract_answer(question, context)
+     ratio = SequenceMatcher(None, user_answer.lower(), expected.lower()).ratio()
+     if ratio > 0.6:
+         feedback = f"✅ Good answer: {user_answer}"
      else:
+         feedback = f"❌ Try again. You said: {user_answer}"
+
+     return feedback, (questions, context, idx)

  with gr.Blocks() as app:
+     gr.Markdown("### Interactive Q&A Lesson")
+
+     with gr.Row():
+         course_text = gr.Textbox(lines=8, label="Paste Coursebook Text")
+         gen_btn = gr.Button("Generate Questions")

+     question_text = gr.Textbox(label="Current Question")
+     question_audio = gr.Audio(label="Listen to Question", type="filepath")
+     ask_btn = gr.Button("▶️ Ask Next Question")

+     user_audio = gr.Audio(label="Your Spoken Answer", sources="microphone", type="filepath")
+     transcribe_btn = gr.Button("Submit Answer")
+     feedback_output = gr.Textbox(label="Feedback")

+     conversation_state = gr.State()

+     gen_btn.click(generate_questions, inputs=course_text, outputs=[conversation_state, course_text, gr.State(0)])
+     ask_btn.click(ask_question, inputs=conversation_state, outputs=[question_text, question_audio, conversation_state])
+     transcribe_btn.click(transcribe_and_feedback, inputs=[user_audio, conversation_state], outputs=[feedback_output, conversation_state])

  app.launch()
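As committed, the `gen_btn.click` wiring sends the three values returned by `generate_questions` to `[conversation_state, course_text, gr.State(0)]`, so `conversation_state` ends up holding only the question list rather than the `(questions, context, idx)` tuple that `ask_question` and `transcribe_and_feedback` unpack, and nothing ever advances `idx` to the next question. The bare `except:` around `recognize_google` will likewise report network failures (`sr.RequestError`) as "Could not understand the answer." A minimal rewiring sketch, assuming the same models and layout components defined above and placed inside the `with gr.Blocks() as app:` block (hypothetical, not part of this commit):

    # Hypothetical rewiring sketch, not the committed code.
    # Keeps questions, context and the current index in separate gr.State
    # components so each handler's return values map one-to-one onto its outputs.
    questions_state = gr.State([])   # generated questions
    context_state = gr.State("")     # pasted coursebook text
    idx_state = gr.State(0)          # index of the next question to ask

    def generate_questions(text):
        output = qg_pipeline(f"generate questions: {text}", num_return_sequences=3)
        return [q["generated_text"] for q in output], text, 0

    def ask_question(questions, context, idx):
        if not questions or idx >= len(questions):
            return "All questions asked.", None, idx
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
            tts.tts_to_file(text=questions[idx], file_path=fp.name)
        return questions[idx], fp.name, idx + 1   # advance the index here

    gen_btn.click(generate_questions, inputs=course_text,
                  outputs=[questions_state, context_state, idx_state])
    ask_btn.click(ask_question, inputs=[questions_state, context_state, idx_state],
                  outputs=[question_text, question_audio, idx_state])

`transcribe_and_feedback` would then take `[user_audio, questions_state, context_state, idx_state]` as inputs in the same way and compare the transcription against `extract_answer(questions[idx - 1], context)` exactly as in the committed version.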