amyakir committed
Commit ec2cf2f · verified · 1 Parent(s): 2eae3ad

Update app.py


import gradio as gr
from transformers import pipeline
import torch
from TTS.api import TTS
import tempfile
import os
import speech_recognition as sr
from difflib import SequenceMatcher

# Load models
qg_pipeline = pipeline("text2text-generation", model="valhalla/t5-small-e2e-qg")
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
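# Model notes (added commentary, not part of the original commit):
# - valhalla/t5-small-e2e-qg is an end-to-end question-generation checkpoint fine-tuned from T5-small.
# - tts_models/en/ljspeech/tacotron2-DDC is a Coqui TTS English (LJSpeech) voice; gpu=False keeps synthesis on CPU.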

# Simulate QA by extracting key sentence from input text (placeholder)
def extract_answer(question, context):
    for line in context.split("\n"):
        if any(word.lower() in line.lower() for word in question.split()[:3]):
            return line
    return ""

def generate_questions(text):
    # NOTE (added): with greedy decoding, transformers may require num_beams >= num_return_sequences
    # or do_sample=True to return multiple sequences; behaviour depends on the model's generation config.
    output = qg_pipeline(f"generate questions: {text}", num_return_sequences=3)
    questions = [q["generated_text"] for q in output]
    return (questions, text, 0)  # this tuple is stored in state

def ask_question(state):
    if not state:
        return "Please generate questions first.", None, state
    questions, context, idx = state
    if idx >= len(questions):
        return "✅ All questions asked.", None, state

    question = questions[idx]
    # Synthesize the question to a temporary WAV file so Gradio can play it back
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
        tts.tts_to_file(text=question, file_path=fp.name)
        audio_path = fp.name

    return question, audio_path, (questions, context, idx + 1)

def transcribe_and_feedback(audio_path, state):
    if not state:
        return "Please generate and ask a question first.", state
    questions, context, idx = state
    if idx == 0 or idx > len(questions):
        return "Please ask a question first.", state
    if audio_path is None:
        return "Please record an answer first.", state

    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        audio_data = recognizer.record(source)
    try:
        user_answer = recognizer.recognize_google(audio_data)
    except (sr.UnknownValueError, sr.RequestError):
        return "❌ Could not understand your answer.", state

    # Compare with expected answer
    question = questions[idx - 1]  # subtract 1 because idx was already incremented by ask_question
    expected = extract_answer(question, context)
    ratio = SequenceMatcher(None, user_answer.lower(), expected.lower()).ratio()

    if ratio > 0.6:
        feedback = f"✅ Good answer: {user_answer}"
    else:
        feedback = f"❌ Try again. You said: {user_answer}"

    return feedback, (questions, context, idx)
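# Note on the 0.6 threshold above (added commentary, illustrative strings only):
# SequenceMatcher(None, a, b).ratio() returns a similarity score between 0 and 1, e.g.
#   SequenceMatcher(None, "the cat sat on the mat", "the cat is on the mat").ratio()  # roughly 0.9, clears the threshold
#   SequenceMatcher(None, "the cat sat on the mat", "i like pizza").ratio()           # well below 0.6, triggers "try again"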

with gr.Blocks() as app:
    gr.Markdown("### 🎓 Interactive Speaking Practice with Coursebook Dialogues")

    with gr.Row():
        course_text = gr.Textbox(lines=8, label="📘 Paste Coursebook Text")
        gen_btn = gr.Button("🔄 Generate Questions")

    question_text = gr.Textbox(label="🎙️ Current Question")
    question_audio = gr.Audio(label="🔊 Listen to Question", type="filepath")
    ask_btn = gr.Button("▶️ Ask Next Question")

    user_audio = gr.Audio(label="🎧 Your Spoken Answer", sources="microphone", type="filepath")
    transcribe_btn = gr.Button("📝 Submit Answer")
    feedback_output = gr.Textbox(label="🗨️ Feedback")

    conversation_state = gr.State()

    gen_btn.click(fn=generate_questions, inputs=course_text, outputs=conversation_state)
    ask_btn.click(fn=ask_question, inputs=conversation_state, outputs=[question_text, question_audio, conversation_state])
    transcribe_btn.click(fn=transcribe_and_feedback, inputs=[user_audio, conversation_state], outputs=[feedback_output, conversation_state])

app.launch()
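To run the app above, the following dependencies are assumed (a minimal sketch; the commit itself does not pin versions, and recognize_google additionally needs internet access):

# requirements.txt (sketch, not part of the commit)
gradio
transformers
torch
sentencepiece        # T5 tokenizer dependency
TTS                  # Coqui TTS, provides TTS.api
SpeechRecognition    # provides the speech_recognition module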

Files changed (1)
  app.py +0 -83
app.py CHANGED
@@ -1,83 +0,0 @@
- import gradio as gr
- from transformers import pipeline
- import torch
- from TTS.api import TTS
- import tempfile
- import os
- import speech_recognition as sr
- from difflib import SequenceMatcher
-
- # Load models
- qg_pipeline = pipeline("text2text-generation", model="valhalla/t5-small-e2e-qg")
- tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
-
- # Simulate QA by extracting key sentence from input text (placeholder for real QA)
- def extract_answer(question, context):
-     for line in context.split("\n"):
-         if any(word.lower() in line.lower() for word in question.split()[:3]):
-             return line
-     return ""
-
- def generate_questions(text):
-     output = qg_pipeline(f"generate questions: {text}", num_return_sequences=3)
-     questions = [q["generated_text"] for q in output]
-     return questions, text, 0  # store context and index
-
- def ask_question(state):
-     questions, context, idx = state
-     if idx >= len(questions):
-         return "All questions asked.", None, state
-
-     question = questions[idx]
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
-         tts.tts_to_file(text=question, file_path=fp.name)
-         audio_path = fp.name
-
-     return question, audio_path, (questions, context, idx)
-
- def transcribe_and_feedback(audio_path, state):
-     questions, context, idx = state
-     if idx == 0 or idx > len(questions):
-         return "Please ask a question first.", state
-
-     recognizer = sr.Recognizer()
-     with sr.AudioFile(audio_path) as source:
-         audio_data = recognizer.record(source)
-     try:
-         user_answer = recognizer.recognize_google(audio_data)
-     except:
-         return "Could not understand the answer.", state
-
-     # Simulate expected answer
-     question = questions[idx - 1]
-     expected = extract_answer(question, context)
-     ratio = SequenceMatcher(None, user_answer.lower(), expected.lower()).ratio()
-     if ratio > 0.6:
-         feedback = f"✅ Good answer: {user_answer}"
-     else:
-         feedback = f"❌ Try again. You said: {user_answer}"
-
-     return feedback, (questions, context, idx)
-
- with gr.Blocks() as app:
-     gr.Markdown("### 🎓 Interactive Q&A Lesson")
-
-     with gr.Row():
-         course_text = gr.Textbox(lines=8, label="Paste Coursebook Text")
-         gen_btn = gr.Button("🔄 Generate Questions")
-
-     question_text = gr.Textbox(label="Current Question")
-     question_audio = gr.Audio(label="Listen to Question", type="filepath")
-     ask_btn = gr.Button("▶️ Ask Next Question")
-
-     user_audio = gr.Audio(label="Your Spoken Answer", sources="microphone", type="filepath")
-     transcribe_btn = gr.Button("📝 Submit Answer")
-     feedback_output = gr.Textbox(label="Feedback")
-
-     conversation_state = gr.State()
-
-     gen_btn.click(generate_questions, inputs=course_text, outputs=[conversation_state, course_text, gr.State(0)])
-     ask_btn.click(ask_question, inputs=conversation_state, outputs=[question_text, question_audio, conversation_state])
-     transcribe_btn.click(transcribe_and_feedback, inputs=[user_audio, conversation_state], outputs=[feedback_output, conversation_state])
-
- app.launch()