amyakir committed
Commit a811d46 · verified · 1 Parent(s): 65173cc

Update app.py

Files changed (1):
  app.py +51 -47
app.py CHANGED
@@ -5,75 +5,79 @@ from TTS.api import TTS
 import tempfile
 import os
 import speech_recognition as sr
+from difflib import SequenceMatcher
 
-# Load question generation model
+# Load models
 qg_pipeline = pipeline("text2text-generation", model="valhalla/t5-small-e2e-qg")
-
-# Load TTS model
 tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
 
-# Global storage
-last_answer = ""
-
-def generate_question(text):
-    global last_answer
-
-    # Extract a possible answer from the text (you can improve this logic)
-    last_answer = "They are Aladdin lamps" # You can auto-extract later
-
-    # Prompt for question generation
-    input_text = f"generate question: {text.strip()}"
-    generated = qg_pipeline(input_text, max_length=64)[0]["generated_text"]
-
-    # Keep only the first question
-    question = generated.split("<sep>")[0].strip()
-
-    # Generate audio
+# Simulate QA by extracting key sentence from input text (placeholder for real QA)
+def extract_answer(question, context):
+    for line in context.split("\n"):
+        if any(word.lower() in line.lower() for word in question.split()[:3]):
+            return line
+    return ""
+
+def generate_questions(text):
+    output = qg_pipeline(f"generate questions: {text}", num_return_sequences=3)
+    questions = [q["generated_text"] for q in output]
+    return questions, text, 0 # store context and index
+
+def ask_question(state):
+    questions, context, idx = state
+    if idx >= len(questions):
+        return "All questions asked.", None, state
+
+    question = questions[idx]
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
         tts.tts_to_file(text=question, file_path=fp.name)
         audio_path = fp.name
 
-    return question, audio_path
+    return question, audio_path, (questions, context, idx)
+
+def transcribe_and_feedback(audio_path, state):
+    questions, context, idx = state
+    if idx == 0 or idx > len(questions):
+        return "Please ask a question first.", state
 
-def transcribe_and_check(audio_path):
-    global expected_answer
     recognizer = sr.Recognizer()
     with sr.AudioFile(audio_path) as source:
         audio_data = recognizer.record(source)
     try:
-        user_answer = recognizer.recognize_google(audio_data).lower()
-    except sr.UnknownValueError:
-        return "Sorry, I couldn't understand your answer."
-    except sr.RequestError:
-        return "Speech recognition error."
-
-    # Simple keyword-based matching
-    user_words = set(user_answer.split())
-    expected_words = set(expected_answer.lower().split())
-
-    common = user_words.intersection(expected_words)
-    score = len(common) / max(1, len(expected_words))
-
-    if score > 0.3 or any(word in user_answer for word in ["special", "lamp"]):
-        return f"✅ Good answer: '{user_answer}'"
+        user_answer = recognizer.recognize_google(audio_data)
+    except:
+        return "Could not understand the answer.", state
+
+    # Simulate expected answer
+    question = questions[idx - 1]
+    expected = extract_answer(question, context)
+    ratio = SequenceMatcher(None, user_answer.lower(), expected.lower()).ratio()
+    if ratio > 0.6:
+        feedback = f"✅ Good answer: {user_answer}"
     else:
-        return f"❌ Try again. You said: '{user_answer}'"
+        feedback = f"❌ Try again. You said: {user_answer}"
+
+    return feedback, (questions, context, idx)
 
 with gr.Blocks() as app:
-    gr.Markdown("### 🎓 Interactive Coursebook Q&A")
-
-    course_text = gr.Textbox(lines=6, label="📘 Coursebook Text")
-
-    generate_btn = gr.Button("🔊 Generate Question and Speak")
-    question_output = gr.Textbox(label="🧠 Generated Question")
-    audio_output = gr.Audio(label="🔈 Question Audio", type="filepath")
-
-    user_audio = gr.Audio(label="🎤 Your Answer", type="filepath", sources=["microphone"])
-    transcribe_btn = gr.Button("📝 Transcribe Answer")
-    transcription_output = gr.Textbox(label="🗣 Transcribed Answer")
-    feedback_output = gr.Textbox(label="🧪 Feedback")
-
-    generate_btn.click(fn=generate_question, inputs=course_text, outputs=[question_output, audio_output])
-    transcribe_btn.click(fn=transcribe_and_check, inputs=user_audio, outputs=[transcription_output, feedback_output])
+    gr.Markdown("### 🎓 Interactive Q&A Lesson")
+
+    with gr.Row():
+        course_text = gr.Textbox(lines=8, label="Paste Coursebook Text")
+        gen_btn = gr.Button("🔄 Generate Questions")
+
+    question_text = gr.Textbox(label="Current Question")
+    question_audio = gr.Audio(label="Listen to Question", type="filepath")
+    ask_btn = gr.Button("▶️ Ask Next Question")
+
+    user_audio = gr.Audio(label="Your Spoken Answer", sources="microphone", type="filepath")
+    transcribe_btn = gr.Button("📝 Submit Answer")
+    feedback_output = gr.Textbox(label="Feedback")
+
+    conversation_state = gr.State()
+
+    gen_btn.click(generate_questions, inputs=course_text, outputs=[conversation_state, course_text, gr.State(0)])
+    ask_btn.click(ask_question, inputs=conversation_state, outputs=[question_text, question_audio, conversation_state])
+    transcribe_btn.click(transcribe_and_feedback, inputs=[user_audio, conversation_state], outputs=[feedback_output, conversation_state])
 
 app.launch()
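
A note on the new generate_questions: calling the text2text-generation pipeline with num_return_sequences=3 but no num_beams or do_sample is likely to raise a ValueError in recent transformers releases, since greedy decoding can only return one sequence. The valhalla/t5-small-e2e-qg checkpoint also packs all of its questions into a single output separated by <sep>, which is exactly what the removed code split on. A minimal sketch along those lines, reusing the qg_pipeline defined at the top of app.py and keeping the (questions, context, index) return shape the commit introduces:

    def generate_questions(text):
        # e2e-qg emits several questions in one sequence, delimited by <sep>.
        generated = qg_pipeline(f"generate questions: {text.strip()}",
                                max_length=128)[0]["generated_text"]
        questions = [q.strip() for q in generated.split("<sep>") if q.strip()]
        return questions, text, 0  # (questions, context, index) for gr.State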
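
A note on the event wiring: generate_questions returns three values, so outputs=[conversation_state, course_text, gr.State(0)] stores only the questions list in conversation_state (and overwrites the course text box with its own input), while ask_question and transcribe_and_feedback expect the whole (questions, context, idx) tuple there. ask_question also returns idx unchanged, so the index never advances and the idx == 0 guard in transcribe_and_feedback always answers "Please ask a question first." A sketch of a handler and wiring that stay consistent, assuming the components and models defined in the diff above:

    def ask_question(state):
        if not state:  # gr.State() starts out as None
            return "Generate questions first.", None, state
        questions, context, idx = state
        if idx >= len(questions):
            return "All questions asked.", None, state
        question = questions[idx]
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
            tts.tts_to_file(text=question, file_path=fp.name)
            audio_path = fp.name
        # Advance the index: the next click asks the next question, and
        # questions[idx - 1] in transcribe_and_feedback is the one just asked.
        return question, audio_path, (questions, context, idx + 1)

    # A single State output keeps the whole tuple together; course_text stays put.
    gen_btn.click(generate_questions, inputs=course_text, outputs=conversation_state)

With this wiring, transcribe_and_feedback can keep its existing guard, though it would still want a None check on state before unpacking, in case an answer is recorded before any questions exist.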