import gradio as gr
from transformers import pipeline
from TTS.api import TTS
import tempfile
import speech_recognition as sr
from difflib import SequenceMatcher
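# External dependencies implied by the imports: gradio, transformers (with a torch backend),
# Coqui TTS, and SpeechRecognition; recognize_google below also needs an internet connection
# for Google's Web Speech API.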

# Load models
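# Both checkpoints are downloaded from their model hubs on first run; everything runs on CPU here.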
qg_pipeline = pipeline("text2text-generation", model="valhalla/t5-small-e2e-qg")
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)

# Simulate QA by extracting key sentence from input text (placeholder for real QA)
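# Heuristic: return the first context line that shares one of the question's first three words.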
def extract_answer(question, context):
    for line in context.split("\n"):
        if any(word.lower() in line.lower() for word in question.split()[:3]):
            return line
    return ""

def generate_questions(text):
    # The e2e-qg checkpoint emits all questions in a single sequence separated by "<sep>"
    output = qg_pipeline(f"generate questions: {text}", max_length=128)
    questions = [q.strip() for q in output[0]["generated_text"].split("<sep>") if q.strip()]
    return (questions, text, 0)  # state: (questions, context, index of next question to ask)

def ask_question(state):
    if not state:
        return "Generate questions first.", None, state
    questions, context, idx = state
    if idx >= len(questions):
        return "All questions asked.", None, state

    question = questions[idx]
    # Synthesize the question into a temporary WAV file so Gradio can play it back
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
        tts.tts_to_file(text=question, file_path=fp.name)
        audio_path = fp.name

    # Advance the index so the next click asks the following question
    return question, audio_path, (questions, context, idx + 1)

def transcribe_and_feedback(audio_path, state):
    if not state or audio_path is None:
        return "Please ask a question first.", state
    questions, context, idx = state
    if idx == 0:
        return "Please ask a question first.", state

    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        audio_data = recognizer.record(source)
        try:
            user_answer = recognizer.recognize_google(audio_data)
        except (sr.UnknownValueError, sr.RequestError):
            return "Could not understand the answer.", state

    # Simulate expected answer
    question = questions[idx - 1]
    expected = extract_answer(question, context)
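    # Fuzzy-match the transcript against the expected line; 0.6 is an arbitrary pass threshold.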
    ratio = SequenceMatcher(None, user_answer.lower(), expected.lower()).ratio()
    if ratio > 0.6:
        feedback = f"βœ… Good answer: {user_answer}"
    else:
        feedback = f"❌ Try again. You said: {user_answer}"

    return feedback, (questions, context, idx)

with gr.Blocks() as app:
    gr.Markdown("### πŸŽ“ Interactive Q&A Lesson")

    with gr.Row():
        course_text = gr.Textbox(lines=8, label="Paste Coursebook Text")
        gen_btn = gr.Button("πŸ”„ Generate Questions")

    question_text = gr.Textbox(label="Current Question")
    question_audio = gr.Audio(label="Listen to Question", type="filepath")
    ask_btn = gr.Button("▢️ Ask Next Question")

    user_audio = gr.Audio(label="Your Spoken Answer", sources=["microphone"], type="filepath")
    transcribe_btn = gr.Button("πŸ“ Submit Answer")
    feedback_output = gr.Textbox(label="Feedback")

    conversation_state = gr.State()
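    # Holds (questions, context text, index of the next question to ask) across button clicks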

    gen_btn.click(generate_questions, inputs=course_text, outputs=conversation_state)
    ask_btn.click(ask_question, inputs=conversation_state, outputs=[question_text, question_audio, conversation_state])
    transcribe_btn.click(transcribe_and_feedback, inputs=[user_audio, conversation_state], outputs=[feedback_output, conversation_state])

app.launch()