import gradio as gr
from transformers import pipeline
import torch
from TTS.api import TTS
import tempfile
import os
import speech_recognition as sr
from difflib import SequenceMatcher
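
# Assumed dependencies for this Space, inferred from the imports above (pin versions
# in requirements.txt as needed): gradio, transformers, torch, sentencepiece (for the
# T5 tokenizer), TTS (Coqui TTS), and SpeechRecognition.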
# Load models
qg_pipeline = pipeline("text2text-generation", model="valhalla/t5-small-e2e-qg")
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
# Simulate QA by extracting key sentence from input text (placeholder for real QA)
def extract_answer(question, context):
    for line in context.split("\n"):
        if any(word.lower() in line.lower() for word in question.split()[:3]):
            return line
    return ""
def generate_questions(text):
    # valhalla/t5-small-e2e-qg emits all questions in a single string separated by <sep>
    output = qg_pipeline(f"generate questions: {text}")
    generated = output[0]["generated_text"]
    questions = [q.strip() for q in generated.split("<sep>") if q.strip()]
    return questions, text, 0  # state tuple: (questions, context, current question index)
def ask_question(state):
    if not state:
        return "Generate questions first.", None, state
    questions, context, idx = state
    if idx >= len(questions):
        return "All questions asked.", None, state
    question = questions[idx]
    # Synthesize the question to a temporary wav file for playback
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
        tts.tts_to_file(text=question, file_path=fp.name)
        audio_path = fp.name
    # Advance the index so feedback can refer to the question just asked
    return question, audio_path, (questions, context, idx + 1)
def transcribe_and_feedback(audio_path, state):
    if not state:
        return "Please ask a question first.", state
    questions, context, idx = state
    if idx == 0 or idx > len(questions):
        return "Please ask a question first.", state
    if not audio_path:
        return "Please record an answer first.", state
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        audio_data = recognizer.record(source)
    try:
        user_answer = recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand the answer.", state
    except sr.RequestError:
        return "Speech recognition service is unavailable.", state
    # Compare with the expected answer extracted from the coursebook text (simulated QA)
    question = questions[idx - 1]
    expected = extract_answer(question, context)
    ratio = SequenceMatcher(None, user_answer.lower(), expected.lower()).ratio()
    if ratio > 0.6:
        feedback = f"Good answer: {user_answer}"
    else:
        feedback = f"Try again. You said: {user_answer}"
    return feedback, (questions, context, idx)
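
# Flow: paste coursebook text and generate questions, play each question back via TTS,
# record a spoken answer, and receive similarity-based feedback against a sentence
# pulled from the pasted text.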
with gr.Blocks() as app:
    gr.Markdown("### Interactive Q&A Lesson")
    with gr.Row():
        course_text = gr.Textbox(lines=8, label="Paste Coursebook Text")
        gen_btn = gr.Button("Generate Questions")
    question_text = gr.Textbox(label="Current Question")
    question_audio = gr.Audio(label="Listen to Question", type="filepath")
    ask_btn = gr.Button("Ask Next Question")
    user_audio = gr.Audio(label="Your Spoken Answer", sources="microphone", type="filepath")
    transcribe_btn = gr.Button("Submit Answer")
    feedback_output = gr.Textbox(label="Feedback")
    conversation_state = gr.State()

    # The whole (questions, context, idx) tuple is stored in a single State component
    gen_btn.click(generate_questions, inputs=course_text, outputs=conversation_state)
    ask_btn.click(ask_question, inputs=conversation_state, outputs=[question_text, question_audio, conversation_state])
    transcribe_btn.click(transcribe_and_feedback, inputs=[user_audio, conversation_state], outputs=[feedback_output, conversation_state])

app.launch()