Spaces:

amyakir
/

voice-question-generator

Sleeping

App Files Files Community

voice-question-generator / app.py

amyakir

Update app.py

3beb92d verified 27 days ago

raw

history blame contribute delete

3.25 kB

	import gradio as gr
	from transformers import pipeline
	import torch
	from TTS.api import TTS
	import tempfile
	import os
	import speech_recognition as sr
	from difflib import SequenceMatcher

	# Load models
	# Both objects are created once at module level and reused by the handlers below.

	# Question generation: Hugging Face text2text-generation pipeline.
	qg_pipeline = pipeline(
	    task="text2text-generation",
	    model="valhalla/t5-small-e2e-qg",
	)

	# Speech synthesis: TTS model with the download progress bar and GPU use disabled.
	tts = TTS(
	    model_name="tts_models/en/ljspeech/tacotron2-DDC",
	    progress_bar=False,
	    gpu=False,
	)

	# Extract answer for comparison
	def extract_answer(question, context):
	for line in context.split("\n"):
	if any(word.lower() in line.lower() for word in question.split()[:3]):
	return line
	return ""

	# Generate questions from text
	def generate_questions(text):
	    """Generate practice questions from ``text`` and build the initial state.

	    Args:
	        text: Source passage pasted by the user. May be empty or ``None``.

	    Returns:
	        A ``(questions, text, 0)`` tuple: the list of question strings, the
	        original passage, and the index of the next question to ask. This
	        tuple is what gets stored in ``conversation_state``.
	    """
	    # Nothing to generate from: skip the model call and return an empty list.
	    if not text or not text.strip():
	        return ([], text, 0)

	    output = qg_pipeline(f"generate questions: {text}", num_return_sequences=3)

	    # NOTE(review): the end-to-end QG checkpoint is documented to emit several
	    # questions in one string joined by "<sep>" - confirm against the model
	    # card. Splitting is a no-op when the marker is absent.
	    questions = []
	    for candidate in output:
	        for part in candidate["generated_text"].split("<sep>"):
	            question = part.strip()
	            # Drop empty fragments and repeats across the returned sequences.
	            if question and question not in questions:
	                questions.append(question)

	    return (questions, text, 0)

	# Play the next question
	def ask_question(state):
	    """Return the next question as text and synthesized audio.

	    Args:
	        state: ``(questions, context, idx)`` tuple, or ``None`` when no
	            questions have been generated yet.

	    Returns:
	        A ``(question_text, audio_path, new_state)`` tuple. ``audio_path`` is
	        ``None`` when there is nothing to play; otherwise it is the path of a
	        temporary ``.wav`` file and ``new_state`` has ``idx`` advanced by one.
	    """
	    # The state has no value until questions have been generated.
	    if state is None:
	        return "❗ Please generate questions first.", None, state

	    questions, context, idx = state
	    if idx >= len(questions):
	        return "✅ All questions asked.", None, state

	    question = questions[idx]

	    # Reserve a file name, then close the handle before the TTS engine writes
	    # to that path. delete=False keeps the file on disk for the audio player.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
	        audio_path = fp.name
	    tts.tts_to_file(text=question, file_path=audio_path)

	    return question, audio_path, (questions, context, idx + 1)

	# Transcribe and provide feedback
	def transcribe_and_feedback(audio_path, state):
	    """Transcribe the recorded answer and compare it with the expected line.

	    Args:
	        audio_path: Path to the recorded answer, or ``None`` when nothing was
	            recorded.
	        state: ``(questions, context, idx)`` tuple, or ``None`` when no
	            questions have been generated yet. ``idx`` points one past the
	            question that was last asked.

	    Returns:
	        A ``(feedback, state)`` tuple. The state is returned unchanged.
	    """
	    # No state yet, or no question has been asked so far.
	    if state is None:
	        return "❗ Please ask a question first.", state
	    questions, context, idx = state
	    if idx == 0 or idx > len(questions):
	        return "❗ Please ask a question first.", state

	    # Submit pressed without a recording.
	    if not audio_path:
	        return "❗ Please record an answer first.", state

	    recognizer = sr.Recognizer()
	    with sr.AudioFile(audio_path) as source:
	        audio_data = recognizer.record(source)

	    try:
	        user_answer = recognizer.recognize_google(audio_data)
	    except sr.UnknownValueError:
	        # The speech was unintelligible to the recognizer.
	        return "❌ Could not understand your answer.", state
	    except sr.RequestError:
	        # The recognition service could not be reached or rejected the request.
	        return "❌ Speech recognition service is unavailable. Please try again.", state

	    # Compare with expected answer
	    question = questions[idx - 1]
	    expected = extract_answer(question, context)
	    ratio = SequenceMatcher(None, user_answer.lower(), expected.lower()).ratio()
	    if ratio > 0.6:
	        feedback = f"✅ Good answer: {user_answer}"
	    else:
	        feedback = f"❌ Try again. You said: {user_answer}"

	    return feedback, (questions, context, idx)

	# Gradio UI
	# Builds the page: a text input for the passage, a question display with
	# audio playback, a microphone recorder for the answer, and a feedback box.
	# NOTE(review): the original indentation is not visible here, so which
	# components are nested under gr.Row() must be confirmed against the real file.
	with gr.Blocks() as app:
	gr.Markdown("## 🎓 Interactive Speaking Practice")

	with gr.Row():
	# Passage the questions are generated from.
	course_text = gr.Textbox(lines=8, label="📘 Paste Coursebook Text")
	gen_btn = gr.Button("🔄 Generate Questions")

	# Current question, shown as text and as a playable audio file path.
	question_text = gr.Textbox(label="🎤 Current Question")
	question_audio = gr.Audio(label="🔊 Listen to Question", type="filepath")
	ask_btn = gr.Button("▶️ Ask Next Question")

	# The recorded answer is passed to the handler as a file path.
	user_audio = gr.Audio(label="🎙️ Your Answer (Record)", sources="microphone", type="filepath")
	transcribe_btn = gr.Button("📝 Submit Answer")
	feedback_output = gr.Textbox(label="💬 Feedback")

	# Holds the (questions, context, idx) tuple produced by generate_questions.
	# No initial value is passed to gr.State() here.
	conversation_state = gr.State()

	# Event wiring: each button calls one handler and writes back its outputs.
	gen_btn.click(fn=generate_questions, inputs=course_text, outputs=conversation_state)
	ask_btn.click(fn=ask_question, inputs=conversation_state, outputs=[question_text, question_audio, conversation_state])
	transcribe_btn.click(fn=transcribe_and_feedback, inputs=[user_audio, conversation_state], outputs=[feedback_output, conversation_state])

	# Start the app.
	app.launch()