Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import speech_recognition as sr
|
3 |
+
from pydub import AudioSegment
|
4 |
+
import os
|
5 |
+
from gtts import gTTS
|
6 |
+
|
7 |
+
def transcribe_audio(audio):
    """Transcribe an uploaded/recorded audio file to text.

    Parameters
    ----------
    audio : str | None
        Filepath handed over by the Gradio audio component (may be None
        when the component is cleared).

    Returns
    -------
    str
        The recognized text, or a human-readable error message.
    """
    import tempfile  # local import so this fix is self-contained

    # Gradio can invoke handlers with None when the input is cleared.
    if audio is None:
        return "Sorry, no audio was received."

    recognizer = sr.Recognizer()
    # Convert whatever container/codec the browser produced into WAV,
    # which speech_recognition can read.  A unique temp path avoids the
    # race on a fixed "temp.wav" when several sessions run concurrently.
    segment = AudioSegment.from_file(audio)
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    try:
        segment.export(tmp.name, format="wav")
        with sr.AudioFile(tmp.name) as source:
            audio_data = recognizer.record(source)
        try:
            text = recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            text = "Sorry, I could not understand the audio."
        except sr.RequestError:
            text = "Sorry, there was an error with the speech recognition service."
    finally:
        # Always clean up the temp file, even if export/recognition raised
        # (the original leaked temp.wav on any unexpected exception).
        os.remove(tmp.name)
    return text
|
21 |
+
|
22 |
+
def speak_text(text):
    """Synthesize *text* to speech and return the path of an MP3 file.

    A unique temp file is used instead of a fixed "question.mp3" so that
    concurrent Gradio sessions do not overwrite each other's audio.
    Callers only consume the returned filepath, so the change is
    backward-compatible.
    """
    import tempfile  # local import so this fix is self-contained

    tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
    tmp.close()
    tts = gTTS(text)
    tts.save(tmp.name)
    # NOTE(review): generated files accumulate in the temp dir; a cleanup
    # hook would be needed for long-running deployments.
    return tmp.name
|
26 |
+
|
27 |
+
# Ordered quiz script.  Each entry has:
#   "context"  - narration spoken before the question (may be empty),
#   "question" - the question itself (context + question are fed to TTS),
#   "label"    - value pushed to the transcription textbox by next_question,
#   "answer"   - the model answer (currently unused by the evaluation code).
questions = [
    {"context": "Let's talk about the story, My Dad's birthday party. Today is your dad’s birthday. So you and your family will have a party tonight.", "question": "What will your mom do for the party?", "label": "Transcription of Mom's action", "answer": "She will cook the dinner."},
    {"context": "Look at the page 12 and 13.", "question": "What is she doing?", "label": "Transcription of Mom's action", "answer": "She is baking a cake."},
    {"context": "How about your brother?", "question": "What will he do for the party?", "label": "Transcription of Brother's action", "answer": "He will decorate the room."},
    {"context": "Look at the picture.", "question": "What is he doing?", "label": "Transcription of Brother's action", "answer": "He is hanging balloons."},
    {"context": "Okay, next,", "question": "How about you? What will you do for the party?", "label": "Transcription of Your action", "answer": "I will set the table."},
    {"context": "Let's move on to the story 'Owls are special'. Owls are nocturnal.", "question": "When do they sleep?", "label": "Transcription of sleep time", "answer": "They sleep during the day."},
    {"context": "Look at the page 21, they have very special eyes.", "question": "So, what can they do with their special eyes?", "label": "Transcription of eye ability", "answer": "They can see in the dark."},
    {"context": "Now, these questions are based on the story 'I will go shopping'. You have many things to buy. First, you need to buy a book. So, you will go to the bookstore. The bookstore is far away.", "question": "How will you go to the bookstore?", "label": "Transcription of transport method", "answer": "I will take the bus."},
    {"context": "Now you need to buy some bread.", "question": "Where will you go?", "label": "Transcription of place", "answer": "I will go to the bakery."},
    {"context": "Let's talk about the story 'Guide dogs'. Blind people can't see. But, they can still do things.", "question": "How can they do?", "label": "Transcription of how blind people do things", "answer": "They use their other senses."},
    {"context": "Guide dogs help blind people with everything. For example, they bring things for them. They help them. They open doors for them.", "question": "What else can they do for them?", "label": "Transcription of other abilities", "answer": "They guide them while walking."}
]
|
40 |
+
|
41 |
+
# Index of the next question to be asked (0-based).
# NOTE(review): module-level mutable state is shared by every session of a
# Gradio app running in this process, so concurrent users would interfere
# with each other — confirm single-user use or move into gr.State.
current_question = 0
# Transcriptions collected so far, one per answered question.
responses = []
|
43 |
+
|
44 |
+
def next_question():
    """Advance to the next question, or show the final summary when done.

    Returns a 5-tuple of updates/values for the components:
    (question_audio, audio_input, transcription_output, btn_next, final_output)
    """
    global current_question

    # All questions asked: hide the interactive widgets, reveal the summary.
    if current_question >= len(questions):
        summary = evaluate_responses()
        return (
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(value=summary, visible=True),
        )

    entry = questions[current_question]
    # Narration context (optional) followed by the question, spoken via TTS.
    spoken_text = f"{entry.get('context', '')} {entry['question']}"
    audio_path = speak_text(spoken_text)
    current_question += 1
    return (
        gr.update(value=audio_path, visible=True),
        gr.update(visible=True),
        entry["label"],
        gr.update(visible=True),
        gr.update(visible=False),
    )
|
56 |
+
|
57 |
+
def save_response(audio):
    """Transcribe the recorded answer, store it, and echo the text back.

    Parameters
    ----------
    audio : str | None
        Filepath from the Gradio audio input; None when the input is
        cleared, in which case nothing is recorded.

    Returns
    -------
    str
        The transcription shown in the textbox ("" when no audio).
    """
    # .change also fires with None when the widget is cleared; recording
    # that would desynchronize `responses` from `questions` and the
    # original code would crash inside transcribe_audio.
    if audio is None:
        return ""
    transcription = transcribe_audio(audio)
    responses.append(transcription)
    return transcription
|
61 |
+
|
62 |
+
def evaluate_responses(question_list=None, response_list=None):
    """Build an HTML summary pairing each question with the user's answer.

    Parameters (both optional, defaulting to the module-level globals so
    the original zero-argument call sites keep working):

    question_list : list[dict] | None
        Question dicts; each must provide a "question" key.
    response_list : list[str] | None
        Transcriptions aligned by index; missing entries render as
        "No response".

    Returns
    -------
    str
        HTML fragment for the gr.HTML component.
    """
    qs = questions if question_list is None else question_list
    rs = responses if response_list is None else response_list
    result = "<h2>Your Responses:</h2><br>"
    for i, question in enumerate(qs):
        user_response = rs[i] if i < len(rs) else "No response"
        result += f"<b>Q:</b> {question['question']}<br><b>Your Answer:</b> {user_response}<br><br>"
    return result
|
68 |
+
|
69 |
+
# ---- UI layout and event wiring -------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("### Interactive Questions")

    # Player for the synthesized question audio (hidden until first load).
    question_audio = gr.Audio(label="Question", visible=False)
    # Microphone/file input where the user records an answer (filepath mode).
    audio_input = gr.Audio(type="filepath", label="Your answer", visible=True)
    # Shows the recognized text of the latest answer.
    transcription_output = gr.Textbox(label="Transcription", visible=True)
    btn_next = gr.Button("Next", visible=True)
    # Final HTML summary, revealed once all questions are exhausted.
    final_output = gr.HTML(visible=False)

    def load_first_question():
        # Thin wrapper so demo.load triggers the first question on page open.
        return next_question()

    demo.load(load_first_question, outputs=[question_audio, audio_input, transcription_output, btn_next, final_output])

    # "Next" advances through `questions`; same 5 outputs as the loader.
    btn_next.click(next_question, outputs=[question_audio, audio_input, transcription_output, btn_next, final_output])
    # Each new recording is transcribed and appended to `responses`.
    audio_input.change(save_response, inputs=audio_input, outputs=transcription_output)

demo.launch()
|