englissi commited on
Commit
6ebe78d
·
verified ·
1 Parent(s): 69835d8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -0
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import speech_recognition as sr
3
+ from pydub import AudioSegment
4
+ import os
5
+ from gtts import gTTS
6
+
7
def transcribe_audio(audio):
    """Transcribe an uploaded audio file to text via Google Speech Recognition.

    Parameters
    ----------
    audio : str
        Path to the recorded audio file (any format pydub/ffmpeg can read).

    Returns
    -------
    str
        The recognized text, or a human-readable error message when the
        audio is unintelligible or the recognition service fails.
    """
    import tempfile  # function-scope import keeps the module import block untouched

    recognizer = sr.Recognizer()
    # Use a unique temp file instead of a fixed "temp.wav": Gradio can serve
    # several users concurrently, and a shared filename would let requests
    # clobber each other's intermediate files.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # pydub re-opens the path itself; don't hold a dangling fd
    try:
        # Normalize whatever container/codec we received into WAV,
        # which is what speech_recognition's AudioFile expects.
        AudioSegment.from_file(audio).export(wav_path, format="wav")
        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)
        try:
            text = recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            text = "Sorry, I could not understand the audio."
        except sr.RequestError:
            text = "Sorry, there was an error with the speech recognition service."
    finally:
        # Always clean up — the original leaked its temp file whenever
        # export() or record() raised before reaching os.remove().
        os.remove(wav_path)
    return text
21
+
22
def speak_text(text):
    """Synthesize *text* to speech with gTTS and return the saved MP3 path."""
    audio_path = "question.mp3"
    gTTS(text).save(audio_path)
    return audio_path
26
+
27
# Question bank for the listening/speaking drill. Each entry holds:
#   context  – narrative read aloud before the question
#   question – the prompt the learner must answer
#   label    – caption used for the transcription textbox output
#   answer   – the model answer (not displayed by the current UI)
questions = [
    {"context": "Let's talk about the story, My Dad's birthday party. Today is your dad’s birthday. So you and your family will have a party tonight.", "question": "What will your mom do for the party?", "label": "Transcription of Mom's action", "answer": "She will cook the dinner."},
    {"context": "Look at the page 12 and 13.", "question": "What is she doing?", "label": "Transcription of Mom's action", "answer": "She is baking a cake."},
    {"context": "How about your brother?", "question": "What will he do for the party?", "label": "Transcription of Brother's action", "answer": "He will decorate the room."},
    {"context": "Look at the picture.", "question": "What is he doing?", "label": "Transcription of Brother's action", "answer": "He is hanging balloons."},
    {"context": "Okay, next,", "question": "How about you? What will you do for the party?", "label": "Transcription of Your action", "answer": "I will set the table."},
    {"context": "Let's move on to the story 'Owls are special'. Owls are nocturnal.", "question": "When do they sleep?", "label": "Transcription of sleep time", "answer": "They sleep during the day."},
    {"context": "Look at the page 21, they have very special eyes.", "question": "So, what can they do with their special eyes?", "label": "Transcription of eye ability", "answer": "They can see in the dark."},
    {"context": "Now, these questions are based on the story 'I will go shopping'. You have many things to buy. First, you need to buy a book. So, you will go to the bookstore. The bookstore is far away.", "question": "How will you go to the bookstore?", "label": "Transcription of transport method", "answer": "I will take the bus."},
    {"context": "Now you need to buy some bread.", "question": "Where will you go?", "label": "Transcription of place", "answer": "I will go to the bakery."},
    {"context": "Let's talk about the story 'Guide dogs'. Blind people can't see. But, they can still do things.", "question": "How can they do?", "label": "Transcription of how blind people do things", "answer": "They use their other senses."},
    {"context": "Guide dogs help blind people with everything. For example, they bring things for them. They help them. They open doors for them.", "question": "What else can they do for them?", "label": "Transcription of other abilities", "answer": "They guide them while walking."}
]

# Index of the next question to ask (mutated by next_question()).
# NOTE(review): module-level state is shared by all users of this app —
# concurrent sessions would interleave; fine for a single-user demo.
current_question = 0
# Transcribed learner answers, appended in recording order by save_response().
responses = []
43
+
44
def next_question():
    """Advance the quiz by one step.

    While questions remain: synthesize the next question's audio, bump the
    global cursor, and reveal the answering widgets. Once exhausted: hide
    the quiz widgets and show the HTML summary of all responses.

    Returns five gr.update payloads, in order: question_audio, audio_input,
    transcription_output, btn_next, final_output.
    """
    global current_question

    # All questions asked — switch the UI over to the results view.
    if current_question >= len(questions):
        summary = evaluate_responses()
        return (
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(value=summary, visible=True),
        )

    item = questions[current_question]
    spoken = f"{item.get('context', '')} {item['question']}"
    audio_path = speak_text(spoken)
    current_question += 1
    return (
        gr.update(value=audio_path, visible=True),
        gr.update(visible=True),
        item["label"],
        gr.update(visible=True),
        gr.update(visible=False),
    )
56
+
57
def save_response(audio):
    """Transcribe the learner's recording, store it, and echo the text back."""
    text = transcribe_audio(audio)
    responses.append(text)
    return text
61
+
62
def evaluate_responses():
    """Build an HTML summary pairing each question with the recorded answer."""
    parts = ["<h2>Your Responses:</h2><br>"]
    for idx, question in enumerate(questions):
        # Questions the learner skipped get an explicit placeholder.
        answer = responses[idx] if idx < len(responses) else "No response"
        parts.append(f"<b>Q:</b> {question['question']}<br><b>Your Answer:</b> {answer}<br><br>")
    return "".join(parts)
68
+
69
# ---- UI wiring -------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("### Interactive Questions")

    question_audio = gr.Audio(label="Question", visible=False)
    audio_input = gr.Audio(type="filepath", label="Your answer", visible=True)
    transcription_output = gr.Textbox(label="Transcription", visible=True)
    btn_next = gr.Button("Next", visible=True)
    final_output = gr.HTML(visible=False)

    # next_question() always returns updates for these five components,
    # in exactly this order.
    quiz_outputs = [question_audio, audio_input, transcription_output, btn_next, final_output]

    # Speak the first question as soon as the page loads.
    demo.load(next_question, outputs=quiz_outputs)

    btn_next.click(next_question, outputs=quiz_outputs)
    audio_input.change(save_response, inputs=audio_input, outputs=transcription_output)

demo.launch()