etchen committed
Commit eb7096f (verified)
1 Parent(s): ee8c172

Update app.py

Files changed (1)
  1. app.py +18 -4
app.py CHANGED
@@ -1,5 +1,6 @@
 import gradio as gr
 import numpy as np
+from difflib import Differ
 
 # import spaces #[uncomment to use ZeroGPU]
 import torch
@@ -18,14 +19,20 @@ pipe = pipeline(task="automatic-speech-recognition", model=model_repo_id, device
 
 # @spaces.GPU #[uncomment to use ZeroGPU]
 def infer(
-    audio
+    audio,
+    target
 ):
     sampling_rate, wav = audio
     if wav.ndim > 1:
         wav = wav.mean(axis=1)
     wav = wav.astype(np.float32)
     wav /= np.max(np.abs(wav))
-    return pipe(wav)['text']
+    user_pron = pipe(wav)['text']
+
+    # compare texts
+    d = Differ()
+    d_toks = [(i[2:], i[0]) if i[0] != " " else None for i in d.compare(target, user_pron)]
+    return (user_pron, d_toks)
 
 css = """
 #col-container {
@@ -36,11 +43,18 @@ css = """
 
 with gr.Blocks(css=css) as demo:
     gr.Markdown(" # PhonoLearn")
+    target = gr.Textbox(label='Practice Sentence (Tâi-lô)')
     input_audio = gr.Audio(
        sources=["microphone", "upload"]
     )
-    output = gr.Textbox(label='Output')
-    input_audio.input(fn=infer, inputs=input_audio, outputs=output)
+    output = gr.Textbox(label='Your Pronunciation')
+    diff = gr.HighlightedText(
+        label='Comparison',
+        combine_adjacent=True,
+        show_legend=True,
+        color_map={'+': 'red', '-': 'green'}
+    )
+    input_audio.input(fn=infer, inputs=[input_audio, target], outputs=[output, diff])
 
 if __name__ == "__main__":
     demo.launch()
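
Note on the comparison step added in this commit: difflib.Differ.compare operates on sequences, so passing the two raw strings diffs them character by character. Each item it yields is a two-character prefix ("  ", "+ ", "- ") followed by a single character, which is why the comprehension slices i[2:] for the token and keys the category off i[0]. A minimal standalone sketch of that step, with made-up sample strings rather than anything from the app:

from difflib import Differ

# Illustrative inputs only; in the app, `target` comes from the textbox
# and `user_pron` from the ASR pipeline.
target = "pah"
user_pron = "pa"

d = Differ()
# Comparing strings diffs them character by character: each yielded
# item is "  x" (unchanged), "+ x" (only in user_pron), or "- x"
# (only in target).
d_toks = [(i[2:], i[0]) if i[0] != " " else None
          for i in d.compare(target, user_pron)]
print(d_toks)  # [None, None, ('h', '-')]

The '+'/'-' categories line up with the color_map keys on the HighlightedText component. One thing to watch: unchanged characters are mapped to bare None entries here, dropping them from the list; a variant often seen in Gradio's HighlightedText examples instead keeps them as (i[2:], None), which leaves matching characters visible in the highlighted output and may be closer to the intended display.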