ales committed on
Commit
aca9f3d
·
1 Parent(s): d71b5df

updated output format

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import json
2
 
3
  import numpy as np
4
 
@@ -19,6 +19,7 @@ LM_HUB_FP = 'language_model/cv8be_5gram.bin'
19
 
20
  def main(audio_fp: str):
21
  audio, sampling_rate = torchaudio.load(audio_fp, normalize=True)
 
22
 
23
  # convert stereo to mono
24
  converted_to_mono = False
@@ -39,16 +40,18 @@ def main(audio_fp: str):
39
 
40
  # recognize speech
41
  res = pipeline(inputs=inputs)
42
- # text_recognized = res['text'][0]
43
 
 
 
44
  res['sampling_rate_orig'] = sampling_rate
45
- res['init_audio_shape'] = audio.shape
46
  res['converted_to_mono'] = converted_to_mono
 
47
  res['inputs_shape'] = inputs.shape
48
  res['inputs_max'] = np.max(inputs).item()
49
  res['inputs_min'] = np.min(inputs).item()
50
 
51
- res_str = json.dumps(res, indent=2)
52
 
53
  return res_str
54
 
 
1
+ from pprint import pformat
2
 
3
  import numpy as np
4
 
 
19
 
20
  def main(audio_fp: str):
21
  audio, sampling_rate = torchaudio.load(audio_fp, normalize=True)
22
+ init_audio_shape = audio.shape
23
 
24
  # convert stereo to mono
25
  converted_to_mono = False
 
40
 
41
  # recognize speech
42
  res = pipeline(inputs=inputs)
 
43
 
44
+ # add additional information to the output
45
+ res['text'] = res['text'][0] # unpack batch of size 1
46
  res['sampling_rate_orig'] = sampling_rate
47
+ res['init_audio_shape'] = init_audio_shape
48
  res['converted_to_mono'] = converted_to_mono
49
+ res['resampled_audio_shape'] = audio_resampled.shape
50
  res['inputs_shape'] = inputs.shape
51
  res['inputs_max'] = np.max(inputs).item()
52
  res['inputs_min'] = np.min(inputs).item()
53
 
54
+ res_str = pformat(res)
55
 
56
  return res_str
57