pragnakalp committed on
Commit
b477175
·
1 Parent(s): e5bc2ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -1
app.py CHANGED
@@ -97,11 +97,13 @@ def one_shot(image,input_text,gender):
97
  sound = AudioSegment.from_file(f.name, format="mp3")
98
  sound.export("/content/audio.wav", format="wav")
99
 
100
- waveform, sample_rate = torchaudio.load(audio_in)
101
  torchaudio.save("/content/audio.wav", waveform, sample_rate, encoding="PCM_S", bits_per_sample=16)
102
  image = Image.open(image_in)
103
  image = pad_image(image)
104
  image.save("/content/image_pre.png")
 
 
105
  pocketsphinx_run = subprocess.run(['pocketsphinx', '-phone_align', 'yes', 'single', '/content/audio.wav'], check=True, capture_output=True)
106
  jq_run = subprocess.run(['jq', '[.w[]|{word: (.t | ascii_upcase | sub("<S>"; "sil") | sub("<SIL>"; "sil") | sub("\\\(2\\\)"; "") | sub("\\\(3\\\)"; "") | sub("\\\(4\\\)"; "") | sub("\\\[SPEECH\\\]"; "SIL") | sub("\\\[NOISE\\\]"; "SIL")), phones: [.w[]|{ph: .t | sub("\\\+SPN\\\+"; "SIL") | sub("\\\+NSN\\\+"; "SIL"), bg: (.b*100)|floor, ed: (.b*100+.d*100)|floor}]}]'], input=pocketsphinx_run.stdout, capture_output=True)
107
  with open("test.json", "w") as f:
 
97
  sound = AudioSegment.from_file(f.name, format="mp3")
98
  sound.export("/content/audio.wav", format="wav")
99
 
100
+ waveform, sample_rate = torchaudio.load("/content/audio.wav")
101
  torchaudio.save("/content/audio.wav", waveform, sample_rate, encoding="PCM_S", bits_per_sample=16)
102
  image = Image.open(image_in)
103
  image = pad_image(image)
104
  image.save("/content/image_pre.png")
105
+ return "/content/audio.wav"
106
+ exit()
107
  pocketsphinx_run = subprocess.run(['pocketsphinx', '-phone_align', 'yes', 'single', '/content/audio.wav'], check=True, capture_output=True)
108
  jq_run = subprocess.run(['jq', '[.w[]|{word: (.t | ascii_upcase | sub("<S>"; "sil") | sub("<SIL>"; "sil") | sub("\\\(2\\\)"; "") | sub("\\\(3\\\)"; "") | sub("\\\(4\\\)"; "") | sub("\\\[SPEECH\\\]"; "SIL") | sub("\\\[NOISE\\\]"; "SIL")), phones: [.w[]|{ph: .t | sub("\\\+SPN\\\+"; "SIL") | sub("\\\+NSN\\\+"; "SIL"), bg: (.b*100)|floor, ed: (.b*100+.d*100)|floor}]}]'], input=pocketsphinx_run.stdout, capture_output=True)
109
  with open("test.json", "w") as f: