Spaces:
Runtime error
Runtime error
refine recording
Browse files
app.py
CHANGED
|
@@ -22,10 +22,7 @@ WHISPER_MODEL.to(device)
|
|
| 22 |
def transcribe(aud_inp):
|
| 23 |
if aud_inp is None:
|
| 24 |
return ""
|
| 25 |
-
|
| 26 |
-
aud = whisper.load_audio(aud_inp)
|
| 27 |
-
elif isinstance(aud_inp, bytes): # if st_audiorec
|
| 28 |
-
aud = np.frombuffer(aud_inp, dtype=np.uint8).flatten().astype(np.float32) / 255.0
|
| 29 |
aud = whisper.pad_or_trim(aud)
|
| 30 |
mel = whisper.log_mel_spectrogram(aud).to(device)
|
| 31 |
_, probs = WHISPER_MODEL.detect_language(mel)
|
|
@@ -43,6 +40,22 @@ def transcribe(aud_inp):
|
|
| 43 |
return result_text
|
| 44 |
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
st.set_page_config(page_title='ChatGPT Assistant', layout='wide', page_icon='🤖')
|
| 47 |
# 自定义元素样式
|
| 48 |
st.markdown(css_code, unsafe_allow_html=True)
|
|
@@ -232,6 +245,7 @@ with tap_input:
|
|
| 232 |
wav_audio_data = st_audiorec()
|
| 233 |
if wav_audio_data is not None:
|
| 234 |
st.audio(wav_audio_data, format='audio/wav')
|
|
|
|
| 235 |
user_input = transcribe(wav_audio_data)
|
| 236 |
|
| 237 |
if submitted or wav_audio_data is not None:
|
|
|
|
| 22 |
def transcribe(aud_inp):
|
| 23 |
if aud_inp is None:
|
| 24 |
return ""
|
| 25 |
+
aud = whisper.load_audio(aud_inp)
|
|
|
|
|
|
|
|
|
|
| 26 |
aud = whisper.pad_or_trim(aud)
|
| 27 |
mel = whisper.log_mel_spectrogram(aud).to(device)
|
| 28 |
_, probs = WHISPER_MODEL.detect_language(mel)
|
|
|
|
| 40 |
return result_text
|
| 41 |
|
| 42 |
|
| 43 |
+
def bytes_to_wav(wav_bytes, output_wav_file="output.wav",
                 sample_width=1,     # bytes per sample (1 => 8-bit PCM)
                 sample_rate=44100,  # sample rate in Hz
                 num_channels=1,     # 1 => mono
                 ):
    """Persist recorded audio bytes to ``output_wav_file`` as a WAV file.

    ``wav_bytes`` may be either of:

    * a complete RIFF/WAVE file image (bytes that are already playable as
      ``audio/wav``). These are written to disk unchanged, so the header
      that ships with the recording -- and therefore its real sample rate,
      sample width and channel count -- is preserved. ``sample_width``,
      ``sample_rate`` and ``num_channels`` are ignored in this case.
    * headerless PCM samples. These are wrapped in a new uncompressed PCM
      WAV header built from ``sample_width``, ``sample_rate`` and
      ``num_channels``.

    Args:
        wav_bytes: Bytes-like object holding the audio data.
        output_wav_file: Destination path; an existing file is overwritten.
        sample_width: Bytes per sample, used for headerless input only.
        sample_rate: Frames per second, used for headerless input only.
        num_channels: Channel count, used for headerless input only.

    Returns:
        None. Kept deliberately so that calling this as a bare expression
        statement in a Streamlit script does not render anything.
    """
    # A WAV container starts with b"RIFF", a 4-byte size field, then b"WAVE".
    header = bytes(wav_bytes[:12])
    if header[:4] == b"RIFF" and header[8:12] == b"WAVE":
        # Already a full WAV file: feeding it to wave.writeframes() would
        # prepend a second header and treat the original header as samples.
        with open(output_wav_file, "wb") as out_file:
            out_file.write(wav_bytes)
        return

    # Headerless PCM: build the container with the wave module.
    with wave.open(output_wav_file, 'wb') as wav_file:
        wav_file.setnchannels(num_channels)
        wav_file.setsampwidth(sample_width)
        wav_file.setframerate(sample_rate)
        wav_file.setcomptype('NONE', 'not compressed')
        # Write the raw samples; wave patches the header sizes on close.
        wav_file.writeframes(wav_bytes)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
######################################################################################
|
| 59 |
st.set_page_config(page_title='ChatGPT Assistant', layout='wide', page_icon='🤖')
|
| 60 |
# 自定义元素样式
|
| 61 |
st.markdown(css_code, unsafe_allow_html=True)
|
|
|
|
| 245 |
wav_audio_data = st_audiorec()
|
| 246 |
if wav_audio_data is not None:
|
| 247 |
st.audio(wav_audio_data, format='audio/wav')
|
| 248 |
+
bytes_to_wav(wav_audio_data, sample_rate=16000)
|
| 249 |
user_input = transcribe(wav_audio_data)
|
| 250 |
|
| 251 |
if submitted or wav_audio_data is not None:
|