File size: 1,559 Bytes
ab2f0cf
 
 
 
d587e3c
ab2f0cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import requests
import pytz
import streamlit as st
from datetime import datetime
from audio_recorder_streamlit import audio_recorder

# Hugging Face endpoint URL that query() posts the recorded audio to.
API_URL = "https://tonpixzfvq3791u9.us-east-1.aws.endpoints.huggingface.cloud"
# Bearer token sent with every request.
# NOTE(review): the value looks like a public test token; confirm before
# reusing this pattern with a private credential, which should not be
# hard-coded in source.
key = 'test-public-anonymous-T4-Whisper-Small-En'
headers = {
    # Must be an f-string: a plain string would send the literal text
    # "{key}" instead of the token value.
    "Authorization": f"Bearer {key}",
    # The request body is the raw bytes of a WAV file.
    "Content-Type": "audio/wav",
}

def generate_filename(prompt, file_type):
    """Build a timestamped, filesystem-friendly file name from a prompt.

    The name has the form ``<MMDD_HHMM>_<slug>.<file_type>``, where the
    timestamp is the current US/Central time and the slug is ``prompt``
    with spaces and newlines turned into underscores, every character that
    is neither alphanumeric nor an underscore dropped, and the result cut
    to at most 90 characters.

    Args:
        prompt: Text to derive the descriptive part of the name from.
        file_type: Extension to append, without the leading dot.

    Returns:
        The generated file name as a string.
    """
    stamp = datetime.now(pytz.timezone('US/Central')).strftime("%m%d_%H%M")
    underscored = prompt.replace("\n", "_").replace(" ", "_")
    kept = [ch for ch in underscored if ch == "_" or ch.isalnum()]
    # Each list element is a single character, so slicing the list before
    # joining yields the same 90-character cap as slicing the joined string.
    slug = "".join(kept[:90])
    return f"{stamp}_{slug}.{file_type}"

def query(filename, timeout=60):
    """Upload an audio file to the transcription endpoint and return its reply.

    Args:
        filename: Path of the audio file whose raw bytes are posted to
            ``API_URL``.
        timeout: Seconds to wait for the endpoint before giving up. Passed
            straight to ``requests.post``.

    Returns:
        The response body decoded from JSON. Error payloads returned by the
        endpoint are passed through unchanged, since the HTTP status is not
        checked here.

    Raises:
        OSError: If ``filename`` cannot be opened for reading.
        requests.exceptions.RequestException: On connection failure or when
            the timeout elapses.
        ValueError: If the response body is not valid JSON.
    """
    with open(filename, "rb") as f:
        data = f.read()
    # Without an explicit timeout, requests waits indefinitely on an
    # unresponsive endpoint, which would hang the app.
    response = requests.post(API_URL, headers=headers, data=data, timeout=timeout)
    return response.json()

def save_and_play_audio(audio_recorder):
    """Save recorded audio to a WAV file and play it back in the page.

    Args:
        audio_recorder: Either the recorded audio as raw bytes, or an object
            exposing ``get_audio()`` that returns those bytes.

    Returns:
        The name of the file the audio was written to, or ``None`` when
        there is no audio to save.
    """
    # Accept raw bytes as well as a recorder-like object, so the return
    # value of the imported audio_recorder() widget can be passed directly.
    if hasattr(audio_recorder, "get_audio"):
        audio_bytes = audio_recorder.get_audio()
    else:
        audio_bytes = audio_recorder
    if not audio_bytes:
        return None
    filename = generate_filename("Recording", "wav")
    with open(filename, 'wb') as f:
        f.write(audio_bytes)
    st.audio(audio_bytes, format="audio/wav")
    return filename


st.title("Speech to Text")
st.write("Record your speech and get the text.")

# audio_recorder is imported as a function (a Streamlit widget), not a class.
# Per the audio-recorder-streamlit README it renders the record button and
# returns the captured audio as bytes, or None while nothing has been
# recorded. The result is bound to a new name so the imported function is
# not shadowed.
recorded_audio = audio_recorder()
filename = save_and_play_audio(recorded_audio)
if filename:
    output = query(filename)
    st.write("Transcription:")
    st.write(output)