# KASOTI_GAME / app.py
# iisadia's picture
# Update app.py
# 8b91795 verified
# raw / history / blame
# 2.09 kB
import streamlit as st
from transformers import pipeline
import numpy as np
import soundfile as sf
from io import BytesIO
from stt_audiorec import stt_audiorec
# Build the Whisper ASR pipeline once per server process; st.cache_resource
# keeps the loaded model alive across Streamlit reruns.
@st.cache_resource
def load_model():
    """Return a cached automatic-speech-recognition pipeline (Whisper small)."""
    return pipeline("automatic-speech-recognition", model="openai/whisper-small")
st.title("Text Entry with Voice Input")
st.write("Enter text manually or use voice input:")

# Persist the merged text/voice result across Streamlit reruns.
if "combined_text" not in st.session_state:
    st.session_state.combined_text = ""

# Two-column layout: typed text on the left, voice recorder on the right.
col1, col2 = st.columns(2)
with col1:
    # Free-form typed input; re-read on every rerun.
    text_input = st.text_area("Type your text here:", height=200)

with col2:
    # Browser-side recorder widget; yields encoded audio bytes (or a
    # falsy value while nothing has been recorded yet).
    st.write("Record your voice:")
    audio_bytes = stt_audiorec()
# Transcribe a finished recording and merge it with the typed text.
if audio_bytes:
    try:
        # Decode the recorded container (WAV) into a sample array.
        with BytesIO(audio_bytes) as audio_file:
            audio_data, sample_rate = sf.read(audio_file)

        # Whisper expects a mono signal: average the channels if stereo.
        if audio_data.ndim > 1:
            audio_data = np.mean(audio_data, axis=1)

        # Fix: soundfile decodes to float64 by default, while the Whisper
        # feature extractor expects float32 samples — convert once here.
        audio_data = audio_data.astype(np.float32)

        # Passing the original sampling_rate lets the pipeline resample
        # to the model's expected rate internally.
        whisper = load_model()
        transcribed_text = whisper(
            {"raw": audio_data, "sampling_rate": sample_rate}
        )["text"]

        # Append the transcription to whatever was typed so far.
        st.session_state.combined_text = f"{text_input}\n{transcribed_text}".strip()
    except Exception as e:
        # UI boundary: surface decode/transcription failures to the user
        # instead of crashing the app.
        st.error(f"Error processing audio: {str(e)}")
# Show the combined input and offer it as a download when Submit is clicked.
if st.button("Submit"):
    if not text_input and not audio_bytes:
        st.warning("Please enter text or record audio")
    else:
        # Bug fix: with typed text only (no recording), the transcription
        # branch never ran, so combined_text stayed stale/empty. Fall back
        # to the text-area content in that case.
        if not audio_bytes:
            st.session_state.combined_text = text_input.strip()

        st.subheader("Combined Input:")
        st.write(st.session_state.combined_text)

        # Let the user save the merged text as a plain-text file.
        st.download_button(
            label="Download Text",
            data=st.session_state.combined_text,
            file_name="combined_input.txt",
            mime="text/plain"
        )