# Hugging Face Space (status: Sleeping)
# Streamlit app: text entry with Whisper voice transcription
# Standard library.
from io import BytesIO

# Third-party: streamlit UI, HF transformers (Whisper ASR),
# numpy/soundfile for audio decoding, browser audio-recorder widget.
import numpy as np
import soundfile as sf
import streamlit as st
from transformers import pipeline
from stt_audiorec import stt_audiorec  # NOTE(review): the PyPI widget is usually "st_audiorec" — confirm module name
# Load Whisper model.
@st.cache_resource
def load_model():
    """Return the Whisper-small automatic-speech-recognition pipeline.

    Decorated with ``st.cache_resource`` so the model is downloaded and
    initialized once per server process instead of on every Streamlit
    rerun that processes audio (the original rebuilt it per transcription).
    """
    return pipeline("automatic-speech-recognition", model="openai/whisper-small")
# Page header and instructions.
st.title("Text Entry with Voice Input")
st.write("Enter text manually or use voice input:")

# Session state carries the merged text+voice result across reruns;
# seed it with an empty string on first load.
st.session_state.setdefault("combined_text", "")

# Two-column layout: typed text on the left, voice recorder on the right.
col1, col2 = st.columns(2)
with col1:
    # Free-form typed input.
    text_input = st.text_area("Type your text here:", height=200)

with col2:
    # Browser-side recorder widget; yields raw audio bytes once a
    # recording exists, otherwise a falsy value.
    st.write("Record your voice:")
    audio_bytes = stt_audiorec()
# Process audio when a recording is available. Guard the (slow) Whisper
# call on the raw bytes so each recording is transcribed once, not on
# every Streamlit rerun; the merge with the typed text still re-runs so
# edits to the text area are picked up.
if audio_bytes:
    try:
        if audio_bytes != st.session_state.get("last_audio"):
            # Decode the recorded bytes into a float array + sample rate.
            with BytesIO(audio_bytes) as audio_file:
                audio_data, sample_rate = sf.read(audio_file)

            # Whisper expects mono; average the channels if stereo.
            if audio_data.ndim > 1:
                audio_data = np.mean(audio_data, axis=1)

            # The HF ASR pipeline accepts {"raw": ndarray, "sampling_rate": int}.
            whisper = load_model()
            audio_dict = {"raw": audio_data, "sampling_rate": sample_rate}
            st.session_state.transcript = whisper(audio_dict)["text"]
            st.session_state.last_audio = audio_bytes

        # Merge typed text and transcription; strip() drops the stray
        # newline when either part is empty.
        st.session_state.combined_text = (
            f"{text_input}\n{st.session_state.transcript}".strip()
        )
    except Exception as e:
        # Surface decoding/model failures to the user instead of crashing.
        st.error(f"Error processing audio: {str(e)}")
# Combine inputs when the button is clicked.
if st.button("Submit"):
    if not text_input and not audio_bytes:
        st.warning("Please enter text or record audio")
    else:
        # Bug fix: when only typed text is present (no recording was made,
        # so the audio branch never ran), combined_text was never set and
        # Submit displayed/downloaded an empty string. Fall back to the
        # text-area contents.
        if not st.session_state.combined_text:
            st.session_state.combined_text = text_input.strip()

        # Display the merged result.
        st.subheader("Combined Input:")
        st.write(st.session_state.combined_text)

        # Offer the merged text as a downloadable .txt file.
        st.download_button(
            label="Download Text",
            data=st.session_state.combined_text,
            file_name="combined_input.txt",
            mime="text/plain",
        )