iisadia committed on
Commit cba85f4 · verified · 1 Parent(s): f909060

Update app.py

Files changed (1)
  1. app.py +29 -38
app.py CHANGED
@@ -1,7 +1,6 @@
 import streamlit as st
 from transformers import pipeline
 import torchaudio
-from audio_recorder_streamlit import audio_recorder
 import torch
 from io import BytesIO
 import hashlib
@@ -20,6 +19,17 @@ def process_audio(audio_bytes):
     waveform = resampler(waveform)
     return {"raw": waveform.numpy().squeeze(), "sampling_rate": 16000}
 
+# Custom audio recorder component
+def audio_recorder_component():
+    return st.audio(
+        "microphone",
+        format="audio/wav",
+        start_recording=True,
+        pause_threshold=1.0,
+        sample_rate=16000,
+        key="audio_recorder"
+    )
+
 # App Interface
 st.title("Real-Time Voice Typing")
 st.write("Speak and your words will appear immediately")
@@ -27,10 +37,8 @@ st.write("Speak and your words will appear immediately")
 # Initialize session state
 if 'text_input' not in st.session_state:
     st.session_state.text_input = ""
-if 'current_audio' not in st.session_state:
-    st.session_state.current_audio = None
-if 'is_recording' not in st.session_state:
-    st.session_state.is_recording = False
+if 'last_audio' not in st.session_state:
+    st.session_state.last_audio = None
 
 # Text display
 text_input = st.text_area(
@@ -39,47 +47,30 @@ text_input = st.text_area(
     height=300
 )
 
-# Audio recorder with callback
-def handle_recording(audio_bytes):
-    if audio_bytes:
-        st.session_state.current_audio = audio_bytes
-        process_current_audio()
+# Audio recording
+audio_bytes = audio_recorder_component()
 
-def process_current_audio():
-    if st.session_state.current_audio:
-        try:
-            audio_input = process_audio(st.session_state.current_audio)
-            whisper = load_model()
-            transcribed_text = whisper(audio_input)["text"].strip()
+# Process audio when available
+if audio_bytes and audio_bytes != st.session_state.last_audio:
+    st.session_state.last_audio = audio_bytes
+    try:
+        audio_input = process_audio(audio_bytes)
+        whisper = load_model()
+        transcribed_text = whisper(audio_input)["text"].strip()
+
+        if transcribed_text:
+            st.session_state.text_input += " " + transcribed_text
+            st.rerun()
 
-            if transcribed_text:
-                st.session_state.text_input += " " + transcribed_text
-                st.session_state.current_audio = None
-                st.rerun()
-
-        except Exception as e:
-            st.error(f"Error: {str(e)}")
-
-# Audio recorder component
-audio_bytes = audio_recorder(
-    pause_threshold=1.0,  # Faster response
-    text="Click to speak",
-    recording_color="#e8b62c",
-    neutral_color="#6aa36f",
-    callback=handle_recording,
-    key="audio_recorder"
-)
-
-# Process any pending audio
-if st.session_state.current_audio:
-    process_current_audio()
+    except Exception as e:
+        st.error(f"Error: {str(e)}")
 
 # Control buttons
 col1, col2 = st.columns(2)
with col1:
     if st.button("Clear Text"):
         st.session_state.text_input = ""
-        st.session_state.current_audio = None
+        st.session_state.last_audio = None
         st.rerun()
 with col2:
     st.download_button(
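
Note on the new recorder: Streamlit's st.audio is a playback element rather than a recorder, so the audio_recorder_component() added above may not behave as intended. Below is a minimal sketch of an alternative built on st.audio_input, the microphone widget available in recent Streamlit releases; the function name and session-state key mirror this diff, while the "Click to speak" label and the hand-off step are illustrative assumptions, not the committed code.

# Sketch only, assuming a Streamlit version that provides st.audio_input.
import streamlit as st

def audio_recorder_component():
    # st.audio_input renders a record button and returns an UploadedFile of WAV bytes, or None.
    recording = st.audio_input("Click to speak", key="audio_recorder")
    return recording.read() if recording is not None else None

audio_bytes = audio_recorder_component()
if audio_bytes and audio_bytes != st.session_state.get("last_audio"):
    st.session_state.last_audio = audio_bytes
    # In the real app these WAV bytes would go through process_audio() and the Whisper pipeline.
    st.write(f"Captured {len(audio_bytes)} bytes of audio")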