Sofia Casadei committed on
Commit
9784bd2
·
1 Parent(s): 950ef75
Files changed (2) hide show
  1. main.py +3 -3
  2. static/index-screen.html +1 -1
main.py CHANGED
@@ -150,17 +150,17 @@ stream = Stream(
150
  # If, after the user started speaking, there is a chunk with less than speech_threshold seconds of speech, the user stopped speaking. (default 0.1)
151
  speech_threshold=0.1,
152
  # Max duration of speech chunks before the handler is triggered, even if a pause is not detected by the VAD model. (default -inf)
153
- max_continuous_speech_s=5
154
  ),
155
  model_options=SileroVadOptions(
156
  # Threshold for what is considered speech (default 0.5)
157
  threshold=0.5,
158
  # Final speech chunks shorter min_speech_duration_ms are thrown out (default 250)
159
- min_speech_duration_ms=200,
160
  # Max duration of speech chunks, longer will be split at the timestamp of the last silence that lasts more than 100ms (if any) or just before max_speech_duration_s (default float('inf')) (used internally in the VAD algorithm to split the audio that's passed to the algorithm)
161
  max_speech_duration_s=5,
162
  # Wait for ms at the end of each speech chunk before separating it (default 2000)
163
- min_silence_duration_ms=100,
164
  # Chunk size for VAD model. Can be 512, 1024, 1536 for 16k s.r. (default 1024)
165
  window_size_samples=1024,
166
  # Final speech chunks are padded by speech_pad_ms each side (default 400)
 
150
  # If, after the user started speaking, there is a chunk with less than speech_threshold seconds of speech, the user stopped speaking. (default 0.1)
151
  speech_threshold=0.1,
152
  # Max duration of speech chunks before the handler is triggered, even if a pause is not detected by the VAD model. (default -inf)
153
+ max_continuous_speech_s=15
154
  ),
155
  model_options=SileroVadOptions(
156
  # Threshold for what is considered speech (default 0.5)
157
  threshold=0.5,
158
  # Final speech chunks shorter min_speech_duration_ms are thrown out (default 250)
159
+ min_speech_duration_ms=250,
160
  # Max duration of speech chunks, longer will be split at the timestamp of the last silence that lasts more than 100ms (if any) or just before max_speech_duration_s (default float('inf')) (used internally in the VAD algorithm to split the audio that's passed to the algorithm)
161
  max_speech_duration_s=5,
162
  # Wait for ms at the end of each speech chunk before separating it (default 2000)
163
+ min_silence_duration_ms=200,
164
  # Chunk size for VAD model. Can be 512, 1024, 1536 for 16k s.r. (default 1024)
165
  window_size_samples=1024,
166
  # Final speech chunks are padded by speech_pad_ms each side (default 400)
static/index-screen.html CHANGED
@@ -54,7 +54,7 @@
54
  background: transparent; /* Transparent background (no highlighting) */
55
  border-radius: 0; /* No rounded corners */
56
  line-height: 1.6; /* Increases line spacing for readability */
57
- font-size: 3.5rem; /* rem means relative to the root font size */
58
  font-weight: 500; /* 500 = medium weight, 700 = bold */
59
  max-width: 98%; /* Full width within container */
60
  white-space: normal; /* Allows text to wrap normally */
 
54
  background: transparent; /* Transparent background (no highlighting) */
55
  border-radius: 0; /* No rounded corners */
56
  line-height: 1.6; /* Increases line spacing for readability */
57
+ font-size: 6rem; /* rem means relative to the root font size */
58
  font-weight: 500; /* 500 = medium weight, 700 = bold */
59
  max-width: 98%; /* Full width within container */
60
  white-space: normal; /* Allows text to wrap normally */