|
import httpx |
|
from queue import Queue |
|
import logging |
|
from datetime import UTC, datetime, timedelta |
|
from time import sleep |
|
import pickle |
|
|
|
import speech_recognition as sr |
|
|
|
from audio_utils import get_microphone, get_speech_recognizer, get_all_audio_queue, to_audio_array, AudioChunk |
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Endpoint that receives the pickled audio array of each finished chunk via HTTP POST.
TRANSCRIBING_SERVER = "http://localhost:3535/transcribe"
|
|
|
|
|
def _send_chunk(audio_chunk: "AudioChunk") -> bool:
    """POST the audio array of one finished chunk to ``TRANSCRIBING_SERVER``.

    The array is serialized with ``pickle`` and sent as the raw request body.
    Progress is printed to stdout around the serialization and the request.

    Args:
        audio_chunk: Chunk whose ``audio_array`` attribute is uploaded.

    Returns:
        ``True`` when the server produced a response (whatever its status
        code), ``False`` when the request itself failed (connection error,
        timeout, ...). Failures are logged instead of raised so that one bad
        request does not stop the capture loop.
    """
    print('start serialize')
    # NOTE(review): pickle is the wire format, so the server has to unpickle
    # this payload. That is only safe while the server accepts requests from
    # trusted clients exclusively - confirm, or switch to a data-only format.
    serialized = pickle.dumps(audio_chunk.audio_array)
    print('end serialize')

    print('start req')
    try:
        # Raw bytes belong in ``content=``; passing bytes through ``data=``
        # is deprecated by httpx.
        response = httpx.post(TRANSCRIBING_SERVER, content=serialized)
    except httpx.HTTPError as exc:
        logger.warning("Transcription request failed: %s", exc)
        return False
    print('req done', response.text, response.status_code)
    return True


def main() -> None:
    """Record from the microphone and stream audio chunks to the transcription server.

    Raw audio delivered by the recognizer's background listener is collected
    in a queue. The loop polls that queue every 0.25 s and appends new audio
    to the current ``AudioChunk``. Once the chunk is older than
    ``recording_duration`` seconds it is closed; a closed chunk that reports
    ``is_complete`` is uploaded and a new chunk is started from the audio
    that was just drained.

    The loop runs until ``KeyboardInterrupt`` (Ctrl+C). On interrupt the
    chunk in progress is closed and, if complete, uploaded before returning.
    """
    recording_duration = 2  # seconds of audio per chunk (used with timedelta below)
    sample_rate = 16000  # forwarded to get_microphone; presumably Hz - TODO confirm
    energy_threshold = 300  # forwarded to get_speech_recognizer

    # Hand-off point between the listener callback and the loop below.
    data_queue = Queue()

    microphone = get_microphone(sample_rate=sample_rate)
    speech_recognizer = get_speech_recognizer(energy_threshold=energy_threshold)

    with microphone:
        speech_recognizer.adjust_for_ambient_noise(source=microphone)

    def record_callback(_, audio: sr.AudioData) -> None:
        """Push the raw bytes of a captured phrase onto the queue."""
        data_queue.put(audio.get_raw_data())

    speech_recognizer.listen_in_background(source=microphone, callback=record_callback)

    print("\n🎤 Microphone is now listening...\n")

    chunk_length = timedelta(seconds=recording_duration)
    current_audio_chunk = AudioChunk(start_time=datetime.now(tz=UTC))

    while True:
        try:
            now = datetime.now(tz=UTC)

            if not data_queue.empty():
                # Close the chunk once it has been open longer than the target length.
                if now - current_audio_chunk.start_time > chunk_length:
                    current_audio_chunk.end_time = now

                audio_data = get_all_audio_queue(data_queue)
                audio_np_array = to_audio_array(audio_data)

                if current_audio_chunk.is_complete:
                    _send_chunk(current_audio_chunk)

                    # The audio drained above was not part of the chunk that
                    # was just sent, so it seeds the next one.
                    current_audio_chunk = AudioChunk(
                        audio_array=audio_np_array, start_time=datetime.now(tz=UTC)
                    )
                else:
                    current_audio_chunk.update_array(audio_np_array)

                # Flush stdout so the progress output appears immediately.
                print("", end="", flush=True)

            # Avoid a busy loop while waiting for more audio.
            sleep(0.25)
        except KeyboardInterrupt:
            current_audio_chunk.end_time = datetime.now(tz=UTC)
            if current_audio_chunk.is_complete:
                logger.warning("⚠️ Transcribing last chunk...")
                # Previously the last chunk was announced but never sent,
                # silently dropping the final audio.
                _send_chunk(current_audio_chunk)
            break
|
|
|
|
|
# Run the capture loop only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|
|