# VoiceOfPatient.py
# Records the patient's voice from the microphone, saves it as an MP3 file and
# provides a helper that transcribes an audio file through the Groq API.

import logging
import os
import warnings
from io import BytesIO

import speech_recognition as sr
from dotenv import load_dotenv
from groq import Groq
from pydub import AudioSegment
from pydub.utils import which

warnings.filterwarnings("ignore")
load_dotenv()

# Get the ffmpeg path from the environment and register it with pydub
ffmpeg_path = os.getenv("FFMPEG_PATH")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

if ffmpeg_path:
    AudioSegment.converter = ffmpeg_path
else:
    raise EnvironmentError("FFMPEG_PATH is not set. Please define it in the .env file.")

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def record_audio(file_path, timeout=20, phrase_time_limit=None):
    """
    Record audio from the microphone and save it as an MP3 file.

    Any exception raised while recording or exporting is logged and
    swallowed, so the function always returns None and callers cannot tell
    from the return value whether the file was written.

    Args:
        file_path (str): Path to save the recorded audio file.
        timeout (int): Max time to wait for speech to start (in seconds).
        phrase_time_limit (int or None): Max length of the speech (in seconds).
    """
    recognizer = sr.Recognizer()

    try:
        with sr.Microphone() as source:
            logging.info("Adjusting for ambient noise...")
            recognizer.adjust_for_ambient_noise(source, duration=1)
            logging.info("Start speaking now...")

            audio_data = recognizer.listen(
                source,
                timeout=timeout,
                phrase_time_limit=phrase_time_limit,
            )
            logging.info("Recording complete.")

        # The microphone is only needed while capturing; the WAV -> MP3
        # conversion works on the in-memory recording.
        wav_data = audio_data.get_wav_data()
        audio_segment = AudioSegment.from_wav(BytesIO(wav_data))
        audio_segment.export(file_path, format="mp3", bitrate="128k")

        logging.info(f"Audio saved to: {file_path}")
    except Exception as e:
        # Deliberately best-effort: report the failure instead of crashing.
        logging.error(f"An error occurred: {e}")


audio_file_path = "patientvoicetest.mp3"
# NOTE(review): this call runs at import time, so importing this module starts
# a microphone recording. It looks like it was meant to live under the
# `__main__` guard below -- confirm with the importing code before moving it.
record_audio(file_path=audio_file_path)

# Set up the Groq client used to transcribe the recorded voice to text
client = Groq(api_key=GROQ_API_KEY)


def transcribe_with_whisper(audio_file_path, model_name="whisper-large-v3"):
    """
    Transcribe an audio file to English text with Groq's transcription API.

    Args:
        audio_file_path (str): Path of the audio file to transcribe.
        model_name (str): Speech-to-text model to use. This must be a
            transcription (Whisper) model; chat models such as the Llama
            family are not valid for the audio transcription endpoint.

    Returns:
        The ``text`` attribute of the transcription response.
    """
    with open(audio_file_path, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model=model_name,
            file=audio_file,
            language="en",
        )

    return transcription.text


if __name__ == "__main__":
    pass