File size: 2,495 Bytes
1bbda65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# VoiceOfPatient.py
import logging
import speech_recognition as sr
from pydub import AudioSegment
from io import BytesIO
import os
from groq import Groq
from dotenv import load_dotenv
import warnings
from pydub import AudioSegment
from pydub.utils import which

# Silence noisy third-party warnings and load configuration from the .env file.
warnings.filterwarnings("ignore")
load_dotenv()

# Register the ffmpeg binary with pydub so MP3 export works.
ffmpeg_path = os.getenv("FFMPEG_PATH")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if ffmpeg_path:
    AudioSegment.converter = ffmpeg_path
else:
    raise EnvironmentError("FFMPEG_PATH is not set. Please define it in the .env file.")
# Fail fast on a missing API key (mirrors the FFMPEG_PATH check above) instead of
# letting the Groq client fail later with an opaque authentication error.
if not GROQ_API_KEY:
    raise EnvironmentError("GROQ_API_KEY is not set. Please define it in the .env file.")

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def record_audio(file_path, timeout=20, phrase_time_limit=None):
    """Record audio from the microphone and save it as an MP3 file.

    Args:
        file_path (str): Path to save the recorded audio file.
        timeout (int): Max time to wait for speech to start (in seconds).
        phrase_time_limit (int or None): Max length of the speech (in seconds).
    """
    recognizer = sr.Recognizer()

    try:
        with sr.Microphone() as source:
            logging.info("Adjusting for ambient noise...")
            recognizer.adjust_for_ambient_noise(source, duration=1)
            logging.info("Start speaking now...")

            audio_data = recognizer.listen(source, timeout=timeout, phrase_time_limit=phrase_time_limit)
            logging.info("Recording complete.")

            # speech_recognition produces WAV bytes; convert in memory and
            # export as MP3 via pydub (requires the registered ffmpeg binary).
            wav_data = audio_data.get_wav_data()
            audio_segment = AudioSegment.from_wav(BytesIO(wav_data))
            audio_segment.export(file_path, format="mp3", bitrate="128k")

            logging.info(f"Audio saved to: {file_path}")

    except Exception as e:
        # Best-effort: keep the caller alive, but record the full traceback
        # (logging.exception) instead of just the message for debuggability.
        logging.exception(f"An error occurred: {e}")

# Module-level Groq client used by transcribe_with_whisper().
client = Groq(api_key=GROQ_API_KEY)

# Record only when executed as a script. Previously record_audio() ran at
# import time, which blocked on the microphone for anything importing this
# module just to use transcribe_with_whisper().
if __name__ == "__main__":
    audio_file_path = "patientvoicetest.mp3"
    record_audio(file_path=audio_file_path)
def transcribe_with_whisper(audio_file_path, model_name="whisper-large-v3"):
    """Transcribe an audio file to English text using Groq's speech-to-text API.

    Args:
        audio_file_path (str): Path of the audio file to transcribe.
        model_name (str): Groq transcription model id. Defaults to
            "whisper-large-v3" — the previous default
            ("meta-llama/llama-4-scout-17b-16e-instruct") is a chat model,
            which the audio transcription endpoint rejects.

    Returns:
        str: The transcribed text.
    """
    # Open in binary mode; the client streams the file to the API.
    with open(audio_file_path, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model=model_name,
            file=audio_file,
            language="en",
        )

    return transcription.text


if __name__ == "__main__":
    # Entry-point placeholder; no additional CLI behavior defined here.
    pass