Spaces:

Waris01
/

ChatWithDoctorAny

Sleeping

App Files Community

ChatWithDoctorAny / VoiceOfPatient.py

Waris01

Upload 6 files

1bbda65 verified 4 months ago

raw

history blame contribute delete

2.5 kB

	# VoiceOfPatient.py
	import logging
	import speech_recognition as sr
	from pydub import AudioSegment
	from io import BytesIO
	import os
	from groq import Groq
	from dotenv import load_dotenv
	import warnings
	from pydub import AudioSegment
	from pydub.utils import which

	# Silence every warning category for the whole process.
	warnings.filterwarnings("ignore")
	# Load variables from a .env file (if any) into the environment before reading them.
	load_dotenv()

	# Configuration read from the environment. GROQ_API_KEY is None when unset;
	# FFMPEG_PATH is mandatory and checked right below.
	GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
	ffmpeg_path = os.getenv("FFMPEG_PATH")

	# Fail fast at import time when no ffmpeg location is configured; otherwise
	# register the configured binary as pydub's converter.
	if not ffmpeg_path:
	    raise EnvironmentError("FFMPEG_PATH is not set. Please define it in the .env file.")
	AudioSegment.converter = ffmpeg_path

	# Configure logging: INFO level, timestamped "time - level - message" lines.
	logging.basicConfig(
	    level=logging.INFO,
	    format='%(asctime)s - %(levelname)s - %(message)s',
	)

	def record_audio(file_path, timeout=20, phrase_time_limit=None):
	    """
	    Capture one spoken phrase from the microphone and write it out as MP3.

	    The recognizer is first calibrated against one second of ambient noise,
	    then listens for a phrase. The captured audio is taken as WAV bytes and
	    exported through pydub as a 128k MP3 at ``file_path``.

	    Any exception raised while opening the microphone, listening, or
	    exporting is caught and logged at ERROR level; nothing is re-raised and
	    the function returns None either way.

	    Args:
	        file_path (str): Path to save the recorded audio file.
	        timeout (int): Max time to wait for speech to start (in seconds).
	        phrase_time_limit (int or None): Max length of the speech (in seconds).
	    """
	    listener = sr.Recognizer()

	    try:
	        with sr.Microphone() as mic:
	            logging.info("Adjusting for ambient noise...")
	            listener.adjust_for_ambient_noise(mic, duration=1)
	            logging.info("Start speaking now...")

	            captured = listener.listen(
	                mic,
	                timeout=timeout,
	                phrase_time_limit=phrase_time_limit,
	            )
	            logging.info("Recording complete.")

	            # Wrap the WAV bytes in an in-memory buffer so pydub can read
	            # them without a temporary file, then re-encode as MP3.
	            wav_buffer = BytesIO(captured.get_wav_data())
	            AudioSegment.from_wav(wav_buffer).export(
	                file_path,
	                format="mp3",
	                bitrate="128k",
	            )

	            logging.info(f"Audio saved to: {file_path}")
	    except Exception as e:
	        # Best-effort recording: report the failure instead of propagating it.
	        logging.error(f"An error occurred: {e}")

	# Output path for the recorded patient audio (relative to the working directory).
	audio_file_path = "patientvoicetest.mp3"
	# NOTE(review): this call sits at module level, so a recording is attempted every
	# time the module is executed or imported, not only when run as a script —
	# confirm this is intended.
	record_audio(file_path=audio_file_path)

	# Set up the Groq client used by the speech-to-text function below.
	# GROQ_API_KEY is read from the environment and is None when the variable is unset.
	client = Groq(api_key=GROQ_API_KEY)
	def transcribe_with_whisper(audio_file_path, model_name="whisper-large-v3"):
	    """
	    Transcribe an audio file to English text via the Groq transcription API.

	    The file is opened in binary mode and sent through the module-level
	    ``client`` to the audio transcription endpoint with the language fixed
	    to English.

	    Args:
	        audio_file_path (str): Path of the audio file to transcribe.
	        model_name (str): Speech-to-text model to use. Defaults to a Whisper
	            model; the previous default named a chat model, which is not a
	            transcription model.

	    Returns:
	        The ``text`` attribute of the transcription response.

	    Raises:
	        OSError: If ``audio_file_path`` cannot be opened for reading.
	    """
	    with open(audio_file_path, "rb") as audio_file:
	        transcription = client.audio.transcriptions.create(
	            model=model_name,
	            file=audio_file,
	            language="en",
	        )

	    return transcription.text


	# Script entry point: deliberately a no-op placeholder.
	if __name__ == "__main__":
	    pass