Waris01 committed on
Commit 1bbda65 · verified · 1 Parent(s): 43682e1

Upload 6 files

Files changed (6)
  1. ImageForDoctor.py +54 -0
  2. README.md +2 -14
  3. VoiceOfDoctor.py +49 -0
  4. VoiceOfPatient.py +75 -0
  5. app.py +48 -0
  6. requirements.txt +7 -0
ImageForDoctor.py ADDED
@@ -0,0 +1,54 @@
+ import os
+ import base64  # encodes image bytes as a base64 string
+ from dotenv import load_dotenv
+ from groq import Groq
+
+ # Step 1: Groq API setup
+ load_dotenv()
+ GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
+ client = Groq(api_key=GROQ_API_KEY)
+
+ # Step 2: encode the image into base64 format
+ # image_path = "acne.jpg"
+ # model = "llama-3.2-90b-vision-preview"  # unused; the vision call below names its model directly
+ def encodeimage(image_path):
+     if not os.path.exists(image_path):
+         raise FileNotFoundError(f"Image file not found: {image_path}")
+     with open(image_path, "rb") as image_file:
+         return base64.b64encode(image_file.read()).decode("utf-8")
+
+ # Step 3: set up the Groq vision call
+ def AnalyzeImagewithQuery(query, encoded_image):
+     messages = [
+         {
+             "role": "user",
+             "content": [
+                 {
+                     "type": "text",
+                     "text": query
+                 },
+                 {
+                     "type": "image_url",
+                     "image_url": {
+                         "url": f"data:image/jpeg;base64,{encoded_image}"
+                     }
+                 }
+             ]
+         }
+     ]
+     chat_completion = client.chat.completions.create(
+         messages=messages,
+         model="meta-llama/llama-4-scout-17b-16e-instruct",
+         temperature=0.7
+     )
+     return chat_completion.choices[0].message.content
+
+ if __name__ == "__main__":
+     query = "What happened to my face? Can you analyze it?"
+     e_image = encodeimage("acne.jpg")
+     print(AnalyzeImagewithQuery(encoded_image=e_image, query=query))
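The data URL above hardcodes image/jpeg, which works for the acne.jpg example but mislabels PNG uploads. If other formats may come in, a small variant can build the data URL from the file's actual MIME type. This is only a hedged sketch; encode_image_as_data_url is a hypothetical helper that is not part of this commit.

```python
# Sketch (not in this commit): build the data URL from the file's real MIME type instead of assuming JPEG.
import base64
import mimetypes

def encode_image_as_data_url(image_path):
    mime, _ = mimetypes.guess_type(image_path)
    mime = mime or "image/jpeg"  # fall back to JPEG when the type cannot be guessed
    with open(image_path, "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")
    return f"data:{mime};base64,{encoded}"

# e.g. encode_image_as_data_url("acne.png") -> "data:image/png;base64,..."
```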
README.md CHANGED
@@ -1,14 +1,2 @@
- ---
- title: ChatWithDoctorAny
- emoji: 🐢
- colorFrom: blue
- colorTo: purple
- sdk: gradio
- sdk_version: 5.25.1
- app_file: app.py
- pinned: false
- license: mit
- short_description: ChatWithDoctorAny is a smart AI-powered assistant designed t
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+ # ChatWithDoctorAny
+ ChatWithDoctorAny is a smart AI-powered assistant designed to simulate conversations with a virtual doctor. Whether you have a general health concern or just want instant advice, this tool gives you a safe space to ask health-related questions, anytime and anywhere.

VoiceOfDoctor.py ADDED
@@ -0,0 +1,49 @@
+ from gtts import gTTS
+ from pydub import AudioSegment  # currently unused here; handy if an MP3-to-WAV conversion is added later
+ import os
+ from dotenv import load_dotenv  # optional, only if you need environment variables
+ import pygame
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ # Optional, only if you have any environment variables to load
+ load_dotenv()
+
+ def text_to_speech_with_gtts(text, mp3_output_path):
+     """
+     Convert text to speech with gTTS, save it as an MP3 file, and play it back with pygame.
+
+     Args:
+         text (str): The text that will be converted to speech.
+         mp3_output_path (str): The path where the MP3 file will be saved.
+
+     Returns:
+         str or None: The MP3 path on success, or None if conversion or playback failed.
+     """
+     try:
+         # Convert text to speech
+         print("Converting text to speech...")
+         tts = gTTS(text=text, lang='en')
+         tts.save(mp3_output_path)
+         print(f"MP3 file saved to {mp3_output_path}")
+
+         # Initialize pygame mixer
+         pygame.mixer.init()
+
+         # Play the MP3 file
+         print("Playing the MP3 file...")
+         pygame.mixer.music.load(mp3_output_path)
+         pygame.mixer.music.play()
+
+         while pygame.mixer.music.get_busy():
+             pygame.time.Clock().tick(14)
+
+         print("Audio playback finished.")
+         return mp3_output_path
+
+     except Exception as e:
+         print(f"Error during text-to-speech conversion: {e}")
+         return None
+
+
+ # Example usage
+ # text = "Hello, my name is Waris. I am from Islamabad. Right now I am struggling to get a job in the field of Art."
+ # mp3_file = r"C:\Users\HP\Desktop\ChatWithDoctorAny\ChatWithDoctorAny\output.mp3"
+
+ # text_to_speech_with_gtts(text, mp3_file)
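pydub is imported in VoiceOfDoctor.py but not used yet; if a WAV copy of the generated MP3 is ever needed (some players and speech tools prefer WAV), a minimal conversion sketch could look like this. The mp3_to_wav helper and the file names are hypothetical, and pydub needs ffmpeg available on the system.

```python
# Hedged sketch: optional MP3-to-WAV conversion with pydub (assumes ffmpeg is installed).
from pydub import AudioSegment

def mp3_to_wav(mp3_path, wav_output_path):
    """Convert an existing MP3 file to WAV and return the WAV path."""
    AudioSegment.from_mp3(mp3_path).export(wav_output_path, format="wav")
    return wav_output_path

# e.g. mp3_to_wav("final.mp3", "final.wav")
```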
VoiceOfPatient.py ADDED
@@ -0,0 +1,75 @@
+ # VoiceOfPatient.py
+ import logging
+ import speech_recognition as sr
+ from pydub import AudioSegment
+ from io import BytesIO
+ import os
+ from groq import Groq
+ from dotenv import load_dotenv
+ import warnings
+
+ warnings.filterwarnings("ignore")
+ load_dotenv()
+
+ # Get the ffmpeg path from the environment and register it with pydub
+ ffmpeg_path = os.getenv("FFMPEG_PATH")
+ GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
+ if ffmpeg_path:
+     AudioSegment.converter = ffmpeg_path
+ else:
+     raise EnvironmentError("FFMPEG_PATH is not set. Please define it in the .env file.")
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+ def record_audio(file_path, timeout=20, phrase_time_limit=None):
+     """
+     Record audio from the microphone and save it as an MP3 file.
+
+     Args:
+         file_path (str): Path to save the recorded audio file.
+         timeout (int): Max time to wait for speech to start (in seconds).
+         phrase_time_limit (int or None): Max length of the speech (in seconds).
+     """
+     recognizer = sr.Recognizer()
+
+     try:
+         with sr.Microphone() as source:
+             logging.info("Adjusting for ambient noise...")
+             recognizer.adjust_for_ambient_noise(source, duration=1)
+             logging.info("Start speaking now...")
+
+             audio_data = recognizer.listen(source, timeout=timeout, phrase_time_limit=phrase_time_limit)
+             logging.info("Recording complete.")
+
+             wav_data = audio_data.get_wav_data()
+             audio_segment = AudioSegment.from_wav(BytesIO(wav_data))
+             audio_segment.export(file_path, format="mp3", bitrate="128k")
+
+             logging.info(f"Audio saved to: {file_path}")
+
+     except Exception as e:
+         logging.error(f"An error occurred: {e}")
+
+
+ # Speech-to-text setup: transcribe the recorded voice with Groq's hosted Whisper model
+ client = Groq(api_key=GROQ_API_KEY)
+
+ def transcribe_with_whisper(audio_file_path, model_name="whisper-large-v3"):
+     """Transcribe an audio file to English text using Groq's transcription endpoint."""
+     with open(audio_file_path, "rb") as audio_file:
+         transcription = client.audio.transcriptions.create(
+             model=model_name,
+             file=audio_file,
+             language="en",
+         )
+     return transcription.text
+
+
+ if __name__ == "__main__":
+     # Quick manual test: record a short clip from the microphone, then transcribe it.
+     audio_file_path = "patientvoicetest.mp3"
+     record_audio(file_path=audio_file_path)
+     print(transcribe_with_whisper(audio_file_path))
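record_audio needs a local microphone, which a hosted Space usually does not have; an existing recording can be transcribed directly instead. A minimal sketch, assuming GROQ_API_KEY and FFMPEG_PATH are set in .env (the module checks for them at import time); the file name sample_patient_note.mp3 is a hypothetical placeholder.

```python
# Hedged sketch: transcribe a pre-recorded file without touching the microphone helper.
from VoiceOfPatient import transcribe_with_whisper

text = transcribe_with_whisper("sample_patient_note.mp3", model_name="whisper-large-v3")
print(text)
```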
app.py ADDED
@@ -0,0 +1,48 @@
+ from VoiceOfPatient import record_audio, transcribe_with_whisper
+ from VoiceOfDoctor import text_to_speech_with_gtts
+ from ImageForDoctor import encodeimage, AnalyzeImagewithQuery
+ import gradio as gr
+ from dotenv import load_dotenv
+ import os
+
+ load_dotenv()
+
+ prompt = """
+ You are Doctor Any, a virtual medical expert designed to assist with health-related issues. Always respond as a confident, knowledgeable doctor.
+
+ When given a patient’s image and question, analyze both thoroughly and provide a clear medical explanation of what might be wrong. Based on your analysis, offer direct, practical advice or treatment suggestions. Act like a real doctor and do not suggest consulting another specialist unless the condition is life-threatening or beyond your scope.
+
+ Do not answer questions unrelated to medicine or mental health. Instead, reply: "I am Doctor Any, a virtual medical expert. I only assist with health-related matters."
+
+ Keep responses brief, no more than 2 to 3 lines. Be precise, informative, and avoid using extra spaces, special characters, or numbers. Speak clearly and with authority, like a professional doctor guiding a patient.
+ """
+
+ # Now set up the frontend and wire the helper functions into it.
+
+ def process_input(audio_filepath, image_filepath):
+     speech_to_text = transcribe_with_whisper(audio_file_path=audio_filepath,
+                                              model_name="whisper-large-v3")
+     if image_filepath:
+         doctor_response = AnalyzeImagewithQuery(encoded_image=encodeimage(image_filepath), query=prompt + speech_to_text)
+     else:
+         doctor_response = "No image for me to analyze. Kindly upload the photo."
+
+     voice_of_doctor = text_to_speech_with_gtts(text=doctor_response, mp3_output_path="final.mp3")
+
+     return speech_to_text, doctor_response, voice_of_doctor
+
+ iface = gr.Interface(
+     fn=process_input,
+     inputs=[
+         gr.Audio(sources=["microphone"], type="filepath"),
+         gr.Image(type="filepath")
+     ],
+     outputs=[
+         gr.Textbox(label="Speech to Text"),
+         gr.Textbox(label="DoctorAny-Response"),
+         gr.Audio(label="Voice of Doctor")
+     ],
+     title="Chat With DoctorAny"
+ )
+
+ iface.launch(debug=True)
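Because app.py launches the Gradio interface as soon as it is executed, a headless smoke test is easier to write against the helper modules directly. The sketch below mirrors process_input with hypothetical file names and assumes the same .env keys (GROQ_API_KEY, FFMPEG_PATH); note that text_to_speech_with_gtts will still try to play the audio locally.

```python
# Hedged sketch: run the voice -> vision -> voice pipeline without the Gradio UI (hypothetical inputs).
from VoiceOfPatient import transcribe_with_whisper
from ImageForDoctor import encodeimage, AnalyzeImagewithQuery
from VoiceOfDoctor import text_to_speech_with_gtts

question = transcribe_with_whisper("sample_question.mp3", model_name="whisper-large-v3")
reply = AnalyzeImagewithQuery(query=question, encoded_image=encodeimage("sample_rash.jpg"))
audio_path = text_to_speech_with_gtts(text=reply, mp3_output_path="smoke_test.mp3")
print(question, reply, audio_path, sep="\n")
```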
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ gradio
+ python-dotenv
+ groq
+ gTTS
+ pydub
+ pygame
+ SpeechRecognition