Spaces:
Sleeping
Sleeping
Upload 6 files
Browse files- ImageForDoctor.py +54 -0
- README.md +2 -14
- VoiceOfDoctor.py +49 -0
- VoiceOfPatient.py +75 -0
- app.py +48 -0
- requirements.txt +7 -0
ImageForDoctor.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64  # used to turn raw image bytes into a base64 string for the API
import os

from dotenv import load_dotenv
from groq import Groq

# Step 1: load the Groq API key from the .env file and build the client.
load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)

# Step 2: vision model name kept for reference.
# NOTE(review): AnalyzeImagewithQuery below hardcodes a different model id —
# confirm which one is intended.
model = "llama-3.2-90b-vision-preview"
|
17 |
+
def encodeimage(image_path):
    """Return the file at *image_path* as a base64-encoded UTF-8 string.

    Raises:
        FileNotFoundError: if *image_path* does not exist.
    """
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")
    with open(image_path, "rb") as img:
        raw = img.read()
    return base64.b64encode(raw).decode("utf-8")
|
22 |
+
|
23 |
+
|
24 |
+
#step3 Setup the grof for vision
|
25 |
+
def AnalyzeImagewithQuery(query, encode_imgae):
    """Send *query* plus a base64-encoded JPEG to the Groq vision model and
    return the model's text reply.

    NOTE(review): the parameter name ``encode_imgae`` (sic) is kept as-is
    because app.py passes it by keyword.
    """
    user_content = [
        {"type": "text", "text": query},
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{encode_imgae}"},
        },
    ]
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": user_content}],
        model="meta-llama/llama-4-scout-17b-16e-instruct",
        temperature=0.7,
    )
    return chat_completion.choices[0].message.content
|
50 |
+
|
51 |
+
if __name__ == "__main__":
    # Quick manual smoke test: encode a sample image and query the model.
    query = "What happen with my face can you analyze that?"
    # Bug fix: encodeimage() was called with no argument, which raises
    # TypeError — it requires a path to the image file.
    e_image = encodeimage("acne.jpg")
    print(AnalyzeImagewithQuery(encode_imgae=e_image, query=query))
|
README.md
CHANGED
@@ -1,14 +1,2 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
emoji: 🐢
|
4 |
-
colorFrom: blue
|
5 |
-
colorTo: purple
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 5.25.1
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
license: mit
|
11 |
-
short_description: ChatWithDoctorAny is a smart AI-powered assistant designed t
|
12 |
-
---
|
13 |
-
|
14 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
# ChatWithDoctorAny
|
2 |
+
ChatWithDoctorAny is a smart AI-powered assistant designed to simulate conversations with a virtual doctor. Whether you have a general health concern or just want instant advice, this tool gives you a safe space to ask health-related questions, anytime and anywhere.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
VoiceOfDoctor.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import warnings

import pygame
from dotenv import load_dotenv  # Optional, only if you need environment variables
from gtts import gTTS
from pydub import AudioSegment  # NOTE(review): imported but unused in this module

warnings.filterwarnings("ignore")

# Optional, only if you have any environment variables to load
load_dotenv()
|
11 |
+
|
12 |
+
def text_to_speech_with_gtts(text, mp3_output_path):
    """
    Convert *text* to speech with gTTS, save it as MP3, and play it back.

    Args:
        text (str): The text that will be converted to speech.
        mp3_output_path (str): The path where the MP3 file will be saved.

    Returns:
        str | None: *mp3_output_path* on success, None if an error occurred.
        (Fix: the function previously returned nothing, so app.py's audio
        output component always received None.)
    """
    try:
        # Convert text to speech
        print("Converting text to speech...")
        tts = gTTS(text=text, lang='en')
        tts.save(mp3_output_path)
        print(f"MP3 file saved to {mp3_output_path}")

        # Initialize pygame mixer and play the MP3 file.
        # NOTE(review): playback will fail on a headless server (e.g. a
        # Hugging Face Space) — the saved file path is still returned.
        pygame.mixer.init()
        print("Playing the MP3 file...")
        pygame.mixer.music.load(mp3_output_path)
        pygame.mixer.music.play()

        while pygame.mixer.music.get_busy():
            pygame.time.Clock().tick(14)

        print("Audio playback finished.")
        return mp3_output_path

    except Exception as e:
        print(f"Error during text-to-speech conversion: {e}")
        return None
|
43 |
+
|
44 |
+
|
45 |
+
# Example usage
|
46 |
+
# text = "Hello my name is waris. i am from islamabad, Right now i am struggling to get the job in the field of Art"
|
47 |
+
# mp3_file = r"C:\Users\HP\Desktop\ChatWithDoctorAny\ChatWithDoctorAny\output.mp3"
|
48 |
+
|
49 |
+
# text_to_speech_with_gtts(text, mp3_file)
|
VoiceOfPatient.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# VoiceOfPatient.py
import logging
import os
import warnings
from io import BytesIO

import speech_recognition as sr
from dotenv import load_dotenv
from groq import Groq
from pydub import AudioSegment  # fix: was imported twice
from pydub.utils import which  # NOTE(review): imported but never used

warnings.filterwarnings("ignore")
load_dotenv()

# Get the ffmpeg path from environment and register it with pydub, which
# needs the binary to export MP3 files.
ffmpeg_path = os.getenv("FFMPEG_PATH")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if ffmpeg_path:
    AudioSegment.converter = ffmpeg_path
else:
    raise EnvironmentError("FFMPEG_PATH is not set. Please define it in the .env file.")

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
26 |
+
|
27 |
+
def record_audio(file_path, timeout=20, phrase_time_limit=None):
    """
    Capture speech from the default microphone and save it as an MP3 file.

    Args:
        file_path (str): Destination path for the recorded audio file.
        timeout (int): Max time to wait for speech to start (in seconds).
        phrase_time_limit (int or None): Max length of the speech (in seconds).
    """
    recognizer = sr.Recognizer()

    try:
        with sr.Microphone() as mic:
            logging.info("Adjusting for ambient noise...")
            recognizer.adjust_for_ambient_noise(mic, duration=1)
            logging.info("Start speaking now...")
            captured = recognizer.listen(
                mic, timeout=timeout, phrase_time_limit=phrase_time_limit
            )
            logging.info("Recording complete.")

        # Re-encode the captured WAV data as MP3 via pydub/ffmpeg.
        AudioSegment.from_wav(BytesIO(captured.get_wav_data())).export(
            file_path, format="mp3", bitrate="128k"
        )
        logging.info(f"Audio saved to: {file_path}")

    except Exception as err:
        logging.error(f"An error occurred: {err}")
|
55 |
+
|
56 |
+
# Demo recording target. Bug fix: the record_audio() call previously ran at
# module import time, so importing VoiceOfPatient (as app.py does) grabbed
# the microphone before the app even started. Record only when run directly.
audio_file_path = "patientvoicetest.mp3"
if __name__ == "__main__":
    record_audio(file_path=audio_file_path)
|
58 |
+
|
59 |
+
# Speech-to-text: transcribe the recorded voice with Groq's hosted Whisper.
client = Groq(api_key=GROQ_API_KEY)


def transcribe_with_whisper(audio_file_path, model_name="whisper-large-v3"):
    """
    Transcribe an audio file to English text via Groq's transcription API.

    Args:
        audio_file_path (str): Path to the audio file to transcribe.
        model_name (str): Transcription model id. Fix: the previous default
            was a chat model (llama-4-scout) that the audio transcription
            endpoint rejects; default now matches what app.py passes.

    Returns:
        str: The transcribed text.
    """
    with open(audio_file_path, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model=model_name,
            file=audio_file,
            language="en",
        )
    return transcription.text


if __name__ == "__main__":
    pass
|
app.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

import gradio as gr
from dotenv import load_dotenv

from ImageForDoctor import encodeimage, AnalyzeImagewithQuery
from VoiceOfDoctor import text_to_speech_with_gtts
from VoiceOfPatient import record_audio, transcribe_with_whisper

load_dotenv()

# System prompt prepended to every patient question before it is sent to the
# vision model (kept verbatim — it is part of the runtime behavior).
prompt = """
You are Doctor Any, a virtual medical expert designed to assist with health-related issues. Always respond as a confident, knowledgeable doctor.

When given a patient’s image and question, analyze both thoroughly and provide a clear medical explanation of what might be wrong. Based on your analysis, offer direct, practical advice or treatment suggestions. Act like a real doctor — do not suggest consulting another specialist unless the condition is life-threatening or beyond your scope.

Do not answer questions unrelated to medicine or mental health. Instead, reply: "I am Doctor Any, a virtual medical expert. I only assist with health-related matters."

Keep responses brief—no more than 2 to 3 lines. Be precise, informative, and avoid using extra spaces, special characters, or numbers. Speak clearly and with authority, like a professional doctor guiding a patient.
"""
|
19 |
+
|
20 |
+
#now setup the frontend than integrate the function with i.
|
21 |
+
|
22 |
+
def process_input(audio_filepath, image_filepath):
    """
    Gradio callback: transcribe the patient's voice, ask the vision model
    about the uploaded image, and synthesize the doctor's spoken reply.

    Args:
        audio_filepath (str): Path to the recorded patient audio.
        image_filepath (str or None): Path to the uploaded image, if any.

    Returns:
        tuple: (transcribed text, doctor's text response, path to reply MP3).
    """
    speech_to_text = transcribe_with_whisper(
        audio_file_path=audio_filepath,
        model_name="whisper-large-v3",
    )

    if image_filepath:
        doctor_response = AnalyzeImagewithQuery(
            encode_imgae=encodeimage(image_filepath),
            query=prompt + speech_to_text,
        )
    else:
        # Typo/spacing fixed in the user-facing message.
        doctor_response = "No image for me to analyze. Kindly upload the photo."

    output_audio = "final.mp3"
    text_to_speech_with_gtts(text=doctor_response, mp3_output_path=output_audio)

    # Fix: return the MP3 path explicitly — text_to_speech_with_gtts used to
    # return None, so the audio output component never received a file.
    return speech_to_text, doctor_response, output_audio
|
33 |
+
|
34 |
+
# Wire the callback into a Gradio UI: microphone + image in, text + audio out.
iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Audio(sources=["microphone"], type="filepath"),
        gr.Image(type="filepath"),
    ],
    outputs=[
        gr.Textbox(label="Speech to Text"),
        gr.Textbox(label="DoctorAny-Response"),  # fix: label typo "DocorAny"
        # Fix: was gr.Audio("Temp.mp3") — the first positional argument is the
        # component's initial *value*, and Temp.mp3 does not exist. Use a label.
        gr.Audio(label="DoctorAny-Voice"),
    ],
    title="Chat With DoctorAny.",
)

iface.launch(debug=True)
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
python-dotenv
groq
gTTS
pygame
pydub
SpeechRecognition
|