File size: 2,214 Bytes
1bbda65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from VoiceOfPatient import record_audio,transcribe_with_whisper
from VoiceOfDoctor import text_to_speech_with_gtts
from ImageForDoctor import encodeimage,AnalyzeImagewithQuery
import gradio as gr
from dotenv import load_dotenv
import os

# Load settings from a .env file via python-dotenv before anything else runs.
# NOTE(review): presumably this supplies the API keys used by the imported
# helper modules -- confirm against those modules.
load_dotenv()

# Instruction text that process_input prepends to the transcribed patient
# question before sending it, together with the image, to the analysis helper.
# The text is sent verbatim, so edits here change the model's behaviour.
prompt = """

You are Doctor Any, a virtual medical expert designed to assist with health-related issues. Always respond as a confident, knowledgeable doctor. 



When given a patient’s image and question, analyze both thoroughly and provide a clear medical explanation of what might be wrong. Based on your analysis, offer direct, practical advice or treatment suggestions. Act like a real doctor — do not suggest consulting another specialist unless the condition is life-threatening or beyond your scope.



Do not answer questions unrelated to medicine or mental health. Instead, reply: "I am Doctor Any, a virtual medical expert. I only assist with health-related matters."



Keep responses brief—no more than 2 to 3 lines. Be precise, informative, and avoid using extra spaces, special characters, or numbers. Speak clearly and with authority, like a professional doctor guiding a patient.

"""

# Now set up the frontend, then integrate the function with it.

def process_input(audio_filepath, image_filepath):
    """Turn a patient's recording and photo into a written and spoken reply.

    The recording is transcribed, the transcription is appended to the
    module-level ``prompt`` and sent with the encoded image to the image
    analysis helper, and the resulting text is converted to speech.

    Args:
        audio_filepath: Path to the recorded audio file. Falsy (``None`` or
            empty) when the user submitted without recording anything.
        image_filepath: Path to the uploaded image. Falsy when no image was
            provided.

    Returns:
        A 3-tuple ``(speech_to_text, doctor_response, voice_of_doctor)``:
        the transcription, the reply text, and the value to hand to the audio
        output (``None`` when there was no recording to process).
    """
    # Without a recording there is nothing to transcribe; answer immediately
    # instead of passing None to the transcription helper.
    if not audio_filepath:
        return "", "No audio for me to listen to. Kindly record your question.", None

    speech_to_text = transcribe_with_whisper(
        audio_file_path=audio_filepath,
        model_name="whisper-large-v3",
    )

    if image_filepath:
        # NOTE: "encode_imgae" is spelled this way on purpose -- it has to
        # match the keyword name the imported helper is called with.
        doctor_response = AnalyzeImagewithQuery(
            encode_imgae=encodeimage(image_filepath),
            query=prompt + speech_to_text,
        )
    else:
        doctor_response = "No image for me to analyze. Kindly upload the photo."

    mp3_output_path = "final.mp3"
    voice_of_doctor = text_to_speech_with_gtts(
        text=doctor_response,
        mp3_output_path=mp3_output_path,
    )
    # NOTE(review): the helper's return value is not visible from this file.
    # If it returns None, fall back to the path it was asked to write so the
    # audio output still has something to play -- confirm against the helper.
    if voice_of_doctor is None:
        voice_of_doctor = mp3_output_path

    return speech_to_text, doctor_response, voice_of_doctor

# Input widgets: a microphone recording and an image, both delivered to
# process_input as file paths.
_input_components = [
    gr.Audio(sources=["microphone"], type="filepath"),
    gr.Image(type="filepath"),
]

# Output widgets, in the same order as the values process_input returns:
# transcription text, reply text, reply audio.
# NOTE(review): the audio widget's initial value is "Temp.mp3", while
# process_input asks for the speech to be written to "final.mp3" -- confirm
# that this mismatch is intended.
_output_components = [
    gr.Textbox(label="Speech to Text"),
    gr.Textbox(label="DocorAny-Response"),
    gr.Audio("Temp.mp3"),
]

# Wire the handler and the widgets into a single-page Gradio interface.
iface = gr.Interface(
    fn=process_input,
    inputs=_input_components,
    outputs=_output_components,
    title="Chat With DoctorAny.",
)

# Start the web app as soon as this script is executed.
iface.launch(debug=True)