"""Gradio front end for DoctorAny: voice question + image in, text + spoken reply out."""

import os
from typing import Optional

import gradio as gr
from dotenv import load_dotenv

from ImageForDoctor import AnalyzeImagewithQuery, encodeimage
from VoiceOfDoctor import text_to_speech_with_gtts
from VoiceOfPatient import record_audio, transcribe_with_whisper

# Pull settings from a local .env file into the process environment.
load_dotenv()

# Instruction text that is prepended to the patient's transcribed question.
# The trailing space matters: the transcript is concatenated directly after it.
prompt = (
    " You are Doctor Any, a virtual medical expert designed to assist with "
    "health-related issues. Always respond as a confident, knowledgeable doctor. "
    "When given a patient’s image and question, analyze both thoroughly and "
    "provide a clear medical explanation of what might be wrong. "
    "Based on your analysis, offer direct, practical advice or treatment "
    "suggestions. Act like a real doctor — do not suggest consulting another "
    "specialist unless the condition is life-threatening or beyond your scope. "
    "Do not answer questions unrelated to medicine or mental health. "
    'Instead, reply: "I am Doctor Any, a virtual medical expert. '
    'I only assist with health-related matters." '
    "Keep responses brief—no more than 2 to 3 lines. "
    "Be precise, informative, and avoid using extra spaces, special characters, "
    "or numbers. Speak clearly and with authority, like a professional doctor "
    "guiding a patient. "
)

WHISPER_MODEL = "whisper-large-v3"
RESPONSE_AUDIO_PATH = "final.mp3"

# These messages are also fed to text-to-speech, so spelling and spacing
# affect how they are pronounced.
NO_AUDIO_MESSAGE = "No audio for me to listen to. Kindly record your question."
NO_IMAGE_MESSAGE = "No image for me to analyze. Kindly upload the photo."


# Set up the front end, then integrate the processing function with it.
def process_input(audio_filepath: Optional[str], image_filepath: Optional[str]):
    """Turn one recorded question (plus optional image) into the three UI outputs.

    Args:
        audio_filepath: Path of the recorded question, or ``None`` when the
            user submitted without recording anything.
        image_filepath: Path of the uploaded image, or ``None`` when no image
            was provided.

    Returns:
        A 3-tuple ``(speech_to_text, doctor_response, voice_of_doctor)``:
        the transcript (empty when there was no recording), the textual
        reply, and the value handed to the audio output component.
    """
    if not audio_filepath:
        # Nothing was recorded: do not hand None to the transcriber.
        speech_to_text = ""
        doctor_response = NO_AUDIO_MESSAGE
    else:
        speech_to_text = transcribe_with_whisper(
            audio_file_path=audio_filepath,
            model_name=WHISPER_MODEL,
        )
        if image_filepath:
            # NOTE: `encode_imgae` is the keyword this call has always used;
            # it must match the helper's own parameter name, so it is left
            # untouched here.
            doctor_response = AnalyzeImagewithQuery(
                encode_imgae=encodeimage(image_filepath),
                query=prompt + speech_to_text,
            )
        else:
            doctor_response = NO_IMAGE_MESSAGE

    voice_of_doctor = text_to_speech_with_gtts(
        text=doctor_response,
        mp3_output_path=RESPONSE_AUDIO_PATH,
    )
    # The helper's return value is not visible from this file. If it returns
    # nothing, fall back to the path it was asked to write so the audio
    # output component still has something to play.
    if not voice_of_doctor and os.path.exists(RESPONSE_AUDIO_PATH):
        voice_of_doctor = RESPONSE_AUDIO_PATH

    return speech_to_text, doctor_response, voice_of_doctor


iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Audio(sources=["microphone"], type="filepath"),
        gr.Image(type="filepath"),
    ],
    outputs=[
        gr.Textbox(label="Speech to Text"),
        gr.Textbox(label="DoctorAny-Response"),
        # NOTE(review): the positional argument is the component's initial
        # value, while process_input writes "final.mp3" — confirm that
        # "Temp.mp3" is the intended default file.
        gr.Audio("Temp.mp3"),
    ],
    title="Chat With DoctorAny.",
)

# Launched at module level, as before, so running the script starts the UI.
iface.launch(debug=True)