Commit 65d7792 · Parent: 4455263
Add debug loggings

Files changed:
- Dockerfile (+1, -3)
- app.py (+71, -13)
- requirements.txt (+1, -0)
Dockerfile (CHANGED)

```diff
--- a/Dockerfile
+++ b/Dockerfile
@@ -32,6 +32,4 @@ RUN chown -R user:user /app/model_cache
 EXPOSE 7860
 
 # Run the application
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
-
-
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--log-level", "debug"]
```
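For local debugging outside the container, the same server configuration can be expressed programmatically. A minimal sketch follows; the helper file name is hypothetical, and it assumes app.py exposes the FastAPI instance as `app` (as this commit shows). Note that uvicorn only honors the `workers` argument when the application is passed as an import string, so it is given that way here:

```python
# run_debug.py (hypothetical helper, not part of this commit).
# Mirrors the new CMD: uvicorn app:app --host 0.0.0.0 --port 7860 --workers 1 --log-level debug
import uvicorn

if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=7860, workers=1, log_level="debug")
```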
app.py (CHANGED)

```diff
--- a/app.py
+++ b/app.py
@@ -9,35 +9,85 @@ from pymongo import MongoClient
 from google import genai
 from sentence_transformers import SentenceTransformer
 
+# ✅ Enable Logging for Debugging
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger("medical-chatbot")
+# Debug Start
+logger.info("🚀 Starting Medical Chatbot API...")
+print("🚀 Starting Medical Chatbot API...")
+
 # ✅ Environment Variables
 mongo_uri = os.getenv("MONGO_URI")
 index_uri = os.getenv("INDEX_URI")
 gemini_flash_api_key = os.getenv("FlashAPI")
-
+# Validate environment endpoint
 if not all([gemini_flash_api_key, mongo_uri, index_uri]):
     raise ValueError("❌ Missing API keys! Set them in Hugging Face Secrets.")
-
-
+logger.info(f"🔎 MongoDB URI: {mongo_uri}")
+logger.info(f"🔎 FAISS Index URI: {index_uri}")
+
+# ✅ Monitor Resources Before Startup
+import psutil
+def check_system_resources():
+    memory = psutil.virtual_memory()
+    cpu = psutil.cpu_percent(interval=1)
+    disk = psutil.disk_usage("/")
+    # Defines log info messages
+    logger.info(f"🔍 System Resources - RAM: {memory.percent}%, CPU: {cpu}%, Disk: {disk.percent}%")
+    if memory.percent > 85:
+        logger.warning("⚠️ High RAM usage detected!")
+    if cpu > 90:
+        logger.warning("⚠️ High CPU usage detected!")
+    if disk.percent > 90:
+        logger.warning("⚠️ High Disk usage detected!")
+check_system_resources()
+
+# ✅ Reduce Memory usage with optimizers
 os.environ["OMP_NUM_THREADS"] = "1"
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
 # ✅ Initialize FastAPI app
 app = FastAPI(title="Medical Chatbot API")
+from fastapi.middleware.cors import CORSMiddleware  # Bypassing CORS origin
+# Define the origins
+origins = [
+    "http://localhost:5173",  # Vite dev server
+    "http://localhost:3000",  # Another vercel local dev
+    "https://medical-chatbot-henna.vercel.app",  # ✅ Vercel frontend production URL
+
+]
+# Add the CORS middleware:
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,  # or ["*"] to allow all
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
 
 # ✅ Use Lazy Loading for FAISS Index
 index = None  # Delay FAISS Index loading until first query
 
-# ✅ Load SentenceTransformer Model (Quantized)
+# ✅ Load SentenceTransformer Model (Quantized/Halved)
+logger.info("📥 Loading SentenceTransformer Model...")
 print("📥 Loading SentenceTransformer Model...")
 MODEL_CACHE_DIR = "/app/model_cache"
-
-embedding_model =
+try:
+    embedding_model = SentenceTransformer(MODEL_CACHE_DIR, device="cpu")
+    embedding_model = embedding_model.half()  # Reduce memory
+    logger.info("✅ Model Loaded Successfully.")
+    print("✅ Model Loaded Successfully.")
+except Exception as e:
+    logger.error(f"❌ Model Loading Failed: {e}")
+    exit(1)
+
 
 # ✅ Setup MongoDB Connection
+# QA data
 client = MongoClient(mongo_uri)
 db = client["MedicalChatbotDB"]
 qa_collection = db["qa_data"]
-
+# FAISS Index data
 iclient = MongoClient(index_uri)
 idb = iclient["MedicalChatbotDB"]
 index_collection = idb["faiss_index_files"]
@@ -56,18 +106,20 @@ def load_faiss_index():
         index_bytes_np = np.frombuffer(stored_index_bytes, dtype='uint8')
         index = faiss.deserialize_index(index_bytes_np)
         print("✅ FAISS Index Loaded")
+        logger.info("✅ FAISS Index Loaded")
     else:
         print("❌ FAISS index not found in GridFS.")
+        logger.error("❌ FAISS index not found in GridFS.")
     return index
 
 # ✅ Retrieve Medical Info
 def retrieve_medical_info(query):
     global index
     index = load_faiss_index()  # Load FAISS on demand
-
+    # N/A question
     if index is None:
         return ["No medical information available."]
-
+    # Embed the query and send to QA db to lookup
     query_embedding = embedding_model.encode([query], convert_to_numpy=True)
     _, idxs = index.search(query_embedding, k=3)
     results = [qa_collection.find_one({"i": int(i)}).get("Doctor", "No answer available.") for i in idxs[0]]
@@ -80,6 +132,7 @@ def gemini_flash_completion(prompt, model, temperature=0.7):
         response = client_genai.models.generate_content(model=model, contents=prompt)
         return response.text
     except Exception as e:
+        logger.error(f"❌ Error calling Gemini API: {e}")
         print(f"❌ Error calling Gemini API: {e}")
         return "Error generating response from Gemini."
 
@@ -118,15 +171,20 @@ async def chat_endpoint(data: dict):
     lang = data.get("lang", "EN")
     if not user_query:
         return JSONResponse(content={"response": "No query provided."})
-
+    # Output parameter
    start_time = time.time()
     response_text = chatbot.chat(user_query, lang)
     end_time = time.time()
     response_text += f"\n\n(Response time: {end_time - start_time:.2f} seconds)"
-
+    # Send JSON response
     return JSONResponse(content={"response": response_text})
 
-# ✅ Run Uvicorn
+# ✅ Run Uvicorn
 if __name__ == "__main__":
+    logger.info("✅ Starting FastAPI Server...")
     print("✅ Starting FastAPI Server...")
-
+    try:
+        uvicorn.run(app, host="0.0.0.0", port=7860, log_level="debug")
+    except Exception as e:
+        logger.error(f"❌ Server Startup Failed: {e}")
+        exit(1)
```
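Two caveats are worth flagging against this hunk. First, the new `logging.basicConfig(...)` call relies on `logging` already being imported in the unchanged lines above the hunk; the diff itself never adds the import, so if it is missing the app would fail at startup with a NameError. Second, `embedding_model.half()` can make `encode()` return float16 arrays, while FAISS CPU indexes accept only contiguous float32 queries, so a cast before `index.search` is a cheap safeguard. A minimal sketch of that cast, with a hypothetical helper name, reusing `embedding_model` and `index` from app.py:

```python
import numpy as np

def embed_query_float32(query: str) -> np.ndarray:
    """Encode a query and coerce the result to float32, since
    faiss.Index.search rejects the float16 output of a .half() model."""
    emb = embedding_model.encode([query], convert_to_numpy=True)  # may be float16 after .half()
    return np.ascontiguousarray(emb, dtype=np.float32)

# Usage inside retrieve_medical_info:
#   _, idxs = index.search(embed_query_float32(query), k=3)
```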
requirements.txt (CHANGED)

```diff
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,6 +16,7 @@ pymongo
 uvicorn
 fastapi
 torch  # Reduce model load with half-precision (float16) to reduce RAM usage
+psutil  # CPU/RAM logger
 # gradio  # On Huggingface deployment with gradio or serving FastAPI only
 # streamlit  # On streamlit deployment with daemon
 # requests
```
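Before rebuilding the image, the new dependency can be smoke-tested with the same calls `check_system_resources()` makes in app.py; a minimal standalone sketch:

```python
# Standalone psutil smoke test mirroring check_system_resources() in app.py.
import psutil

memory = psutil.virtual_memory()
cpu = psutil.cpu_percent(interval=1)  # samples CPU load over one second
disk = psutil.disk_usage("/")
print(f"RAM {memory.percent}% | CPU {cpu}% | Disk {disk.percent}%")
```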