LiamKhoaLe committed
Commit 65d7792 · 1 Parent(s): 4455263

Add debug loggings

Files changed (3)
  1. Dockerfile +1 -3
  2. app.py +71 -13
  3. requirements.txt +1 -0
Dockerfile CHANGED
@@ -32,6 +32,4 @@ RUN chown -R user:user /app/model_cache
 EXPOSE 7860
 
 # Run the application
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
-
-
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--log-level", "debug"]
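The new CMD pins uvicorn to a single worker and raises the process log level to debug, so the container now streams verbose traces to stdout. A quick way to confirm the container actually came up with these flags is to probe FastAPI's auto-generated docs route; this is a minimal sketch assuming the image is running locally via `docker run -p 7860:7860 <image>`:

    # smoke_test.py — a minimal sketch, assuming the container is mapped to localhost:7860.
    # FastAPI serves interactive docs at /docs by default, so a 200 here confirms
    # uvicorn started under the new single-worker, debug-level CMD.
    import requests

    resp = requests.get("http://localhost:7860/docs", timeout=10)
    print(resp.status_code)  # expect 200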
app.py CHANGED
@@ -9,35 +9,85 @@ from pymongo import MongoClient
 from google import genai
 from sentence_transformers import SentenceTransformer
 
+# ✅ Enable Logging for Debugging
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger("medical-chatbot")
+# Debug Start
+logger.info("🚀 Starting Medical Chatbot API...")
+print("🚀 Starting Medical Chatbot API...")
+
 # ✅ Environment Variables
 mongo_uri = os.getenv("MONGO_URI")
 index_uri = os.getenv("INDEX_URI")
 gemini_flash_api_key = os.getenv("FlashAPI")
-
+# Validate environment endpoint
 if not all([gemini_flash_api_key, mongo_uri, index_uri]):
     raise ValueError("❌ Missing API keys! Set them in Hugging Face Secrets.")
-
-# Reduce Memory Usage
+logger.info(f"🔎 MongoDB URI: {mongo_uri}")
+logger.info(f"🔎 FAISS Index URI: {index_uri}")
+
+# ✅ Monitor Resources Before Startup
+import psutil
+def check_system_resources():
+    memory = psutil.virtual_memory()
+    cpu = psutil.cpu_percent(interval=1)
+    disk = psutil.disk_usage("/")
+    # Defines log info messages
+    logger.info(f"🔍 System Resources - RAM: {memory.percent}%, CPU: {cpu}%, Disk: {disk.percent}%")
+    if memory.percent > 85:
+        logger.warning("⚠️ High RAM usage detected!")
+    if cpu > 90:
+        logger.warning("⚠️ High CPU usage detected!")
+    if disk.percent > 90:
+        logger.warning("⚠️ High Disk usage detected!")
+check_system_resources()
+
+# ✅ Reduce Memory usage with optimizers
 os.environ["OMP_NUM_THREADS"] = "1"
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
 # ✅ Initialize FastAPI app
 app = FastAPI(title="Medical Chatbot API")
+from fastapi.middleware.cors import CORSMiddleware  # Bypassing CORS origin
+# Define the origins
+origins = [
+    "http://localhost:5173",  # Vite dev server
+    "http://localhost:3000",  # Another vercel local dev
+    "https://medical-chatbot-henna.vercel.app",  # ✅ Vercel frontend production URL
+
+]
+# Add the CORS middleware:
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,  # or ["*"] to allow all
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
 
 # ✅ Use Lazy Loading for FAISS Index
 index = None  # Delay FAISS Index loading until first query
 
-# ✅ Load SentenceTransformer Model (Quantized)
+# ✅ Load SentenceTransformer Model (Quantized/Halved)
+logger.info("📥 Loading SentenceTransformer Model...")
 print("📥 Loading SentenceTransformer Model...")
 MODEL_CACHE_DIR = "/app/model_cache"
-embedding_model = SentenceTransformer(MODEL_CACHE_DIR, device="cpu")
-embedding_model = embedding_model.half()  # Reduce memory usage
+try:
+    embedding_model = SentenceTransformer(MODEL_CACHE_DIR, device="cpu")
+    embedding_model = embedding_model.half()  # Reduce memory
+    logger.info("✅ Model Loaded Successfully.")
+    print("✅ Model Loaded Successfully.")
+except Exception as e:
+    logger.error(f"❌ Model Loading Failed: {e}")
+    exit(1)
+
 
 # ✅ Setup MongoDB Connection
+# QA data
 client = MongoClient(mongo_uri)
 db = client["MedicalChatbotDB"]
 qa_collection = db["qa_data"]
-
+# FAISS Index data
 iclient = MongoClient(index_uri)
 idb = iclient["MedicalChatbotDB"]
 index_collection = idb["faiss_index_files"]
@@ -56,18 +106,20 @@ def load_faiss_index():
         index_bytes_np = np.frombuffer(stored_index_bytes, dtype='uint8')
         index = faiss.deserialize_index(index_bytes_np)
         print("✅ FAISS Index Loaded")
+        logger.info("✅ FAISS Index Loaded")
     else:
         print("❌ FAISS index not found in GridFS.")
+        logger.error("❌ FAISS index not found in GridFS.")
     return index
 
 # ✅ Retrieve Medical Info
 def retrieve_medical_info(query):
     global index
     index = load_faiss_index()  # Load FAISS on demand
-
+    # N/A question
    if index is None:
        return ["No medical information available."]
-
+    # Embed the query and send to QA db to lookup
     query_embedding = embedding_model.encode([query], convert_to_numpy=True)
     _, idxs = index.search(query_embedding, k=3)
     results = [qa_collection.find_one({"i": int(i)}).get("Doctor", "No answer available.") for i in idxs[0]]
@@ -80,6 +132,7 @@ def gemini_flash_completion(prompt, model, temperature=0.7):
         response = client_genai.models.generate_content(model=model, contents=prompt)
         return response.text
     except Exception as e:
+        logger.error(f"❌ Error calling Gemini API: {e}")
         print(f"❌ Error calling Gemini API: {e}")
         return "Error generating response from Gemini."
 
@@ -118,15 +171,20 @@ async def chat_endpoint(data: dict):
     lang = data.get("lang", "EN")
     if not user_query:
         return JSONResponse(content={"response": "No query provided."})
-
+    # Output parameter
     start_time = time.time()
     response_text = chatbot.chat(user_query, lang)
     end_time = time.time()
     response_text += f"\n\n(Response time: {end_time - start_time:.2f} seconds)"
-
+    # Send JSON response
     return JSONResponse(content={"response": response_text})
 
-# ✅ Run Uvicorn with 1 Worker
+# ✅ Run Uvicorn
 if __name__ == "__main__":
+    logger.info("✅ Starting FastAPI Server...")
     print("✅ Starting FastAPI Server...")
-    uvicorn.run(app, host="0.0.0.0", port=7860, workers=1)
+    try:
+        uvicorn.run(app, host="0.0.0.0", port=7860, log_level="debug")
+    except Exception as e:
+        logger.error(f"❌ Server Startup Failed: {e}")
+        exit(1)
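Since the commit's main payload is the logging threaded through app.py, a client sketch is handy for exercising the chat path and watching the new debug output scroll by. The route path ("/chat") and the "query" payload key are assumptions — the route decorator and the user_query assignment sit just outside the hunks shown above; only the "lang" key is visible in the diff:

    # chat_probe.py — hypothetical client for eyeballing the new logs end to end.
    # Assumes the "/chat" path and "query" key, which are not visible in the hunks.
    import requests

    payload = {"query": "What are common flu symptoms?", "lang": "EN"}
    resp = requests.post("http://localhost:7860/chat", json=payload, timeout=60)
    # The endpoint appends a "(Response time: ...)" footer to every answer.
    print(resp.json()["response"])

One design note worth flagging: logging the full Mongo and index URIs at startup (lines 26-27 of the new file) will put credentials embedded in those connection strings into the container logs, which is usually undesirable outside short-lived debugging.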
requirements.txt CHANGED
@@ -16,6 +16,7 @@ pymongo
 uvicorn
 fastapi
 torch # Reduce model load with half-precision (float16) to reduce RAM usage
+psutil # CPU/RAM logger
 # gradio # On Huggingface deployment with gradio or serving FastAPI only
 # streamlit # On streamlit deployment with daemon
 # requests
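psutil is the only new dependency and exists solely to feed check_system_resources(). A one-line sanity check, assuming the same container environment, confirms the package resolves and reports the three metrics the startup log prints:

    # psutil_check.py — sketch mirroring the metrics check_system_resources() logs.
    import psutil

    print(f"RAM: {psutil.virtual_memory().percent}%")
    print(f"CPU: {psutil.cpu_percent(interval=1)}%")  # 1-second sample, same as the app
    print(f"Disk: {psutil.disk_usage('/').percent}%")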