Commit 65d7792 · Parent: 4455263
Add debug loggings

Files changed:
- Dockerfile (+1, -3)
- app.py (+71, -13)
- requirements.txt (+1, -0)
Dockerfile (CHANGED)

```diff
--- a/Dockerfile
+++ b/Dockerfile
@@ -32,6 +32,4 @@ RUN chown -R user:user /app/model_cache
 EXPOSE 7860
 
 # Run the application
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
-
-
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--log-level", "debug"]
```
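For local debugging outside the container, the same server configuration can be expressed programmatically. A minimal sketch follows; the helper file name is hypothetical, and it assumes app.py exposes the FastAPI instance as `app` (as this commit shows). Note that uvicorn only honors the `workers` argument when the application is passed as an import string, so it is given that way here:

```python
# run_debug.py (hypothetical helper, not part of this commit).
# Mirrors the new CMD: uvicorn app:app --host 0.0.0.0 --port 7860 --workers 1 --log-level debug
import uvicorn

if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=7860, workers=1, log_level="debug")
```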
app.py (CHANGED)

```diff
--- a/app.py
+++ b/app.py
@@ -9,35 +9,85 @@ from pymongo import MongoClient
 from google import genai
 from sentence_transformers import SentenceTransformer
 
+# ✅ Enable Logging for Debugging
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger("medical-chatbot")
+# Debug Start
+logger.info("🚀 Starting Medical Chatbot API...")
+print("🚀 Starting Medical Chatbot API...")
+
 # ✅ Environment Variables
 mongo_uri = os.getenv("MONGO_URI")
 index_uri = os.getenv("INDEX_URI")
 gemini_flash_api_key = os.getenv("FlashAPI")
-
+# Validate environment endpoint
 if not all([gemini_flash_api_key, mongo_uri, index_uri]):
     raise ValueError("❌ Missing API keys! Set them in Hugging Face Secrets.")
-
-
+logger.info(f"🔎 MongoDB URI: {mongo_uri}")
+logger.info(f"🔎 FAISS Index URI: {index_uri}")
+
+# ✅ Monitor Resources Before Startup
+import psutil
+def check_system_resources():
+    memory = psutil.virtual_memory()
+    cpu = psutil.cpu_percent(interval=1)
+    disk = psutil.disk_usage("/")
+    # Defines log info messages
+    logger.info(f"🔍 System Resources - RAM: {memory.percent}%, CPU: {cpu}%, Disk: {disk.percent}%")
+    if memory.percent > 85:
+        logger.warning("⚠️ High RAM usage detected!")
+    if cpu > 90:
+        logger.warning("⚠️ High CPU usage detected!")
+    if disk.percent > 90:
+        logger.warning("⚠️ High Disk usage detected!")
+check_system_resources()
+
+# ✅ Reduce Memory usage with optimizers
 os.environ["OMP_NUM_THREADS"] = "1"
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
 # ✅ Initialize FastAPI app
 app = FastAPI(title="Medical Chatbot API")
+from fastapi.middleware.cors import CORSMiddleware  # Bypassing CORS origin
+# Define the origins
+origins = [
+    "http://localhost:5173",  # Vite dev server
+    "http://localhost:3000",  # Another vercel local dev
+    "https://medical-chatbot-henna.vercel.app",  # ✅ Vercel frontend production URL
+
+]
+# Add the CORS middleware:
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,  # or ["*"] to allow all
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
 
 # ✅ Use Lazy Loading for FAISS Index
 index = None  # Delay FAISS Index loading until first query
 
-# ✅ Load SentenceTransformer Model (Quantized)
+# ✅ Load SentenceTransformer Model (Quantized/Halved)
+logger.info("📥 Loading SentenceTransformer Model...")
 print("📥 Loading SentenceTransformer Model...")
 MODEL_CACHE_DIR = "/app/model_cache"
-
-embedding_model =
+try:
+    embedding_model = SentenceTransformer(MODEL_CACHE_DIR, device="cpu")
+    embedding_model = embedding_model.half()  # Reduce memory
+    logger.info("✅ Model Loaded Successfully.")
+    print("✅ Model Loaded Successfully.")
+except Exception as e:
+    logger.error(f"❌ Model Loading Failed: {e}")
+    exit(1)
+
 
 # ✅ Setup MongoDB Connection
+# QA data
 client = MongoClient(mongo_uri)
 db = client["MedicalChatbotDB"]
 qa_collection = db["qa_data"]
-
+# FAISS Index data
 iclient = MongoClient(index_uri)
 idb = iclient["MedicalChatbotDB"]
 index_collection = idb["faiss_index_files"]
@@ -56,18 +106,20 @@ def load_faiss_index():
         index_bytes_np = np.frombuffer(stored_index_bytes, dtype='uint8')
         index = faiss.deserialize_index(index_bytes_np)
         print("✅ FAISS Index Loaded")
+        logger.info("✅ FAISS Index Loaded")
     else:
         print("❌ FAISS index not found in GridFS.")
+        logger.error("❌ FAISS index not found in GridFS.")
     return index
 
 # ✅ Retrieve Medical Info
 def retrieve_medical_info(query):
     global index
     index = load_faiss_index()  # Load FAISS on demand
-
+    # N/A question
     if index is None:
         return ["No medical information available."]
-
+    # Embed the query and send to QA db to lookup
     query_embedding = embedding_model.encode([query], convert_to_numpy=True)
     _, idxs = index.search(query_embedding, k=3)
     results = [qa_collection.find_one({"i": int(i)}).get("Doctor", "No answer available.") for i in idxs[0]]
@@ -80,6 +132,7 @@ def gemini_flash_completion(prompt, model, temperature=0.7):
         response = client_genai.models.generate_content(model=model, contents=prompt)
         return response.text
     except Exception as e:
+        logger.error(f"❌ Error calling Gemini API: {e}")
         print(f"❌ Error calling Gemini API: {e}")
         return "Error generating response from Gemini."
 
@@ -118,15 +171,20 @@ async def chat_endpoint(data: dict):
     lang = data.get("lang", "EN")
     if not user_query:
         return JSONResponse(content={"response": "No query provided."})
-
+    # Output parameter
    start_time = time.time()
     response_text = chatbot.chat(user_query, lang)
     end_time = time.time()
     response_text += f"\n\n(Response time: {end_time - start_time:.2f} seconds)"
-
+    # Send JSON response
     return JSONResponse(content={"response": response_text})
 
-# ✅ Run Uvicorn
+# ✅ Run Uvicorn
 if __name__ == "__main__":
+    logger.info("✅ Starting FastAPI Server...")
     print("✅ Starting FastAPI Server...")
-
+    try:
+        uvicorn.run(app, host="0.0.0.0", port=7860, log_level="debug")
+    except Exception as e:
+        logger.error(f"❌ Server Startup Failed: {e}")
+        exit(1)
```
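Two caveats are worth flagging against this hunk. First, the new `logging.basicConfig(...)` call relies on `logging` already being imported in the unchanged lines above the hunk; the diff itself never adds the import, so if it is missing the app would fail at startup with a NameError. Second, `embedding_model.half()` can make `encode()` return float16 arrays, while FAISS CPU indexes accept only contiguous float32 queries, so a cast before `index.search` is a cheap safeguard. A minimal sketch of that cast, with a hypothetical helper name, reusing `embedding_model` and `index` from app.py:

```python
import numpy as np

def embed_query_float32(query: str) -> np.ndarray:
    """Encode a query and coerce the result to float32, since
    faiss.Index.search rejects the float16 output of a .half() model."""
    emb = embedding_model.encode([query], convert_to_numpy=True)  # may be float16 after .half()
    return np.ascontiguousarray(emb, dtype=np.float32)

# Usage inside retrieve_medical_info:
#   _, idxs = index.search(embed_query_float32(query), k=3)
```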
requirements.txt (CHANGED)

```diff
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,6 +16,7 @@ pymongo
 uvicorn
 fastapi
 torch  # Reduce model load with half-precision (float16) to reduce RAM usage
+psutil  # CPU/RAM logger
 # gradio  # On Huggingface deployment with gradio or serving FastAPI only
 # streamlit  # On streamlit deployment with daemon
 # requests
```
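Before rebuilding the image, the new dependency can be smoke-tested with the same calls `check_system_resources()` makes in app.py; a minimal standalone sketch:

```python
# Standalone psutil smoke test mirroring check_system_resources() in app.py.
import psutil

memory = psutil.virtual_memory()
cpu = psutil.cpu_percent(interval=1)  # samples CPU load over one second
disk = psutil.disk_usage("/")
print(f"RAM {memory.percent}% | CPU {cpu}% | Disk {disk.percent}%")
```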