Phishing-Detector

Running

th1enq committed 7 days ago

Commit

d82d422

1 Parent(s): 6eb199d

add bert & xgboost from global

Files changed (2) hide show

app.py CHANGED Viewed

@@ -9,7 +9,25 @@ from urllib.parse import urlparse
 from bs4 import BeautifulSoup
 import time
 import joblib
-from xgboost_wrapper import xgboost_detector
 # --- import your architecture ---
 # Make sure this file is in the repo (e.g., models/deberta_lstm_classifier.py)
@@ -423,7 +441,7 @@ def predict_xgboost_interface_fn(text: str):
         return {"error": "Please enter a URL or text."}, ""
     if not xgboost_detector.available:
-        return {"error": "XGBoost models not available"}, "XGBoost models are not properly loaded."
     # Check if input is URL
     if is_url(text.strip()):
@@ -447,7 +465,7 @@ def predict_xgboost_interface_fn(text: str):
         fetch_status = ""
     if not result:
-        return {"error": "XGBoost prediction failed"}, "Failed to get prediction from XGBoost models."
     predicted_class = "phishing" if result['is_phishing'] else "benign"
     confidence = max(result['probability'])

 from bs4 import BeautifulSoup
 import time
 import joblib
+# Try to import XGBoost wrapper, handle gracefully if not available
+try:
+    from xgboost_wrapper import xgboost_detector
+    XGBOOST_AVAILABLE = True
+except Exception as e:
+    print(f"XGBoost wrapper not available: {e}")
+    XGBOOST_AVAILABLE = False
+    # Create a dummy detector
+    class DummyDetector:
+        def __init__(self):
+            self.available = False
+        def predict_combined(self, *args, **kwargs):
+            return None
+        def predict_url(self, *args, **kwargs):
+            return None
+        def predict_html(self, *args, **kwargs):
+            return None
+    xgboost_detector = DummyDetector()
 # --- import your architecture ---
 # Make sure this file is in the repo (e.g., models/deberta_lstm_classifier.py)
         return {"error": "Please enter a URL or text."}, ""
     if not xgboost_detector.available:
+        return {"benign": 0.5, "phishing": 0.5}, "XGBoost models are not properly loaded."
     # Check if input is URL
     if is_url(text.strip()):
         fetch_status = ""
     if not result:
+        return {"benign": 0.5, "phishing": 0.5}, "Failed to get prediction from XGBoost models."
     predicted_class = "phishing" if result['is_phishing'] else "benign"
     confidence = max(result['probability'])

xgboost_wrapper.py CHANGED Viewed

@@ -28,9 +28,22 @@ def load_model_from_hub(repo_id, filename):
     try:
         # Download model from Hugging Face Hub
         model_path = hf_hub_download(repo_id=repo_id, filename=filename)
-        return joblib.load(model_path)
     except Exception as e:
-        print(f"Failed to load model {filename} from {repo_id}: {e}")
         return None
 def load_model_safe(model_path):

     try:
         # Download model from Hugging Face Hub
         model_path = hf_hub_download(repo_id=repo_id, filename=filename)
+        # Try different loading methods to handle version compatibility
+        try:
+            return joblib.load(model_path)
+        except Exception as e1:
+            try:
+                # Try with pickle
+                with open(model_path, 'rb') as f:
+                    return pickle.load(f)
+            except Exception as e2:
+                print(f"Failed to load model {filename} from {repo_id}: {e1}")
+                print(f"Pickle fallback failed: {e2}")
+                return None
     except Exception as e:
+        print(f"Failed to download model {filename} from {repo_id}: {e}")
         return None
 def load_model_safe(model_path):