Spaces:
Running
Running
add bert & xgboost from global
Browse files
- app.py +21 -3
- xgboost_wrapper.py +15 -2
app.py
CHANGED
@@ -9,7 +9,25 @@ from urllib.parse import urlparse
|
|
9 |
from bs4 import BeautifulSoup
|
10 |
import time
|
11 |
import joblib
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
# --- import your architecture ---
|
15 |
# Make sure this file is in the repo (e.g., models/deberta_lstm_classifier.py)
|
@@ -423,7 +441,7 @@ def predict_xgboost_interface_fn(text: str):
|
|
423 |
return {"error": "Please enter a URL or text."}, ""
|
424 |
|
425 |
if not xgboost_detector.available:
|
426 |
-
return {"
|
427 |
|
428 |
# Check if input is URL
|
429 |
if is_url(text.strip()):
|
@@ -447,7 +465,7 @@ def predict_xgboost_interface_fn(text: str):
|
|
447 |
fetch_status = ""
|
448 |
|
449 |
if not result:
|
450 |
-
return {"
|
451 |
|
452 |
predicted_class = "phishing" if result['is_phishing'] else "benign"
|
453 |
confidence = max(result['probability'])
|
|
|
9 |
from bs4 import BeautifulSoup
|
10 |
import time
|
11 |
import joblib
|
12 |
+
|
13 |
+
# Try to import XGBoost wrapper, handle gracefully if not available
|
14 |
+
try:
|
15 |
+
from xgboost_wrapper import xgboost_detector
|
16 |
+
XGBOOST_AVAILABLE = True
|
17 |
+
except Exception as e:
|
18 |
+
print(f"XGBoost wrapper not available: {e}")
|
19 |
+
XGBOOST_AVAILABLE = False
|
20 |
+
# Create a dummy detector
|
21 |
+
class DummyDetector:
|
22 |
+
def __init__(self):
|
23 |
+
self.available = False
|
24 |
+
def predict_combined(self, *args, **kwargs):
|
25 |
+
return None
|
26 |
+
def predict_url(self, *args, **kwargs):
|
27 |
+
return None
|
28 |
+
def predict_html(self, *args, **kwargs):
|
29 |
+
return None
|
30 |
+
xgboost_detector = DummyDetector()
|
31 |
|
32 |
# --- import your architecture ---
|
33 |
# Make sure this file is in the repo (e.g., models/deberta_lstm_classifier.py)
|
|
|
441 |
return {"error": "Please enter a URL or text."}, ""
|
442 |
|
443 |
if not xgboost_detector.available:
|
444 |
+
return {"benign": 0.5, "phishing": 0.5}, "XGBoost models are not properly loaded."
|
445 |
|
446 |
# Check if input is URL
|
447 |
if is_url(text.strip()):
|
|
|
465 |
fetch_status = ""
|
466 |
|
467 |
if not result:
|
468 |
+
return {"benign": 0.5, "phishing": 0.5}, "Failed to get prediction from XGBoost models."
|
469 |
|
470 |
predicted_class = "phishing" if result['is_phishing'] else "benign"
|
471 |
confidence = max(result['probability'])
|
xgboost_wrapper.py
CHANGED
@@ -28,9 +28,22 @@ def load_model_from_hub(repo_id, filename):
|
|
28 |
try:
|
29 |
# Download model from Hugging Face Hub
|
30 |
model_path = hf_hub_download(repo_id=repo_id, filename=filename)
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
except Exception as e:
|
33 |
-
print(f"Failed to
|
34 |
return None
|
35 |
|
36 |
def load_model_safe(model_path):
|
|
|
28 |
try:
|
29 |
# Download model from Hugging Face Hub
|
30 |
model_path = hf_hub_download(repo_id=repo_id, filename=filename)
|
31 |
+
|
32 |
+
# Try different loading methods to handle version compatibility
|
33 |
+
try:
|
34 |
+
return joblib.load(model_path)
|
35 |
+
except Exception as e1:
|
36 |
+
try:
|
37 |
+
# Try with pickle
|
38 |
+
with open(model_path, 'rb') as f:
|
39 |
+
return pickle.load(f)
|
40 |
+
except Exception as e2:
|
41 |
+
print(f"Failed to load model {filename} from {repo_id}: {e1}")
|
42 |
+
print(f"Pickle fallback failed: {e2}")
|
43 |
+
return None
|
44 |
+
|
45 |
except Exception as e:
|
46 |
+
print(f"Failed to download model {filename} from {repo_id}: {e}")
|
47 |
return None
|
48 |
|
49 |
def load_model_safe(model_path):
|