from time import sleep
import logging
import sys
import re
import httpx
from fastapi import FastAPI
from fastapi.responses import JSONResponse, FileResponse
from transformers import pipeline
from phishing_datasets import submit_entry
from url_tools import extract_urls, resolve_short_url, extract_domain_from_url
from urlscan_client import UrlscanClient
import requests
from mnemonic_attack import find_confusable_brand
from models.models import MessageModel, QueryModel, AppModel, InputModel, OutputModel, ReportModel
from models.enums import ActionModel, SubActionModel
app = FastAPI()
urlscan = UrlscanClient()
# Remove all handlers associated with the root logger object
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

logging.basicConfig(
    level=logging.INFO,
    format='%(levelname)s: %(asctime)s %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)]
)
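
# SMS spam classifier (tiny fine-tuned BERT); predict() below treats LABEL_0 as "ham"
# and inverts its confidence into a spam probability.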
pipe = pipeline(task="text-classification", model="mrm8488/bert-tiny-finetuned-sms-spam-detection")
@app.get("/.well-known/apple-app-site-association", include_in_schema=False)
def get_well_known_aasa():
return JSONResponse(
content={
"messagefilter": {
"apps": [
"X9NN3FSS3T.com.lela.Serenity.SerenityMessageFilterExtension",
"X9NN3FSS3T.com.lela.Serenity"
]
}
},
media_type="application/json"
)
@app.get("/robots.txt", include_in_schema=False)
def get_robots_txt():
return FileResponse("robots.txt")
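
# /predict flow: debug hooks (forced sleep / forced category), brand-confusable detection,
# ML spam scoring, commercial-sender and urlscan.io adjustments, then threshold-based
# mapping of the final score to JUNK / PROMOTION / NONE.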
@app.post("/predict")
def predict(model: InputModel) -> OutputModel:
sender = model.query.sender
text = model.query.message.text
logging.info(f"[{sender}] {text}")
# Debug sleep
pattern = r"^Sent from your Twilio trial account - sleep (\d+)$"
match = re.search(pattern, text)
if match:
number_str = match.group(1)
sleep_duration = int(number_str)
logging.debug(f"[DEBUG SLEEP] Sleeping for {sleep_duration} seconds for sender {sender}")
sleep(sleep_duration)
return OutputModel(action=ActionModel.JUNK, sub_action=SubActionModel.NONE)

    # Debug category
    pattern = r"^Sent from your Twilio trial account - (junk|transaction|promotion)$"
    match = re.search(pattern, text)
    if match:
        category_str = match.group(1)
        logging.info(f"[DEBUG CATEGORY] Forced category: {category_str} for sender {sender}")
        match category_str:
            case 'junk':
                return OutputModel(action=ActionModel.JUNK, sub_action=SubActionModel.NONE)
            case 'transaction':
                return OutputModel(action=ActionModel.TRANSACTION, sub_action=SubActionModel.NONE)
            case 'promotion':
                return OutputModel(action=ActionModel.PROMOTION, sub_action=SubActionModel.NONE)

    # Brand usurpation detection using confusables
    confusable_brand = find_confusable_brand(text)
    if confusable_brand:
        logging.warning(f"[BRAND USURPATION] Confusable/homoglyph variant of brand '{confusable_brand}' detected in message. Classified as JUNK.")
        return OutputModel(action=ActionModel.JUNK, sub_action=SubActionModel.NONE)

    result = pipe(text)
    label = result[0]['label']
    score = result[0]['score']
    logging.info(f"[CLASSIFICATION] label={label} score={score}")
    if label == 'LABEL_0':
        # LABEL_0 is the "ham" class: convert its confidence into a spam probability
        score = 1 - score

    # Pattern for detecting an alphanumeric SenderID
    alphanumeric_sender_pattern = r'^[A-Za-z][A-Za-z0-9\-\.]{2,14}$'
    # Pattern for detecting a short code
    shorten_sender_pattern = r'^(?:3\d{4}|[4-8]\d{4})$'
    commercial_stop = False

    # Detection of commercial senders (short code or alphanumeric SenderID): lower the spam
    # score and flag the sender so borderline scores are classified as PROMOTION below
    if re.search(shorten_sender_pattern, sender):
        logging.info("[COMMERCIAL] Commercial sender detected (short code)")
        commercial_stop = True
        score = score * 0.7
    elif re.match(alphanumeric_sender_pattern, sender):
        logging.info("[COMMERCIAL] Alphanumeric SenderID detected")
        commercial_stop = True
        score = score * 0.7

    urls = extract_urls(text)
    if urls:
        logging.info(f"[URL] URLs found: {urls}")
        logging.info("[URL] Searching for previous scans")
        search_results = [urlscan.search(f"domain:{extract_domain_from_url(url)}") for url in urls]
        scan_results = []
        for search_result in search_results:
            results = search_result.get('results', [])
            for result in results:
                result_uuid = result.get('_id')
                if not result_uuid:
                    continue
                scan_result = urlscan.get_result(result_uuid)
                scan_results.append(scan_result)
        if not scan_results:
            logging.info("[URL] No previous scan found, launching a new scan...")
            scan_results = [urlscan.scan(url) for url in urls]
        for result in scan_results:
            overall = result.get('verdicts', {}).get('overall', {})
            logging.info(f"[URLSCAN] Overall verdict: {overall}")
            if overall.get('hasVerdicts'):
                verdict_score = overall.get('score')
                logging.info(f"[URLSCAN] Verdict score: {verdict_score}")
                if verdict_score > 0:
                    # Malicious verdict: classify as junk regardless of the ML score
                    score = 1.0
                    break
                elif verdict_score < 0:
                    # Benign verdict: soften the spam score
                    score = score * 0.9
    else:
        logging.info("[URL] No URL found")
        score = score * 0.9
logging.info(f"[FINAL SCORE] {score}")
action = ActionModel.NONE
if score > 0.7:
action=ActionModel.JUNK
elif score > 0.5:
if commercial_stop:
action=ActionModel.PROMOTION
else:
action=ActionModel.JUNK
logging.info(f"[FINAL ACTION] {action}")
return OutputModel(action=action, sub_action=SubActionModel.NONE)
@app.post("/report")
def report(model: ReportModel):
submit_entry(model.sender, model.message) |
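
# Optional local entry point, a minimal sketch assuming uvicorn is installed and that the
# app should listen on port 7860 (the usual Hugging Face Spaces port); the Space runtime
# normally starts the ASGI app itself, so this block is only for local testing.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)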