import re

import torch
from safetensors.torch import load_file

from model import BeastTokenizer, BeastSpamModel


def predict_spam(text, tokenizer, model):
    # Preprocess the text: lowercase, strip URLs, replace non-word characters
    # with spaces, and collapse repeated whitespace.
    cleaned = re.sub(r"\s+", " ", re.sub(r"\W", " ", re.sub(r"http\S+", "", text.lower()))).strip()
    encoded = tokenizer.encode(cleaned)
    tensor = torch.tensor([encoded], dtype=torch.long)
    with torch.no_grad():
        output = model(tensor).item()
    return "🔥 It is SPAM!" if output > 0.5 else "✅ It is NOT spam."


if __name__ == "__main__":
    print("📩 Enter the full email content below (press Enter twice to finish):\n")

    # Read the email line by line; an empty line ends the input.
    lines = []
    while True:
        line = input()
        if line.strip() == "":
            break
        lines.append(line)
    email = "\n".join(lines)

    # Load tokenizer vocab (manually or from file)
    texts = ["this is dummy tokenizer data"]
    tokenizer = BeastTokenizer(texts)
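
    # A minimal sketch (not part of the original script): instead of the dummy
    # placeholder above, the real vocabulary could be rebuilt from the texts
    # the model was trained on, assuming they were saved one per line to a
    # file ("train_texts.txt" is a hypothetical name) and that BeastTokenizer
    # derives word2idx from whatever texts it is constructed with.
    #
    # with open("train_texts.txt", encoding="utf-8") as f:
    #     texts = [line.strip() for line in f if line.strip()]
    # tokenizer = BeastTokenizer(texts)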

    # Load model
    model = BeastSpamModel(len(tokenizer.word2idx))
    model.load_state_dict(load_file("beast_spam_model.safetensors"))
    model.eval()

    print("\n[🔍] Checking email...")
    print(f"[🧠] Result: {predict_spam(email, tokenizer, model)}")
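
# Usage: run this script directly (its filename is not shown in this listing),
# paste the full email text at the prompt, and press Enter on an empty line to
# finish; the script then prints the SPAM / NOT spam verdict.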