import torch
import re
from model import BeastTokenizer, BeastSpamModel
from safetensors.torch import load_file

def predict_spam(text, tokenizer, model):
    # Clean the text: lowercase, strip URLs, replace non-word characters with
    # spaces, and collapse whitespace (this should mirror the training-time preprocessing).
    cleaned = text.lower()
    cleaned = re.sub(r"http\S+", "", cleaned)
    cleaned = re.sub(r"\W", " ", cleaned)
    cleaned = re.sub(r"\s+", " ", cleaned).strip()
    # Encode to token ids, add a batch dimension, and score without gradients.
    encoded = tokenizer.encode(cleaned)
    tensor = torch.tensor([encoded], dtype=torch.long)
    with torch.no_grad():
        output = model(tensor).item()  # treated as a spam probability in [0, 1]
    return "🔥 It is SPAM!" if output > 0.5 else "✅ It is NOT spam."
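
# Usage sketch (illustrative, not part of the original script): predict_spam can
# also be called directly once a tokenizer and model exist, e.g.
#
#     print(predict_spam("Congratulations, you won a free prize!", tokenizer, model))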

if __name__ == "__main__":
    print("๐Ÿ“ฉ Enter the full email content below (press Enter twice to finish):\n")
    lines = []
    while True:
        line = input()
        if line.strip() == "":
            break
        lines.append(line)
    email = "\n".join(lines)

    # Build the tokenizer vocabulary. WARNING: this dummy text will not reproduce
    # the vocabulary the model was trained with; use the original training texts
    # (or a saved vocab) so the token indices match what the model saw during training.
    texts = ["this is dummy tokenizer data"]
    tokenizer = BeastTokenizer(texts)
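    # Sketch under an assumption (not the original author's code): if the training
    # texts were saved to disk, e.g. as a JSON list in "train_texts.json"
    # (hypothetical filename), the real vocabulary could be rebuilt like this:
    #
    #     import json
    #     with open("train_texts.json", encoding="utf-8") as f:
    #         texts = json.load(f)
    #     tokenizer = BeastTokenizer(texts)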

    # Load the trained weights from the safetensors checkpoint and switch to eval mode
    model = BeastSpamModel(len(tokenizer.word2idx))
    model.load_state_dict(load_file("beast_spam_model.safetensors"))
    model.eval()
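    # Note: safetensors.torch.load_file returns a plain dict of tensors (on CPU by
    # default), so it can be passed straight to load_state_dict.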

    print("\n[๐Ÿ”] Checking email...")
    print(f"[๐Ÿง ] Result: {predict_spam(email, tokenizer, model)}")