Spaces:

Meryb
/

spam-detector

Sleeping

File size: 2,250 Bytes

b48b3fd
 
 
4990b32
 
 
b48b3fd
4990b32
c3db3b9
b48b3fd
c3db3b9
b48b3fd
4990b32
b48b3fd
 
 
4990b32
 
 
 
b48b3fd
4990b32
 
b48b3fd
4990b32
 
 
b48b3fd
4990b32
 
 
c3db3b9
4990b32
 
 
 
 
 
 
 
 
c3db3b9
4990b32
c3db3b9
 
4990b32
 
 
 
 
 
c3db3b9
4990b32
 
c3db3b9
 
4990b32
 
 
 
 
 
 
c3db3b9

import pandas as pd
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# 1 Read the CSV, discard repeated rows, and give the classes display-friendly names
data = pd.read_csv("spam.csv")
data.drop_duplicates(inplace=True)
data['Category'] = data['Category'].replace({'ham': 'Not spam', 'spam': 'Spam'})

# 2 Features are the raw message text; targets are the relabelled categories
X, y = data['Message'], data['Category']

# 3 Hold out 20% for evaluation; stratify so both splits keep the class ratio,
#   and fix the seed so the split is reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

# 4 TF-IDF over unigrams and bigrams, with English stop words removed
vectorizer = TfidfVectorizer(ngram_range=(1, 2), stop_words='english')

# 5 Learn the vocabulary on the training split only, then reuse it for the test split
X_train_tf = vectorizer.fit_transform(X_train)
X_test_tf = vectorizer.transform(X_test)

# 6 Fit the logistic-regression classifier on the training features
model = LogisticRegression(max_iter=200, n_jobs=-1).fit(X_train_tf, y_train)

# Optional: evaluate on the held-out split and print the per-class report to stdout
test_predictions = model.predict(X_test_tf)
print(classification_report(y_test, test_predictions))

# 7 Prediction function
def predict_spam(message: str) -> str:
    """Classify one message with the module-level vectorizer and model.

    Args:
        message: Raw message text to classify.

    Returns:
        The label predicted by ``model`` for ``message`` (one of the values
        the model was trained on in the ``Category`` column).
    """
    # The vectorizer works on a collection of documents, so wrap the single
    # message in a list and unpack the single prediction that comes back.
    (label,) = model.predict(vectorizer.transform([message]))
    return label

# 8 Build improved UI
with gr.Blocks(theme=gr.themes.Default()) as demo:
    # Page heading
    gr.Markdown("## 📩 Spam Detector&nbsp;&nbsp;|&nbsp;&nbsp;TF‑IDF + Logistic Regression")

    # Message input and prediction output share one row
    with gr.Row():
        msg_box = gr.Textbox(
            lines=4,
            label="Your Message",
            placeholder="e.g. Congratulations! You've won a prize...",
        )
        output = gr.Label(label="Prediction")

    # Clicking the button sends the textbox content through predict_spam
    detect_btn = gr.Button("Detect Spam", variant="primary")
    detect_btn.click(predict_spam, inputs=msg_box, outputs=output)

    # Sample messages offered as examples for the textbox; each example is a
    # one-element row because there is a single input component
    sample_messages = [
        "Congratulations! You've won a $1000 Walmart gift card.",
        "Your PayPal account is on hold. Log in now to fix the issue.",
        "Hey, let's meet for lunch tomorrow at 1?",
        "URGENT! Verify your bank details immediately or your account will be locked.",
    ]
    gr.Examples(
        examples=[[text] for text in sample_messages],
        inputs=msg_box,
    )

# Launch the app only when this file is run as a script, not when imported
if __name__ == "__main__":
    demo.launch()