Spaces:

Meryb
/

spam-detector

Running

App Files Community

Meryb committed 1 day ago

Commit

4990b32

verified ·

1 Parent(s): c3db3b9

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -40

app.py CHANGED Viewed

@@ -1,65 +1,67 @@
 import pandas as pd
 import gradio as gr
 from sklearn.model_selection import train_test_split
-from sklearn.feature_extraction.text import CountVectorizer
-from sklearn.naive_bayes import MultinomialNB
-# Load and clean the dataset
 data = pd.read_csv("spam.csv")
 data.drop_duplicates(inplace=True)
 data['Category'] = data['Category'].replace(['ham', 'spam'], ['Not spam', 'Spam'])
-# Prepare data
 X = data['Message']
 y = data['Category']
-# Split into training and testing sets
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
-# Convert text data to numerical features
-vectorizer = CountVectorizer(stop_words='english')
-X_train_features = vectorizer.fit_transform(X_train)
-X_test_features = vectorizer.transform(X_test)
-# Train the model
-model = MultinomialNB()
-model.fit(X_train_features, y_train)
-# Define prediction function
-def predict_spam(message):
-    message_features = vectorizer.transform([message])
-    prediction = model.predict(message_features)[0]
-    return prediction
-# Build better UI
 with gr.Blocks(theme=gr.themes.Default()) as demo:
-    gr.Markdown("## 📩 Spam Detector\nEnter any message below to check if it's spam or not.")
     with gr.Row():
-        with gr.Column(scale=3):
-            message_input = gr.Textbox(
-                label="Your Message",
-                placeholder="e.g. Congratulations! You've won a prize...",
-                lines=4
-            )
-            submit_btn = gr.Button("Detect Spam")
-        with gr.Column(scale=2):
-            result_output = gr.Label(label="Prediction")
-    examples = [
-        ["Congratulations! You have been selected for a free cruise!"],
-        ["Hey, what time is class tomorrow?"],
-        ["Win cash now!!! Click here"],
-        ["Lunch at 1 PM?"],
-    ]
     gr.Examples(
-        examples=examples,
-        inputs=message_input
     )
-    submit_btn.click(fn=predict_spam, inputs=message_input, outputs=result_output)
-# Launch app
 if __name__ == "__main__":
     demo.launch()

 import pandas as pd
 import gradio as gr
 from sklearn.model_selection import train_test_split
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import classification_report
+# 1 Load and clean the dataset
 data = pd.read_csv("spam.csv")
 data.drop_duplicates(inplace=True)
 data['Category'] = data['Category'].replace(['ham', 'spam'], ['Not spam', 'Spam'])
+# 2 Prepare the data
 X = data['Message']
 y = data['Category']
+# 3 Train‑test split
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.2, random_state=42, stratify=y
+)
+# 4 Vectorizer (TF‑IDF instead of Bag‑of‑Words)
+vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2))
+# 5 Transform text to features
+X_train_tf = vectorizer.fit_transform(X_train)
+X_test_tf  = vectorizer.transform(X_test)
+# 6 Model (Logistic Regression)
+model = LogisticRegression(max_iter=200, n_jobs=-1)
+model.fit(X_train_tf, y_train)
+# ⭐ Optional: print metrics to the HF Logs tab
+print(classification_report(y_test, model.predict(X_test_tf)))
+# 7 Prediction function
+def predict_spam(message: str) -> str:
+    features = vectorizer.transform([message])
+    return model.predict(features)[0]
+# 8 Build improved UI
 with gr.Blocks(theme=gr.themes.Default()) as demo:
+    gr.Markdown("## 📩 Spam Detector&nbsp;&nbsp;|&nbsp;&nbsp;TF‑IDF + Logistic Regression")
     with gr.Row():
+        msg_box = gr.Textbox(
+            label="Your Message",
+            placeholder="e.g. Congratulations! You've won a prize...",
+            lines=4,
+        )
+        output = gr.Label(label="Prediction")
+    detect_btn = gr.Button("Detect Spam", variant="primary")
+    detect_btn.click(fn=predict_spam, inputs=msg_box, outputs=output)
     gr.Examples(
+        examples=[
+            ["Congratulations! You've won a $1000 Walmart gift card."],
+            ["Your PayPal account is on hold. Log in now to fix the issue."],
+            ["Hey, let's meet for lunch tomorrow at 1?"],
+            ["URGENT! Verify your bank details immediately or your account will be locked."],
+        ],
+        inputs=msg_box,
     )
 if __name__ == "__main__":
     demo.launch()