Spaces:

Meryb
/

spam-detector

Sleeping

App Files Files Community

Meryb committed on Jul 12

Commit

6b23f9b

verified ·

1 Parent(s): 4990b32

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -58

app.py CHANGED Viewed

@@ -1,67 +1,41 @@
 import pandas as pd
 import gradio as gr
 from sklearn.model_selection import train_test_split
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import classification_report
-# 1 Load and clean the dataset
-data = pd.read_csv("spam.csv")
 data.drop_duplicates(inplace=True)
-data['Category'] = data['Category'].replace(['ham', 'spam'], ['Not spam', 'Spam'])
-# 2 Prepare the data
 X = data['Message']
 y = data['Category']
-# 3 Train‑test split
-X_train, X_test, y_train, y_test = train_test_split(
-    X, y, test_size=0.2, random_state=42, stratify=y
-)
-# 4 Vectorizer (TF‑IDF instead of Bag‑of‑Words)
-vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2))
-# 5 Transform text to features
-X_train_tf = vectorizer.fit_transform(X_train)
-X_test_tf  = vectorizer.transform(X_test)
-# 6 Model (Logistic Regression)
-model = LogisticRegression(max_iter=200, n_jobs=-1)
-model.fit(X_train_tf, y_train)
-# ⭐ Optional: print metrics to the HF Logs tab
-print(classification_report(y_test, model.predict(X_test_tf)))
-# 7 Prediction function
-def predict_spam(message: str) -> str:
-    features = vectorizer.transform([message])
-    return model.predict(features)[0]
-# 8 Build improved UI
-with gr.Blocks(theme=gr.themes.Default()) as demo:
-    gr.Markdown("## 📩 Spam Detector&nbsp;&nbsp;|&nbsp;&nbsp;TF‑IDF + Logistic Regression")
-    with gr.Row():
-        msg_box = gr.Textbox(
-            label="Your Message",
-            placeholder="e.g. Congratulations! You've won a prize...",
-            lines=4,
-        )
-        output = gr.Label(label="Prediction")
-    detect_btn = gr.Button("Detect Spam", variant="primary")
-    detect_btn.click(fn=predict_spam, inputs=msg_box, outputs=output)
-    gr.Examples(
-        examples=[
-            ["Congratulations! You've won a $1000 Walmart gift card."],
-            ["Your PayPal account is on hold. Log in now to fix the issue."],
-            ["Hey, let's meet for lunch tomorrow at 1?"],
-            ["URGENT! Verify your bank details immediately or your account will be locked."],
-        ],
-        inputs=msg_box,
-    )
-if __name__ == "__main__":
-    demo.launch()

 import pandas as pd
 import gradio as gr
 from sklearn.model_selection import train_test_split
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.naive_bayes import MultinomialNB
+# Load and clean the dataset
+data = pd.read_csv(r"spam.csv")
 data.drop_duplicates(inplace=True)
+data['Category'] = data['Category'].replace(['ham', 'spam'], ['Not spam', 'spam'])
+# Prepare data
 X = data['Message']
 y = data['Category']
+# Split into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
+# Convert text data to numerical features
+vectorizer = CountVectorizer(stop_words='english')
+X_train_features = vectorizer.fit_transform(X_train)
+X_test_features = vectorizer.transform(X_test)
+# Train the model
+model = MultinomialNB()
+model.fit(X_train_features, y_train)
+# Define prediction function
+def predict_spam(message):
+    message_features = vectorizer.transform([message])
+    prediction = model.predict(message_features)[0]
+    return f"Prediction: {prediction}"
+# Launch Gradio interface
+gr.Interface(
+    fn=predict_spam,
+    inputs="text",
+    outputs="text",
+    title="📩 Spam Detection with Gradio",
+    description="Enter a message and the model will predict whether it's spam or not."
+).launch()