Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,65 +1,67 @@
|
|
1 |
import pandas as pd
|
2 |
import gradio as gr
|
3 |
from sklearn.model_selection import train_test_split
|
4 |
-
from sklearn.feature_extraction.text import
|
5 |
-
from sklearn.
|
|
|
6 |
|
7 |
-
# Load and clean the dataset
|
8 |
data = pd.read_csv("spam.csv")
|
9 |
data.drop_duplicates(inplace=True)
|
10 |
data['Category'] = data['Category'].replace(['ham', 'spam'], ['Not spam', 'Spam'])
|
11 |
|
12 |
-
# Prepare data
|
13 |
X = data['Message']
|
14 |
y = data['Category']
|
15 |
|
16 |
-
#
|
17 |
-
X_train, X_test, y_train, y_test = train_test_split(
|
|
|
|
|
18 |
|
19 |
-
#
|
20 |
-
vectorizer =
|
21 |
-
X_train_features = vectorizer.fit_transform(X_train)
|
22 |
-
X_test_features = vectorizer.transform(X_test)
|
23 |
|
24 |
-
#
|
25 |
-
|
26 |
-
|
27 |
|
28 |
-
#
|
29 |
-
|
30 |
-
|
31 |
-
prediction = model.predict(message_features)[0]
|
32 |
-
return prediction
|
33 |
|
34 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
with gr.Blocks(theme=gr.themes.Default()) as demo:
|
36 |
-
gr.Markdown("## 📩 Spam Detector
|
37 |
|
38 |
with gr.Row():
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
submit_btn = gr.Button("Detect Spam")
|
46 |
-
with gr.Column(scale=2):
|
47 |
-
result_output = gr.Label(label="Prediction")
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
["Hey, what time is class tomorrow?"],
|
52 |
-
["Win cash now!!! Click here"],
|
53 |
-
["Lunch at 1 PM?"],
|
54 |
-
]
|
55 |
|
56 |
gr.Examples(
|
57 |
-
examples=
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
59 |
)
|
60 |
-
|
61 |
-
submit_btn.click(fn=predict_spam, inputs=message_input, outputs=result_output)
|
62 |
|
63 |
-
# Launch app
|
64 |
if __name__ == "__main__":
|
65 |
demo.launch()
|
|
|
1 |
import pandas as pd
|
2 |
import gradio as gr
|
3 |
from sklearn.model_selection import train_test_split
|
4 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
5 |
+
from sklearn.linear_model import LogisticRegression
|
6 |
+
from sklearn.metrics import classification_report
|
7 |
|
8 |
+
# 1 Load and clean the dataset
# "spam.csv" must provide a 'Category' column ('ham' / 'spam' labels) and a
# 'Message' column (raw text); both are read below.
data = pd.read_csv("spam.csv")
# Drop incomplete rows first: a NaN message makes TfidfVectorizer raise, and a
# NaN label cannot be used as a training target.
data.dropna(subset=['Category', 'Message'], inplace=True)
data.drop_duplicates(inplace=True)
# Map the raw labels to the strings shown in the UI.
data['Category'] = data['Category'].replace(['ham', 'spam'], ['Not spam', 'Spam'])

# 2 Prepare the data
X = data['Message']
y = data['Category']

# 3 Train-test split
# Hold out 20% of the rows for evaluation. stratify=y keeps the label ratio
# the same in both splits; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 4 Vectorizer (TF-IDF instead of Bag-of-Words)
# Uses unigrams and bigrams, with English stop words removed.
vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2))

# 5 Transform text to features
# The vocabulary is fitted on the training split only and then reused
# unchanged for the held-out split.
X_train_tf = vectorizer.fit_transform(X_train)
X_test_tf = vectorizer.transform(X_test)

# 6 Model (Logistic Regression)
model = LogisticRegression(max_iter=200, n_jobs=-1)
model.fit(X_train_tf, y_train)

# ⭐ Optional: print metrics to the HF Logs tab
print(classification_report(y_test, model.predict(X_test_tf)))
|
35 |
+
|
36 |
+
# 7 Prediction function
def predict_spam(message: str) -> str:
    """Classify one message with the module-level vectorizer and model.

    Args:
        message: Raw message text. ``None`` is treated as an empty message.

    Returns:
        The label predicted by ``model`` for the message, as a plain ``str``.
    """
    # None carries no text to vectorize; fall back to an empty document
    # instead of letting the vectorizer fail on it.
    text = "" if message is None else str(message)
    features = vectorizer.transform([text])
    # predict() returns an array of labels; take the single entry and convert
    # it to a built-in str so the return value matches the annotation.
    return str(model.predict(features)[0])
|
40 |
+
|
41 |
+
# 8 Build improved UI
# NOTE(review): the nesting below is reconstructed from a flattened diff view
# that carried no indentation - confirm that the textbox and the label are
# meant to share the Row and that the button sits below it.
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("## 📩 Spam Detector | TF‑IDF + Logistic Regression")

    # Input on the left, prediction on the right.
    with gr.Row():
        msg_box = gr.Textbox(
            lines=4,
            label="Your Message",
            placeholder="e.g. Congratulations! You've won a prize...",
        )
        output = gr.Label(label="Prediction")

    detect_btn = gr.Button("Detect Spam", variant="primary")

    # Sample messages that fill the textbox when selected.
    gr.Examples(
        inputs=msg_box,
        examples=[
            ["Congratulations! You've won a $1000 Walmart gift card."],
            ["Your PayPal account is on hold. Log in now to fix the issue."],
            ["Hey, let's meet for lunch tomorrow at 1?"],
            ["URGENT! Verify your bank details immediately or your account will be locked."],
        ],
    )

    # Run the classifier on the textbox content and show the result in the label.
    detect_btn.click(fn=predict_spam, inputs=msg_box, outputs=output)


# Start the app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()
|