Meryb committed on
Commit
6b23f9b
·
verified ·
1 Parent(s): 4990b32

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -58
app.py CHANGED
@@ -1,67 +1,41 @@
1
  import pandas as pd
2
  import gradio as gr
3
  from sklearn.model_selection import train_test_split
4
- from sklearn.feature_extraction.text import TfidfVectorizer
5
- from sklearn.linear_model import LogisticRegression
6
- from sklearn.metrics import classification_report
7
 
8
- # 1 Load and clean the dataset
9
- data = pd.read_csv("spam.csv")
10
  data.drop_duplicates(inplace=True)
11
- data['Category'] = data['Category'].replace(['ham', 'spam'], ['Not spam', 'Spam'])
12
 
13
- # 2 Prepare the data
14
  X = data['Message']
15
  y = data['Category']
16
 
17
- # 3 Train‑test split
18
- X_train, X_test, y_train, y_test = train_test_split(
19
- X, y, test_size=0.2, random_state=42, stratify=y
20
- )
21
-
22
- # 4 Vectorizer (TF‑IDF instead of Bag‑of‑Words)
23
- vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2))
24
-
25
- # 5 Transform text to features
26
- X_train_tf = vectorizer.fit_transform(X_train)
27
- X_test_tf = vectorizer.transform(X_test)
28
-
29
- # 6 Model (Logistic Regression)
30
- model = LogisticRegression(max_iter=200, n_jobs=-1)
31
- model.fit(X_train_tf, y_train)
32
-
33
- # ⭐ Optional: print metrics to the HF Logs tab
34
- print(classification_report(y_test, model.predict(X_test_tf)))
35
-
36
- # 7 Prediction function
37
- def predict_spam(message: str) -> str:
38
- features = vectorizer.transform([message])
39
- return model.predict(features)[0]
40
-
41
- # 8 Build improved UI
42
- with gr.Blocks(theme=gr.themes.Default()) as demo:
43
- gr.Markdown("## 📩 Spam Detector  |  TF‑IDF + Logistic Regression")
44
-
45
- with gr.Row():
46
- msg_box = gr.Textbox(
47
- label="Your Message",
48
- placeholder="e.g. Congratulations! You've won a prize...",
49
- lines=4,
50
- )
51
- output = gr.Label(label="Prediction")
52
-
53
- detect_btn = gr.Button("Detect Spam", variant="primary")
54
- detect_btn.click(fn=predict_spam, inputs=msg_box, outputs=output)
55
-
56
- gr.Examples(
57
- examples=[
58
- ["Congratulations! You've won a $1000 Walmart gift card."],
59
- ["Your PayPal account is on hold. Log in now to fix the issue."],
60
- ["Hey, let's meet for lunch tomorrow at 1?"],
61
- ["URGENT! Verify your bank details immediately or your account will be locked."],
62
- ],
63
- inputs=msg_box,
64
- )
65
-
66
- if __name__ == "__main__":
67
- demo.launch()
 
1
  import pandas as pd
2
  import gradio as gr
3
  from sklearn.model_selection import train_test_split
4
+ from sklearn.feature_extraction.text import CountVectorizer
5
+ from sklearn.naive_bayes import MultinomialNB
 
6
 
7
+ # Load and clean the dataset
8
+ data = pd.read_csv(r"spam.csv")
9
  data.drop_duplicates(inplace=True)
10
+ data['Category'] = data['Category'].replace(['ham', 'spam'], ['Not spam', 'spam'])
11
 
12
+ # Prepare data
13
  X = data['Message']
14
  y = data['Category']
15
 
16
+ # Split into training and testing sets
17
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
18
+
19
+ # Convert text data to numerical features
20
+ vectorizer = CountVectorizer(stop_words='english')
21
+ X_train_features = vectorizer.fit_transform(X_train)
22
+ X_test_features = vectorizer.transform(X_test)
23
+
24
+ # Train the model
25
+ model = MultinomialNB()
26
+ model.fit(X_train_features, y_train)
27
+
28
+ # Define prediction function
29
+ def predict_spam(message):
30
+ message_features = vectorizer.transform([message])
31
+ prediction = model.predict(message_features)[0]
32
+ return f"Prediction: {prediction}"
33
+
34
+ # Launch Gradio interface
35
+ gr.Interface(
36
+ fn=predict_spam,
37
+ inputs="text",
38
+ outputs="text",
39
+ title="📩 Spam Detection with Gradio",
40
+ description="Enter a message and the model will predict whether it's spam or not."
41
+ ).launch()