Meryb committed on
Commit
4990b32
·
verified ·
1 Parent(s): c3db3b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -40
app.py CHANGED
@@ -1,65 +1,67 @@
1
  import pandas as pd
2
  import gradio as gr
3
  from sklearn.model_selection import train_test_split
4
- from sklearn.feature_extraction.text import CountVectorizer
5
- from sklearn.naive_bayes import MultinomialNB
 
6
 
7
- # Load and clean the dataset
8
  data = pd.read_csv("spam.csv")
9
  data.drop_duplicates(inplace=True)
10
  data['Category'] = data['Category'].replace(['ham', 'spam'], ['Not spam', 'Spam'])
11
 
12
- # Prepare data
13
  X = data['Message']
14
  y = data['Category']
15
 
16
- # Split into training and testing sets
17
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
 
 
18
 
19
- # Convert text data to numerical features
20
- vectorizer = CountVectorizer(stop_words='english')
21
- X_train_features = vectorizer.fit_transform(X_train)
22
- X_test_features = vectorizer.transform(X_test)
23
 
24
- # Train the model
25
- model = MultinomialNB()
26
- model.fit(X_train_features, y_train)
27
 
28
- # Define prediction function
29
- def predict_spam(message):
30
- message_features = vectorizer.transform([message])
31
- prediction = model.predict(message_features)[0]
32
- return prediction
33
 
34
- # Build better UI
 
 
 
 
 
 
 
 
35
  with gr.Blocks(theme=gr.themes.Default()) as demo:
36
- gr.Markdown("## 📩 Spam Detector\nEnter any message below to check if it's spam or not.")
37
 
38
  with gr.Row():
39
- with gr.Column(scale=3):
40
- message_input = gr.Textbox(
41
- label="Your Message",
42
- placeholder="e.g. Congratulations! You've won a prize...",
43
- lines=4
44
- )
45
- submit_btn = gr.Button("Detect Spam")
46
- with gr.Column(scale=2):
47
- result_output = gr.Label(label="Prediction")
48
 
49
- examples = [
50
- ["Congratulations! You have been selected for a free cruise!"],
51
- ["Hey, what time is class tomorrow?"],
52
- ["Win cash now!!! Click here"],
53
- ["Lunch at 1 PM?"],
54
- ]
55
 
56
  gr.Examples(
57
- examples=examples,
58
- inputs=message_input
 
 
 
 
 
59
  )
60
-
61
- submit_btn.click(fn=predict_spam, inputs=message_input, outputs=result_output)
62
 
63
- # Launch app
64
  if __name__ == "__main__":
65
  demo.launch()
 
1
  import pandas as pd
2
  import gradio as gr
3
  from sklearn.model_selection import train_test_split
4
+ from sklearn.feature_extraction.text import TfidfVectorizer
5
+ from sklearn.linear_model import LogisticRegression
6
+ from sklearn.metrics import classification_report
7
 
8
# --- Dataset ---------------------------------------------------------------
# Read the labelled messages, discard exact duplicate rows, and swap the raw
# 'ham' / 'spam' tags for the labels the app displays.
data = pd.read_csv("spam.csv").drop_duplicates()
data["Category"] = data["Category"].replace({"ham": "Not spam", "spam": "Spam"})

# --- Features and target ---------------------------------------------------
X, y = data["Message"], data["Category"]

# --- Hold-out split --------------------------------------------------------
# 20% of the rows are held out; the fixed seed makes the split repeatable and
# stratifying on the label keeps the class mix the same on both sides.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# --- Text features ---------------------------------------------------------
# TF-IDF weights over unigrams and bigrams with English stop words removed.
# The vocabulary is learned from the training messages only and then reused
# unchanged for the test messages.
vectorizer = TfidfVectorizer(stop_words="english", ngram_range=(1, 2))
X_train_tf = vectorizer.fit_transform(X_train)
X_test_tf = vectorizer.transform(X_test)

# --- Classifier ------------------------------------------------------------
# NOTE(review): n_jobs is documented as parallelising over classes, so it is
# probably a no-op for a two-label problem -- confirm before relying on it.
model = LogisticRegression(max_iter=200, n_jobs=-1).fit(X_train_tf, y_train)

# --- Evaluation ------------------------------------------------------------
# Printed to stdout once at start-up so the hold-out metrics show in the logs.
print(classification_report(y_true=y_test, y_pred=model.predict(X_test_tf)))
35
+
36
+ # 7 Prediction function
37
def predict_spam(message: str) -> str:
    """Classify one message with the module-level vectorizer and model.

    Args:
        message: Raw text of the message to classify.

    Returns:
        The label the model predicts for ``message``.
    """
    # The vectorizer works on a batch of documents, so wrap the single
    # message in a list and take the first (only) prediction back out.
    labels = model.predict(vectorizer.transform([message]))
    return labels[0]
40
+
41
# Gradio front end: a message box next to the prediction label, a button that
# runs predict_spam on the box contents, and clickable sample messages.
# NOTE(review): the nesting below was reconstructed from text whose
# indentation was lost -- confirm which components belong inside the Row.
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("## 📩 Spam Detector  |  TF‑IDF + Logistic Regression")

    with gr.Row():
        msg_box = gr.Textbox(
            lines=4,
            label="Your Message",
            placeholder="e.g. Congratulations! You've won a prize...",
        )
        output = gr.Label(label="Prediction")

    detect_btn = gr.Button("Detect Spam", variant="primary")
    # Clicking sends the textbox value to predict_spam and shows the result
    # in the label component.
    detect_btn.click(predict_spam, msg_box, output)

    # Each example is a one-element row because there is a single input.
    gr.Examples(
        inputs=msg_box,
        examples=[
            [text]
            for text in (
                "Congratulations! You've won a $1000 Walmart gift card.",
                "Your PayPal account is on hold. Log in now to fix the issue.",
                "Hey, let's meet for lunch tomorrow at 1?",
                "URGENT! Verify your bank details immediately or your account will be locked.",
            )
        ],
    )

# Launch the interface only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()