matesoft committed on
Commit
b9be817
·
verified ·
1 Parent(s): 6dcdc09

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +167 -0
app.py CHANGED
@@ -1,7 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import pickle
3
  import numpy as np
4
 
 
 
 
 
 
 
5
 
6
  # Preprocess symptoms
7
  all_symptoms = sorted(all_symptoms)
 
1
+ # -*- coding: utf-8 -*-
2
+ """2_preprocessing_test.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/10c3x9G9z70J73l0LJDA8_VDZphQmHEZB
8
+ """
9
+
10
+ from google.colab import drive
11
+ drive.mount('/content/drive')
12
+
13
+ import pandas as pd
14
+ import numpy as np
15
+ import matplotlib.pyplot as plt
16
+ from sklearn.preprocessing import LabelEncoder
17
+ import os
18
+ from sklearn.model_selection import train_test_split
19
+ import pickle
20
+ import warnings
21
+ warnings.filterwarnings('ignore')
22
+
23
# Load the raw disease/symptom table from the mounted Drive folder.
df1 = pd.read_csv(
    "/content/drive/MyDrive/Google Colab/disease-symptom-prediction/data/dataset.csv"
)

print(df1.shape)
df1.head()  # notebook preview; has no effect when run as a script

# Order the rows by disease name.
df1 = df1.sort_values(by='Disease')
df1.head()

# Discard rows that are exact duplicates of an earlier row.
df1 = df1.drop_duplicates()
df1.shape

df1['Disease'].value_counts()

df1[df1['Disease'] == "Fungal infection"]

# Missing cells become the literal placeholder "none".
df1 = df1.fillna("none")
df1[df1['Disease'] == "Fungal infection"]

# Normalise the header names, then every cell, to stripped lower-case text.
df1.columns = df1.columns.str.strip().str.lower()
df1 = df1.apply(lambda column: column.astype(str).str.strip().str.lower())
44
+
45
+
46
# Every column whose (already lower-cased) name starts with 'symptom'.
symptom_cols = [col for col in df1.columns if col.startswith('symptom')]
print(symptom_cols)

# Vocabulary of distinct symptom values, skipping the 'none' placeholder.
# It is turned into a sorted list right away so the feature columns built
# below have one fixed order.  Keeping a set here made the training columns
# follow set-iteration order, while the inference code further down builds
# its input vector from sorted(all_symptoms) -- the model then read every
# symptom flag at the wrong feature position.
all_symptoms = sorted(
    {val for col in symptom_cols for val in df1[col].unique() if val != 'none'}
)
print(f"Unique symptoms: {len(all_symptoms)}")

print(all_symptoms)

df1.head()

# One 0/1 column per symptom: 1 when any symptom column of the row holds
# that symptom.  The comparison is vectorised per symptom instead of running
# a Python lambda over every row, and the columns are assembled in a single
# frame rather than inserted one at a time.
symptom_block = df1[symptom_cols]
symptom_flags = pd.DataFrame(
    {
        symptom: symptom_block.eq(symptom).any(axis=1).astype(int)
        for symptom in all_symptoms
    },
    index=df1.index,
)
df1_num = pd.concat([df1[['disease']], symptom_flags], axis=1)
64
+
65
+ df1_num
66
+
67
# Feature matrix (the symptom flag columns) and target labels.
X = df1_num.drop('disease', axis=1)
y = df1_num['disease']
X.shape, y.shape

X.sum(axis=1)

# Stratified 70/30 split.  The split is seeded like the classifier below so
# that the reported metrics are reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

print(np.unique(y_train, return_counts=True))
print(np.unique(y_test, return_counts=True))

from sklearn.ensemble import RandomForestClassifier

# Fit once: a second identical fit() call only repeats the same training
# work on the same data with the same seed.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
83
+
84
+ import pickle
85
+
86
# Persist the fitted classifier.
with open("disease_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

# Persist the symptom list (to use in the app later).
with open("symptoms.pkl", "wb") as symptoms_file:
    pickle.dump(list(all_symptoms), symptoms_file)

# Original symptom keys, in sorted order.
all_symptoms = sorted(all_symptoms)

# Build, in one pass, the display labels ('_' replaced by ' ', each word
# capitalised) and the mapping from a display label back to its symptom key.
display_symptoms = []
label_to_symptom = {}
for symptom_key in all_symptoms:
    display_label = symptom_key.replace('_', ' ').title()
    display_symptoms.append(display_label)
    label_to_symptom[display_label] = symptom_key
102
+
103
+ from sklearn.metrics import accuracy_score, f1_score
104
+
105
# Metrics on the training split.
y_train_pred = model.predict(X_train)

train_accuracy = accuracy_score(y_train, y_train_pred)
train_f1_score = f1_score(y_train, y_train_pred, average="weighted")

print("Train Accuracy:", train_accuracy)
print("Train f1 score:", train_f1_score)

# Metrics on the held-out split.  These values were previously printed under
# "Train ..." labels, which made the two reports indistinguishable.
y_test_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)
test_f1_score = f1_score(y_test, y_test_pred, average="weighted")
print("Test Accuracy:", test_accuracy)
print("Test f1 score:", test_f1_score)
118
+
119
+ import numpy as np
120
+
121
# Example user symptoms
user_symptoms = ['nausea', 'vomiting', 'abdominal_pain', 'diarrhoea']

# Tip for the user
if len(user_symptoms) < 4:
    print("Tip: The model performs better if you enter at least 4 symptoms.\n")

# Build the input vector in the column order the model was trained on
# (X.columns).  The model receives a bare array and matches features by
# position only, so any other ordering silently feeds each flag to the wrong
# feature.
selected_symptoms = set(user_symptoms)
feature_names = list(X.columns)

# Report symptoms the model has no feature for instead of dropping them
# without a trace (typically a typo in the symptom key).
unknown_symptoms = sorted(selected_symptoms.difference(feature_names))
if unknown_symptoms:
    print(f"Warning: ignoring unknown symptoms: {', '.join(unknown_symptoms)}")

input_vector = np.array(
    [[1 if name in selected_symptoms else 0 for name in feature_names]]
)

# Make prediction and get probabilities
probas = model.predict_proba(input_vector)[0]
best_index = int(np.argmax(probas))
max_proba = probas[best_index]
predicted = model.classes_[best_index]

# Confidence threshold
threshold = 0.5

# Print predicted disease and confidence; the low-confidence case only adds
# a warning line in front of the same prediction line.
if max_proba < threshold:
    print("Warning: The model is not confident about this prediction.")
print(f"Predicted disease: {predicted} (Confidence: {max_proba * 100:.1f}%)")
146
+
147
+ # Function to print top N diseases
148
def print_top_diseases(probas, model, top_n=5):
    """Print the most probable diseases, highest probability first.

    Args:
        probas: One-dimensional sequence of probabilities; entry ``i`` is
            paired with ``model.classes_[i]``.
        model: Object exposing a ``classes_`` sequence of class labels.
        top_n: Maximum number of entries to print.  Fewer are printed when
            there are fewer classes; nothing but the header is printed when
            ``top_n`` is zero or negative.
    """
    classes = model.classes_
    scores = np.asarray(probas, dtype=float)
    # Stable sort on the negated scores: classes with equal probability keep
    # their order in ``classes_`` instead of coming out in an arbitrary order.
    ranked = np.argsort(-scores, kind="stable")
    shown = max(0, min(top_n, len(classes)))
    print(f"\nTop {top_n} possible diseases:")
    for rank, class_index in enumerate(ranked[:shown], start=1):
        print(f"{rank}. {classes[class_index]}: {scores[class_index]:.4f}")
156
+
157
# Show top 5 possible diseases
print_top_diseases(probas, model, top_n=5)

# Copy the logo from the mounted Drive folder to /content/tbc.png.
# This used to be the notebook shell magic `!cp ...`, which is a SyntaxError
# in a plain .py file.  Like the shell command, a failed copy is reported
# but does not stop the script.
import shutil

try:
    shutil.copyfile(
        "/content/drive/MyDrive/Google Colab/disease-symptom-prediction/tbc-logo (1).png",
        "/content/tbc.png",
    )
except OSError as copy_error:
    print(f"Could not copy logo: {copy_error}")
161
+
162
  import gradio as gr
163
  import pickle
164
  import numpy as np
165
 
166
+ # --- 1. Load Disease Prediction Model ---
167
+ with open("disease_model.pkl", "rb") as f:
168
+ model = pickle.load(f)
169
+
170
+ with open("symptoms.pkl", "rb") as f:
171
+ all_symptoms = pickle.load(f)
172
 
173
  # Preprocess symptoms
174
  all_symptoms = sorted(all_symptoms)