File size: 3,924 Bytes
eb87013
e1cc8c0
eb87013
 
 
 
e1cc8c0
 
 
f8233d7
e1cc8c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eb87013
e1cc8c0
eb87013
e1cc8c0
 
eb87013
 
 
 
e1cc8c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import streamlit as st
import pickle
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Download the NLTK resources needed by preprocess_text(). These calls run at
# import time on every script run; quiet=True suppresses the progress output.
#   - 'punkt'     : tokenizer models used by word_tokenize on older NLTK
#   - 'punkt_tab' : tokenizer tables that newer NLTK releases look up instead
#                   of 'punkt' (without it word_tokenize raises LookupError)
#   - 'stopwords' : the English stop-word list
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)
nltk.download('stopwords', quiet=True)

# Load the pickled TF-IDF vectorizer and KNN classifier (cached by Streamlit)
@st.cache_resource
def load_model():
    """Return the ``(tfidf_vectorizer, knn_classifier)`` pair read from disk.

    The two objects are unpickled from ``tfidf_vectorizer.pkl`` and
    ``knn_model.pkl`` in the current working directory, vectorizer first.

    Returns:
        A 2-tuple ``(tfidf_vectorizer, knn_classifier)``. If anything goes
        wrong while opening or unpickling either file, the error is shown
        with ``st.error`` and ``(None, None)`` is returned instead.
    """
    # NOTE: pickle.load can execute arbitrary code contained in the file, so
    # only trusted artifacts should ever be placed at these paths.
    def _unpickle(path):
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    try:
        vectorizer = _unpickle('tfidf_vectorizer.pkl')
        classifier = _unpickle('knn_model.pkl')
    except Exception as load_error:
        st.error(f"Error loading model: {load_error}")
        return None, None

    return vectorizer, classifier

# Normalize raw symptom text before it is vectorized
def preprocess_text(text):
    """Lower-case, tokenize and filter *text*, returning a cleaned string.

    Tokens are kept only if they are purely alphabetic and are not in
    NLTK's English stop-word list. The surviving tokens are joined with
    single spaces, in their original order.
    """
    english_stops = set(stopwords.words('english'))

    kept_tokens = []
    for token in word_tokenize(text.lower()):
        if not token.isalpha():
            # Drops punctuation, numbers and mixed tokens.
            continue
        if token in english_stops:
            continue
        kept_tokens.append(token)

    return ' '.join(kept_tokens)

# Main Streamlit App
def main():
    """Render the symptom form and display the model's disease prediction.

    Page flow: configure the page, draw the title and the sidebar, load the
    vectorizer/classifier pair via ``load_model()``, then, once the form is
    submitted, preprocess the text, vectorize it and show the predicted
    label together with a medical disclaimer.

    The sidebar is drawn before any early exit so that the "About the Model"
    panel stays visible on every code path (missing model, empty input,
    prediction error).
    """
    # Set page title and favicon
    st.set_page_config(
        page_title="Disease Classification by Symptoms",
        page_icon=":medical_symbol:",
        layout="centered",
    )

    # Title and description
    st.title("🩺 Disease Classification Predictor")
    st.markdown("""
    ### Predict Potential Diseases Based on Symptoms
    
    Enter your symptoms below, and our AI model will help predict possible diseases.
    """)

    # Draw the sidebar up front: the guard clauses below return early and
    # would otherwise skip it.
    _render_sidebar()

    # Load model and vectorizer
    tfidf_vectorizer, knn_classifier = load_model()
    if tfidf_vectorizer is None or knn_classifier is None:
        # load_model() returns (None, None) on failure and reports the cause
        # itself; stop here rather than failing later with an AttributeError
        # on None.
        st.warning("Prediction is unavailable because the model could not be loaded.")
        return

    # Input form for symptoms
    with st.form(key='symptom_form'):
        symptoms = st.text_area(
            "Enter your symptoms:",
            placeholder="Example: low appetite, fever, headache",
            help="Provide a detailed description of your symptoms",
        )

        submit_button = st.form_submit_button(label="Predict Disease")

    # Nothing to predict until the form has been submitted.
    if not submit_button:
        return

    # Treat whitespace-only input the same as empty input.
    if not symptoms or not symptoms.strip():
        st.warning("Please enter some symptoms.")
        return

    try:
        # Preprocess input symptoms
        preprocessed_symptoms = preprocess_text(symptoms)

        # If every token was filtered out (digits, punctuation, stop words)
        # there is nothing for the model to work with; do not show a
        # prediction derived from an empty document.
        if not preprocessed_symptoms:
            st.warning(
                "No recognizable symptom words were found. "
                "Please describe your symptoms in plain words."
            )
            return

        # Transform symptoms using TF-IDF vectorizer
        symptoms_tfidf = tfidf_vectorizer.transform([preprocessed_symptoms])

        # Predict disease
        predicted_disease = knn_classifier.predict(symptoms_tfidf)

        # Display prediction
        st.success(f"Predicted Disease: {predicted_disease[0]}")

        # Additional information (optional)
        st.info("""
            ### Disclaimer
            - This is an AI-based prediction and should not replace professional medical advice
            - Always consult with a healthcare professional for accurate diagnosis
            - The prediction is based on machine learning analysis of symptom patterns
            """)

    except Exception as e:
        # UI boundary: surface any preprocessing/model failure to the user
        # instead of crashing the page.
        st.error(f"An error occurred during prediction: {e}")


def _render_sidebar():
    """Draw the static "About the Model" panel in the Streamlit sidebar."""
    st.sidebar.title("About the Model")
    st.sidebar.markdown("""
    ### Disease Classification Model
    - **Algorithm**: K-Nearest Neighbors (KNN)
    - **Features**: TF-IDF Vectorization
    - **Trained on**: Symptom to Disease Dataset
    
    #### How it works:
    1. Transform symptoms into numerical features
    2. Compare with known disease patterns
    3. Predict most likely disease
    """)

# Run the app: build the page only when this file is executed as the main
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()

# Additional requirements.txt content
"""
streamlit
scikit-learn
nltk
# pickle is part of the Python standard library; no extra package is required
"""

# Deployment Notes
"""
Deployment Steps:
1. Install requirements:
   pip install -r requirements.txt
   
2. Download NLTK resources:
   python -m nltk.downloader punkt
   python -m nltk.downloader stopwords

3. Run the Streamlit app:
   streamlit run app.py
"""