"""Streamlit app that classifies Hindi application text into a category.

On start-up the script loads ``SushasanSampleData.csv``, trains a TF-IDF +
logistic-regression classifier on the ``applicationDetail`` text against the
``applicationCategoryName`` label, and then serves a small form that predicts
the category of user-entered text.
"""

import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split


@st.cache_data
def load_data() -> pd.DataFrame:
    """Read the sample CSV and apply light clean-up.

    The ``ulbName`` and ``wardName`` columns are dropped, ``applicationId`` is
    converted to ``str``, and missing ``applicationSubCategoryName`` values
    are replaced with "अन्य".

    Returns:
        The cleaned DataFrame. The result is cached by ``st.cache_data``.
    """
    df = pd.read_csv("SushasanSampleData.csv", encoding='utf-8')
    df = df.drop(columns=['ulbName', 'wardName'])
    df['applicationId'] = df['applicationId'].astype(str)
    df['applicationSubCategoryName'] = df['applicationSubCategoryName'].fillna("अन्य")
    return df


@st.cache_resource
def train_model(df: pd.DataFrame):
    """Fit the text classifier on ``df``.

    Args:
        df: Frame with an ``applicationDetail`` text column and an
            ``applicationCategoryName`` label column.

    Returns:
        A ``(model, tfidf, label_encoder)`` tuple: the fitted
        ``LogisticRegression``, the fitted ``TfidfVectorizer`` and the fitted
        ``LabelEncoder`` needed to map predictions back to category names.
    """
    # A row without a label cannot be learned from; keeping it would either
    # break label encoding or make "missing" a predictable category.
    labelled = df.dropna(subset=['applicationCategoryName'])

    # TfidfVectorizer rejects NaN / non-string documents, so normalise the
    # text column first (missing detail becomes an empty document).
    texts = labelled['applicationDetail'].fillna("").astype(str)

    tfidf = TfidfVectorizer(max_features=5000)
    X = tfidf.fit_transform(texts)

    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(labelled['applicationCategoryName'])

    # Balance the classes by duplicating minority-class samples.
    ros = RandomOverSampler(random_state=42)
    X_resampled, y_resampled = ros.fit_resample(X, y)

    # Only the 80% training portion is used; the held-out 20% is never read.
    # NOTE(review): because oversampling happens before the split, the
    # held-out part would share duplicated rows with the training part and
    # should not be used as-is for evaluation.
    X_train, _, y_train, _ = train_test_split(
        X_resampled, y_resampled, test_size=0.2, random_state=42
    )

    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)
    return model, tfidf, label_encoder


# Load and train
df = load_data()
model, tfidf, label_encoder = train_model(df)

# UI
st.title("🧾 Hindi Application Category Classifier")
st.markdown("Enter a grievance or demand in Hindi. The model will predict whether it is a **मांग** (Demand) or a **शिकायत** (Complaint).")

user_input = st.text_area("✍️ Application Detail", "")

if st.button("🔍 Predict Category"):
    if not user_input.strip():
        st.warning("Please enter some text.")
    else:
        # Reuse the vectoriser fitted during training so the features line up.
        input_vector = tfidf.transform([user_input])
        prediction = model.predict(input_vector)
        label = label_encoder.inverse_transform(prediction)[0]
        st.success(f"🧠 Predicted Category: **{label}**")