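# NOTE: the next three lines appear to be web-page residue captured with the
# file (avatar caption, commit message, commit id); they are not program code.
#   UpendraAI's picture
#   Create app.py
#   9d74e47 verified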
import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split
@st.cache_data
def load_data():
    """Read the sample applications CSV and return a cleaned DataFrame.

    The ``ulbName`` and ``wardName`` columns are removed, ``applicationId`` is
    converted to strings, and missing ``applicationSubCategoryName`` values
    are replaced with "अन्य".
    """
    raw = pd.read_csv("SushasanSampleData.csv", encoding='utf-8')
    trimmed = raw.drop(['ulbName', 'wardName'], axis=1)
    # assign() overwrites the two existing columns in place (same positions)
    # and hands back a new frame, so nothing is mutated step by step.
    return trimmed.assign(
        applicationId=trimmed['applicationId'].astype(str),
        applicationSubCategoryName=trimmed['applicationSubCategoryName'].fillna("अन्य"),
    )
@st.cache_resource
def train_model(df):
    """Fit a TF-IDF + logistic-regression classifier on the application texts.

    Args:
        df: DataFrame providing an ``applicationDetail`` text column (features)
            and an ``applicationCategoryName`` column (labels).

    Returns:
        A ``(model, tfidf, label_encoder)`` tuple: the fitted
        ``LogisticRegression``, the fitted ``TfidfVectorizer`` used to turn
        text into features, and the fitted ``LabelEncoder`` that maps
        predicted class ids back to category names.
    """
    # Empty CSV cells arrive as NaN, which TfidfVectorizer rejects with a
    # ValueError; treat them as empty documents instead of crashing the app.
    texts = df['applicationDetail'].fillna("").astype(str)

    tfidf = TfidfVectorizer(max_features=5000)
    X = tfidf.fit_transform(texts)

    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(df['applicationCategoryName'])

    # Balance the classes by randomly re-sampling the under-represented ones.
    ros = RandomOverSampler(random_state=42)
    X_resampled, y_resampled = ros.fit_resample(X, y)

    # NOTE: only the 80% training portion is used; the held-out part is
    # discarded. Because oversampling happens before the split, that part
    # would not be a clean evaluation set anyway. The split (and its seed) is
    # kept so the fitted model is unchanged.
    X_train, _, y_train, _ = train_test_split(
        X_resampled, y_resampled, test_size=0.2, random_state=42
    )

    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)
    return model, tfidf, label_encoder
# Prepare the dataset and the fitted classifier.
df = load_data()
model, tfidf, label_encoder = train_model(df)

# Page header and input widgets.
st.title("🧾 Hindi Application Category Classifier")
st.markdown(
    "Enter a grievance or demand in Hindi. The model will predict whether it is a **मांग** (Demand) or a **शिकायत** (Complaint)."
)
user_input = st.text_area("✍️ Application Detail", "")

# Run a prediction only when the button is pressed in this rerun.
predict_clicked = st.button("🔍 Predict Category")
if predict_clicked:
    if user_input.strip():
        # Vectorize the raw (unstripped) text with the fitted TF-IDF
        # vocabulary, then map the predicted class id back to its name.
        features = tfidf.transform([user_input])
        predicted_ids = model.predict(features)
        label = label_encoder.inverse_transform(predicted_ids)[0]
        st.success(f"🧠 Predicted Category: **{label}**")
    else:
        # Nothing but whitespace was entered.
        st.warning("Please enter some text.")