Spaces:

UpendraAI
/

hindi-classifier

Sleeping

App Files Files Community

UpendraAI committed on Apr 26

Commit

9d74e47

verified ·

1 Parent(s): 3d95dfa

Create app.py

Browse files

Files changed (1) hide show

app.py +52 -0

app.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import streamlit as st
+import pandas as pd
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.linear_model import LogisticRegression
+from sklearn.preprocessing import LabelEncoder
+from imblearn.over_sampling import RandomOverSampler
+from sklearn.model_selection import train_test_split
+@st.cache_data
+def load_data():
+    df = pd.read_csv("SushasanSampleData.csv", encoding='utf-8')
+    df = df.drop(columns=['ulbName', 'wardName'])
+    df['applicationId'] = df['applicationId'].astype(str)
+    df['applicationSubCategoryName'] = df['applicationSubCategoryName'].fillna("अन्य")
+    return df
+@st.cache_resource
+def train_model(df):
+    tfidf = TfidfVectorizer(max_features=5000)
+    X = tfidf.fit_transform(df['applicationDetail'])
+    label_encoder = LabelEncoder()
+    y = label_encoder.fit_transform(df['applicationCategoryName'])
+    ros = RandomOverSampler(random_state=42)
+    X_resampled, y_resampled = ros.fit_resample(X, y)
+    X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)
+    model = LogisticRegression(max_iter=1000)
+    model.fit(X_train, y_train)
+    return model, tfidf, label_encoder
+# Load and train
+df = load_data()
+model, tfidf, label_encoder = train_model(df)
+# UI
+st.title("🧾 Hindi Application Category Classifier")
+st.markdown("Enter a grievance or demand in Hindi. The model will predict whether it is a **मांग** (Demand) or a **शिकायत** (Complaint).")
+user_input = st.text_area("✍️ Application Detail", "")
+if st.button("🔍 Predict Category"):
+    if user_input.strip() == "":
+        st.warning("Please enter some text.")
+    else:
+        input_vector = tfidf.transform([user_input])
+        prediction = model.predict(input_vector)
+        label = label_encoder.inverse_transform(prediction)[0]
+        st.success(f"🧠 Predicted Category: **{label}**")