# AICOE-PR-BSK / app.py
# billusanda007's picture
# Update app.py (#3)
# 49e5bde verified
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import NearestNeighbors
# Read both CSV exports and join them row-wise on the shared "citizen_id"
# key (pandas' default inner join), producing one row per matched citizen.
citizen_data = pd.read_csv("citizen_data_50000.csv")
services_data = pd.read_csv("citizen_services_50000.csv")
merged_data = citizen_data.merge(services_data, on="citizen_id")
# Replace every categorical column (demographics and location) in
# merged_data with integer codes. Each fitted encoder is kept, keyed by
# column name, so later code can map raw values to the same codes.
categorical_cols = [
    'gender',
    'education',
    'marital_status',
    'social_category',
    'village',
    'block',
    'district',
]
encoders = {}
for col in categorical_cols:
    encoders[col] = LabelEncoder()
    merged_data[col] = encoders[col].fit_transform(merged_data[col])
# Standardize the two numeric columns in place; the fitted scaler is kept
# at module level so the same transform can be applied to new inputs.
_numeric_cols = ['age', 'annual_income']
scaler = StandardScaler()
_scaled_numeric = scaler.fit_transform(merged_data[_numeric_cols])
merged_data[_numeric_cols] = _scaled_numeric
# Column order of the feature matrix; any query vector passed to the model
# must present its values in this same order.
feature_cols = [
    'age',
    'gender',
    'education',
    'marital_status',
    'social_category',
    'annual_income',
    'village',
    'block',
    'district',
]
features_matrix = merged_data[feature_cols].to_numpy()

# Fit a 10-neighbour index over the merged rows using cosine distance.
knn = NearestNeighbors(n_neighbors=10, metric='cosine').fit(features_matrix)
# Recommendation function with location
def _encode_choice(column, value):
    """Return the integer code that ``encoders[column]`` assigns to *value*.

    Raises:
        gr.Error: If nothing was selected, or the fitted encoder rejects the
            value as a label it has not seen.
    """
    if value is None or value == "":
        raise gr.Error(f"Please select a value for '{column}'.")
    try:
        return encoders[column].transform([value])[0]
    except ValueError as err:
        # LabelEncoder raises ValueError for labels absent at fit time.
        raise gr.Error(f"Unknown value {value!r} for '{column}'.") from err


def recommend(age, gender, education, marital, social, income, village, block, district):
    """Return the five services ranked highest among the nearest neighbours.

    The inputs are encoded and scaled with the module-level ``encoders`` and
    ``scaler``, the fitted ``knn`` model is queried, and each neighbour's
    service columns are weighted by ``1 - cosine distance`` before being
    summed per service.

    Args:
        age: Age, scaled together with ``income`` by ``scaler``.
        gender, education, marital, social: Raw categorical selections.
        income: Annual income.
        village, block, district: Raw location selections.

    Returns:
        A list of up to five strings, each a service column name followed
        by a check mark, ordered from highest to lowest weighted sum.

    Raises:
        gr.Error: If a categorical input is missing or unknown to its encoder.
    """
    selections = {
        'gender': gender,
        'education': education,
        'marital_status': marital,
        'social_category': social,
        'village': village,
        'block': block,
        'district': district,
    }
    row = {'age': age, 'annual_income': income}
    for column, value in selections.items():
        row[column] = _encode_choice(column, value)
    input_vector = pd.DataFrame([row])

    # Normalize numeric columns with the scaler fitted on the training data.
    input_vector[['age', 'annual_income']] = scaler.transform(
        input_vector[['age', 'annual_income']]
    )

    # Select columns explicitly so the query always matches the column order
    # the model was fitted on, regardless of how `row` was assembled.
    query = input_vector[feature_cols].to_numpy()

    # Use the neighbour count the model was constructed with.
    distances, indices = knn.kneighbors(query)
    similarities = 1 - distances[0]

    # Every services_data column except the join key is treated as a service.
    # NOTE(review): assumes these columns are numeric usage indicators - confirm.
    service_cols = [col for col in services_data.columns if col != "citizen_id"]
    neighbour_services = merged_data.iloc[indices[0]][service_cols]

    # Scale each neighbour's row by its similarity (positional, row-wise),
    # then total per service and keep the five largest.
    weighted = neighbour_services.mul(similarities, axis=0)
    recommended_services = weighted.sum().sort_values(ascending=False).head(5)
    return [f"{service} ✅" for service in recommended_services.index]
# Gradio UI with location inputs
def _dropdown_for(column, label):
    """Build a dropdown listing every class the fitted encoder for *column* learned."""
    return gr.Dropdown(encoders[column].classes_.tolist(), label=label)


iface = gr.Interface(
    fn=recommend,
    # Inputs are passed positionally, so their order must match the
    # parameter order of recommend().
    inputs=[
        gr.Slider(18, 100, value=30, label="Age"),
        _dropdown_for('gender', "Gender"),
        _dropdown_for('education', "Education Level"),
        _dropdown_for('marital_status', "Marital Status"),
        _dropdown_for('social_category', "Social Category"),
        gr.Slider(0, 2000000, value=300000, step=10000, label="Annual Income"),
        _dropdown_for('village', "Village"),
        _dropdown_for('block', "Block"),
        _dropdown_for('district', "District"),
    ],
    outputs=gr.List(label="Top 5 Recommended Services"),
    title="🧠 Citizen Service Recommender",
    description="Get recommended services based on citizens similar to you — using your demographic and location data.",
)
iface.launch()