"""Citizen service recommender.

Loads citizen demographics and per-citizen service data, fits a
nearest-neighbour model over encoded demographic and location features,
and serves a Gradio form that recommends the services most used by the
citizens most similar to the person described in the form.
"""

import gradio as gr
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import NearestNeighbors

# Number of similar citizens consulted per query, and number of services returned.
N_NEIGHBORS = 10
TOP_K = 5

# Load data
citizen_data = pd.read_csv("citizen_data_50000.csv")
services_data = pd.read_csv("citizen_services_50000.csv")
merged_data = pd.merge(citizen_data, services_data, on="citizen_id")

# Every column of the services file except the join key is treated as a service.
# NOTE(review): assumes these columns are numeric (e.g. 0/1 usage flags) and do not
# collide with column names in the citizen file -- confirm against the CSV schema.
service_cols = [col for col in services_data.columns if col != "citizen_id"]

# Encode categorical features including location
categorical_cols = ['gender', 'education', 'marital_status', 'social_category',
                    'village', 'block', 'district']
encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    merged_data[col] = le.fit_transform(merged_data[col])
    # Keep the fitted encoder so form input can be mapped to the same integer codes.
    encoders[col] = le

# Normalize numeric columns
numeric_cols = ['age', 'annual_income']
scaler = StandardScaler()
merged_data[numeric_cols] = scaler.fit_transform(merged_data[numeric_cols])

# Define feature columns including location
feature_cols = ['age', 'gender', 'education', 'marital_status', 'social_category',
                'annual_income', 'village', 'block', 'district']
features_matrix = merged_data[feature_cols].values

# Train KNN model
knn = NearestNeighbors(n_neighbors=N_NEIGHBORS, metric='cosine')
knn.fit(features_matrix)


def _encode_category(col, value):
    """Map a raw form value to the integer code learned for column *col*.

    Raises:
        gr.Error: if *value* is missing (dropdown left empty) or was not
            present in the training data for *col*.
    """
    if value is None:
        # Dropdowns have no default, so an untouched field arrives as None.
        raise gr.Error(f"Please select a value for '{col}'.")
    try:
        return encoders[col].transform([value])[0]
    except ValueError as err:
        # LabelEncoder raises ValueError for labels it has not seen during fit.
        raise gr.Error(f"Unknown value {value!r} for '{col}'.") from err


# Recommendation function with location
def recommend(age, gender, education, marital, social, income, village, block, district):
    """Return the top services used by the citizens most similar to the input.

    Args:
        age: Age in years (scaled with the fitted ``scaler``).
        gender, education, marital, social: Categorical demographics; each
            must be one of the labels seen in the training data.
        income: Annual income (scaled with the fitted ``scaler``).
        village, block, district: Location labels seen in the training data.

    Returns:
        A list of up to ``TOP_K`` strings, one per recommended service, each
        formatted as ``"<service column name> ✅"``, ordered from highest to
        lowest similarity-weighted score.

    Raises:
        gr.Error: if a categorical input is missing or unknown.
    """
    raw_categories = {
        'gender': gender,
        'education': education,
        'marital_status': marital,
        'social_category': social,
        'village': village,
        'block': block,
        'district': district,
    }
    row = {'age': float(age), 'annual_income': float(income)}
    for col, value in raw_categories.items():
        row[col] = _encode_category(col, value)

    # Build the query in exactly the column order the KNN model was fitted on.
    input_vector = pd.DataFrame([row], columns=feature_cols)

    # Normalize numeric columns
    input_vector[numeric_cols] = scaler.transform(input_vector[numeric_cols])

    # Get neighbors and calculate weighted similarity
    distances, indices = knn.kneighbors(
        input_vector.to_numpy(dtype=float), n_neighbors=N_NEIGHBORS
    )
    # The model uses cosine distance, so similarity is its complement.
    similarities = 1 - distances[0]

    # Weight each neighbour's service values by its similarity, then total per service.
    neighbour_services = merged_data.iloc[indices[0]][service_cols]
    scores = neighbour_services.mul(similarities, axis=0).sum()
    recommended_services = scores.sort_values(ascending=False).head(TOP_K)

    return [f"{service} ✅" for service in recommended_services.index]


# Gradio UI with location inputs
iface = gr.Interface(
    fn=recommend,
    inputs=[
        gr.Slider(18, 100, value=30, label="Age"),
        gr.Dropdown(encoders['gender'].classes_.tolist(), label="Gender"),
        gr.Dropdown(encoders['education'].classes_.tolist(), label="Education Level"),
        gr.Dropdown(encoders['marital_status'].classes_.tolist(), label="Marital Status"),
        gr.Dropdown(encoders['social_category'].classes_.tolist(), label="Social Category"),
        gr.Slider(0, 2000000, value=300000, step=10000, label="Annual Income"),
        gr.Dropdown(encoders['village'].classes_.tolist(), label="Village"),
        gr.Dropdown(encoders['block'].classes_.tolist(), label="Block"),
        gr.Dropdown(encoders['district'].classes_.tolist(), label="District"),
    ],
    outputs=gr.List(label="Top 5 Recommended Services"),
    title="🧠 Citizen Service Recommender",
    description="Get recommended services based on citizens similar to you — using your demographic and location data.",
)

iface.launch()