|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
from sklearn.cluster import KMeans |
|
|
|
|
|
# Seed NumPy's global generator so the synthetic datasets below come out
# identical on every run of the script.
np.random.seed(42)

# Number of points sampled around each individual cluster center.
num_samples = 30

# (x, y) cluster centers for each of the three themed datasets.
traffic_centers = [(20, 20), (80, 80)]
nature_centers = [(0, 80), (80, 0)]
population_centers = [(0, 0), (50, 50), (100, 100)]

# One Gaussian blob (standard deviation 10, shape (num_samples, 2)) per center.
# The sampling order -- traffic, then nature, then population -- determines
# which values each dataset receives from the seeded generator.
traffic_data = [
    np.random.normal(loc=origin, scale=10, size=(num_samples, 2))
    for origin in traffic_centers
]
nature_data = [
    np.random.normal(loc=origin, scale=10, size=(num_samples, 2))
    for origin in nature_centers
]
population_data = [
    np.random.normal(loc=origin, scale=10, size=(num_samples, 2))
    for origin in population_centers
]

# Stack each dataset's blobs row-wise into a single two-column frame.
traffic_df = pd.DataFrame(np.concatenate(traffic_data, axis=0), columns=["x", "y"])
nature_df = pd.DataFrame(np.concatenate(nature_data, axis=0), columns=["x", "y"])
population_df = pd.DataFrame(np.concatenate(population_data, axis=0), columns=["x", "y"])
|
|
|
def apply_kmeans(data, k):
    """Cluster ``data`` into ``k`` groups with scikit-learn's ``KMeans``.

    The estimator is created with ``random_state=42`` and fitted on ``data``.

    Args:
        data: Samples to cluster, passed unchanged to ``KMeans.fit``.
        k: Number of clusters to form.

    Returns:
        A ``(centroids, labels)`` tuple taken from the fitted estimator's
        ``cluster_centers_`` and ``labels_`` attributes.
    """
    model = KMeans(n_clusters=k, random_state=42)
    model.fit(data)
    return model.cluster_centers_, model.labels_
|
|
|
def generate_data():
    """Replace the three module-level datasets with uniform random points.

    Rebinds the globals ``traffic_df``, ``nature_df`` and ``population_df``
    (in that sampling order) to new frames of ``num_samples`` rows with
    columns ``"x"`` and ``"y"``, each coordinate drawn uniformly from
    ``[0, 100)`` using NumPy's global random generator.
    """
    global traffic_df, nature_df, population_df

    def _uniform_frame():
        # One independent draw of num_samples points in the 0-100 square.
        points = np.random.uniform(0, 100, (num_samples, 2))
        return pd.DataFrame(points, columns=["x", "y"])

    # Order matters: it fixes which random draws each dataset receives.
    traffic_df = _uniform_frame()
    nature_df = _uniform_frame()
    population_df = _uniform_frame()
|
|
|
def main():
    """Render the Streamlit K-means clustering simulator page.

    Shows a title, an "Initialize Datasets" button, a dataset multiselect and
    a slider for ``k`` (1-10).  When at least one dataset is selected, the
    chosen frames are concatenated, clustered with ``apply_kmeans`` and drawn
    as a scatter plot coloured by cluster label, with the centroids marked
    as large red crosses.
    """
    st.title("K-means Clustering Simulator")

    # Streamlit re-executes the whole script on every widget interaction and
    # ``st.button`` is True only during the run triggered by the click.  The
    # regenerated frames are therefore kept in session state; otherwise the
    # module-level defaults would silently replace them as soon as the user
    # touched the multiselect or the slider.
    if st.button("Initialize Datasets"):
        generate_data()
        st.session_state["generated_frames"] = (traffic_df, nature_df, population_df)

    if "generated_frames" in st.session_state:
        traffic, nature, population = st.session_state["generated_frames"]
    else:
        traffic, nature, population = traffic_df, nature_df, population_df

    # User-facing dataset labels (Korean) mapped to their frames.  The
    # multiselect options are derived from these keys so the two always agree.
    dataset_mapping = {
        "교통접근성": traffic,  # traffic accessibility
        "자연환경": nature,  # natural environment
        "인구밀집도": population,  # population density
    }

    datasets = st.multiselect("Choose datasets:", list(dataset_mapping))
    k_value = st.slider("Select k value:", 1, 10)

    # Nothing to cluster or draw until the user picks at least one dataset.
    if not datasets:
        return

    combined_data = pd.concat([dataset_mapping[dataset_name] for dataset_name in datasets])
    centroids, labels = apply_kmeans(combined_data.values, k_value)

    fig, ax = plt.subplots(figsize=(8, 8))
    try:
        ax.scatter(combined_data['x'], combined_data['y'], c=labels, cmap='viridis')
        ax.scatter(centroids[:, 0], centroids[:, 1], s=200, c='red', marker='X')

        # NOTE: the view is fixed to the 0-100 square; points sampled outside
        # that range are not visible in the plot.
        ax.set_xlim(0, 100)
        ax.set_ylim(0, 100)
        ax.set_title(f"K-means clustering result (k={k_value})")
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates alive until it is closed;
        # without this each rerun of the script would leave one more behind.
        plt.close(fig)
|
|
|
# Entry point: build the page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|
|