|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
from sklearn.cluster import KMeans |
|
|
|
|
|
# Seed NumPy's global RNG so the initial datasets are reproducible.
np.random.seed(42)

# Number of points generated per entry of each *_centers list.
num_samples = 30

# NOTE(review): in the code visible here only len() of these lists is used
# (to size each dataset); the coordinates are never read, so the generated
# points are uniform over the plane rather than clustered around the centers.
traffic_centers = [(20, 20), (80, 80)]
nature_centers = [(0, 80), (80, 0)]
population_centers = [(0, 0), (50, 50), (100, 100)]

# Uniform (x, y) samples drawn from [0, 100). The draw order
# (traffic -> nature -> population) is kept fixed so the seeded sequence
# assigns the same points to each dataset.
traffic_data = np.random.uniform(0, 100, (num_samples * len(traffic_centers), 2))
nature_data = np.random.uniform(0, 100, (num_samples * len(nature_centers), 2))
population_data = np.random.uniform(0, 100, (num_samples * len(population_centers), 2))

# DataFrame views with named columns; these are the frames main() plots.
traffic_df = pd.DataFrame(traffic_data, columns=["x", "y"])
nature_df = pd.DataFrame(nature_data, columns=["x", "y"])
population_df = pd.DataFrame(population_data, columns=["x", "y"])
|
|
|
def apply_kmeans(data, k):
    """Cluster *data* into *k* groups with scikit-learn's KMeans.

    Args:
        data: Samples handed directly to ``KMeans.fit``; the caller in this
            file passes the ``.values`` of a two-column (x, y) DataFrame.
        k: Number of clusters (forwarded as ``n_clusters``).

    Returns:
        A ``(centroids, labels)`` tuple holding the fitted model's
        ``cluster_centers_`` and ``labels_``.
    """
    # A fixed random_state keeps the clustering reproducible between calls.
    model = KMeans(n_clusters=k, random_state=42)
    model.fit(data)
    return model.cluster_centers_, model.labels_
|
|
|
|
|
def _uniform_frame(center_count):
    """Return a DataFrame of uniform random (x, y) points in [0, 100).

    The frame holds ``num_samples * center_count`` rows.
    """
    points = np.random.uniform(0, 100, (num_samples * center_count, 2))
    return pd.DataFrame(points, columns=["x", "y"])


def main():
    """Render the Streamlit page for the K-means clustering demo.

    Shows a title, a button that regenerates the three module-level datasets,
    a dataset multiselect and a slider for k, then draws one scatter plot in
    which every selected dataset is clustered independently with
    ``apply_kmeans`` and its centroids are overlaid as red ``X`` markers.
    """
    global traffic_df, nature_df, population_df

    st.title("K-means Clustering simulator \n k-means Clustering 시뮬레이션을 제공합니다.")

    if st.button("Initialize Datasets"):
        # Same draw order as the module-level setup: traffic, nature, population.
        # NOTE(review): Streamlit re-executes the script on each widget
        # interaction, so these regenerated frames only live for the current
        # run; keep them in st.session_state if they should persist.
        traffic_df = _uniform_frame(len(traffic_centers))
        nature_df = _uniform_frame(len(nature_centers))
        population_df = _uniform_frame(len(population_centers))

    # UI label -> (points, matplotlib marker). '^' (triangle up) stands in for
    # the literal '▲' glyph, which matplotlib rejects as a marker style.
    dataset_mapping = {
        "교통접근성": (traffic_df, "o"),
        "자연환경": (nature_df, "x"),
        "인구밀집도": (population_df, "^"),
    }

    datasets = st.multiselect("Choose datasets:", list(dataset_mapping))
    k_value = st.slider("Select k value:", 1, 10)

    fig, ax = plt.subplots(figsize=(8, 8))
    try:
        for dataset_name in datasets:
            data, marker = dataset_mapping[dataset_name]
            centroids, labels = apply_kmeans(data.values, k_value)

            # Points are coloured by cluster label; centroids drawn on top.
            ax.scatter(
                data["x"],
                data["y"],
                c=labels,
                cmap="viridis",
                marker=marker,
                label=dataset_name,
            )
            ax.scatter(centroids[:, 0], centroids[:, 1], s=200, c="red", marker="X")

        ax.set_xlim(0, 100)
        ax.set_ylim(0, 100)
        ax.set_title(f"K-means clustering result (k={k_value})")
        # NOTE(review): legend labels are Korean; confirm the configured
        # matplotlib font provides Hangul glyphs.
        if datasets:
            # Without any labelled artist matplotlib only emits a warning.
            ax.legend()
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure registered until it is closed explicitly;
        # close it so repeated runs do not accumulate open figures.
        plt.close(fig)
|
|
|
# Only start the app when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()