|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
import matplotlib.font_manager as fm |
|
from sklearn.cluster import KMeans |
|
import matplotlib.font_manager as fm |
|
|
|
# Font used for the plot title and legend so the Korean labels can be drawn.
# The path is absolute; the font file must exist there when the figure is
# rendered.
font_path = '/home/user/app/NanumGothic-Regular.ttf'
fontprop = fm.FontProperties(fname=font_path, size=12)


# Seed NumPy's global RNG so the initial datasets are identical on every run
# of the script.
np.random.seed(42)

# Samples generated per center. In the visible code the center coordinates
# themselves are never read; only the number of centers scales each dataset.
num_samples = 30
traffic_centers = [(20, 20), (80, 80)]
nature_centers = [(0, 80), (80, 0)]
population_centers = [(0, 0), (50, 50), (100, 100)]

# Uniform points in the [0, 100) x [0, 100) square, one (x, y) row per sample.
# The three draws consume the seeded stream in this exact order.
traffic_data = np.random.uniform(0, 100, (num_samples * len(traffic_centers), 2))
nature_data = np.random.uniform(0, 100, (num_samples * len(nature_centers), 2))
population_data = np.random.uniform(0, 100, (num_samples * len(population_centers), 2))

traffic_df = pd.DataFrame(traffic_data, columns=["x", "y"])
nature_df = pd.DataFrame(nature_data, columns=["x", "y"])
population_df = pd.DataFrame(population_data, columns=["x", "y"])


def apply_kmeans(data, k):
    """Fit K-means on ``data`` and return the fitted centroids and labels.

    Args:
        data: Array-like of samples to cluster, one row per sample.
        k: Number of clusters to fit.

    Returns:
        A ``(centroids, labels)`` tuple: the fitted model's
        ``cluster_centers_`` and ``labels_`` (one label per input row, in
        input order).
    """
    # n_init is pinned explicitly: its implicit default emits a FutureWarning
    # on scikit-learn 1.2-1.3 and changed to 'auto' in 1.4, so leaving it out
    # makes the result depend on the installed version.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42).fit(data)
    return kmeans.cluster_centers_, kmeans.labels_


# Key under which the current datasets are kept in Streamlit's session state.
_DATASETS_STATE_KEY = "kmeans_datasets"


def _random_datasets(rng):
    """Draw fresh uniform point clouds for the three datasets.

    Args:
        rng: A ``numpy.random.Generator`` used for sampling.

    Returns:
        A ``(traffic, nature, population)`` tuple of DataFrames with ``x`` and
        ``y`` columns, sized like the module-level default datasets.
    """
    return tuple(
        pd.DataFrame(
            rng.uniform(0, 100, (num_samples * len(centers), 2)),
            columns=["x", "y"],
        )
        for centers in (traffic_centers, nature_centers, population_centers)
    )


def main():
    """Render the K-means clustering simulator page.

    Shows a reset button, a dataset multiselect and a ``k`` slider. When at
    least one dataset is selected, the selected datasets are concatenated,
    clustered with ``apply_kmeans`` and drawn as a scatter plot: one marker
    shape per dataset, one color per cluster, centroids as red ``X`` marks.
    """
    st.title("K-means 클러스터링 시뮬레이터")  # "K-means clustering simulator"

    # Streamlit re-executes the whole script on every interaction, which
    # rebuilds (and re-seeds) the module-level datasets. Only session_state
    # survives reruns, so the datasets currently in use are kept there.
    if _DATASETS_STATE_KEY not in st.session_state:
        st.session_state[_DATASETS_STATE_KEY] = (traffic_df, nature_df, population_df)

    if st.button("데이터셋 초기화"):  # "Reset datasets"
        # An unseeded generator is used on purpose: the global RNG is
        # re-seeded to 42 at the top of every rerun, so drawing from it here
        # would produce the same "new" data on every click.
        st.session_state[_DATASETS_STATE_KEY] = _random_datasets(np.random.default_rng())

    traffic, nature, population = st.session_state[_DATASETS_STATE_KEY]

    # Labels: "Select datasets:", "Traffic accessibility",
    # "Natural environment", "Population density".
    datasets = st.multiselect("데이터셋 선택:", ["교통접근성", "자연환경", "인구밀집도"])
    k_value = st.slider("k 값 선택:", 1, 10)  # "Select k:"

    # Dataset label -> (points, marker shape used in the plot).
    dataset_mapping = {
        "교통접근성": (traffic, 'o'),
        "자연환경": (nature, 'x'),
        "인구밀집도": (population, '^'),
    }

    if not datasets:
        return

    combined_data = pd.concat([dataset_mapping[name][0] for name in datasets])
    centroids, labels = apply_kmeans(combined_data.values, k_value)

    fig, ax = plt.subplots(figsize=(8, 8))

    # Every scatter call shares one color scale. Without explicit limits each
    # call normalizes against its own subset of labels, so the same cluster
    # could be drawn in different colors for different datasets.
    color_scale = {"cmap": "viridis", "vmin": 0, "vmax": max(k_value - 1, 1)}

    # Labels follow the concatenation order, so each dataset owns a
    # contiguous slice of them.
    start = 0
    for name in datasets:
        data, marker = dataset_mapping[name]
        stop = start + len(data)
        ax.scatter(
            data['x'],
            data['y'],
            c=labels[start:stop],
            marker=marker,
            label=name,
            **color_scale,
        )
        start = stop

    ax.scatter(centroids[:, 0], centroids[:, 1], s=200, c='red', marker='X')
    ax.set_xlim(0, 100)
    ax.set_ylim(0, 100)
    # "K-means clustering result (k=...)"
    ax.set_title(f"K-means 클러스터링 결과 (k={k_value})", fontproperties=fontprop)
    ax.legend(prop=fontprop)
    st.pyplot(fig)

    # pyplot keeps every figure it creates alive until closed; without this
    # each rerun would leave another figure in memory.
    plt.close(fig)


# Build the page only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()