File size: 3,025 Bytes
d21e97b b74390d d21e97b 3ff0b10 d21e97b 5d64f37 e18d4ac 5d64f37 b74390d d21e97b b73f6ae 126f1a6 d21e97b 77631e1 d21e97b 126f1a6 d21e97b b74390d d21e97b 10981ed b74390d 126f1a6 b74390d d21e97b b46415a b74390d b46415a 706abd1 0e44054 7333f3f 0e44054 e18d4ac 0e44054 d21e97b b74390d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
from sklearn.cluster import KMeans
import matplotlib.font_manager as fm
# Font handed to matplotlib for the plot title and legend text drawn in main().
font_path = '/home/user/app/NanumGothic-Regular.ttf'
fontprop = fm.FontProperties(size=12, fname=font_path)
# Synthetic datasets: uniformly distributed 2-D points in [0, 100) x [0, 100).
# The seed is fixed so every run of the script starts from the same points.
np.random.seed(42)
num_samples = 30

# Only the number of centers per category is used below; each category gets
# num_samples points per listed center.
traffic_centers = [(20, 20), (80, 80)]
nature_centers = [(0, 80), (80, 0)]
population_centers = [(0, 0), (50, 50), (100, 100)]

# Drawn in the order traffic -> nature -> population so the random stream is
# consumed in a fixed sequence.
traffic_data, nature_data, population_data = (
    np.random.uniform(0, 100, (num_samples * len(centers), 2))
    for centers in (traffic_centers, nature_centers, population_centers)
)

# Wrap each point array in a DataFrame with named coordinate columns.
traffic_df, nature_df, population_df = (
    pd.DataFrame(points, columns=["x", "y"])
    for points in (traffic_data, nature_data, population_data)
)
def apply_kmeans(data, k):
    """Cluster ``data`` into ``k`` groups with scikit-learn's KMeans.

    Args:
        data: Samples to cluster, passed straight to ``KMeans.fit``.
        k: Number of clusters to form.

    Returns:
        A ``(centroids, labels)`` tuple taken from the fitted model's
        ``cluster_centers_`` and ``labels_`` attributes.
    """
    # random_state is fixed so the same input always yields the same clusters.
    model = KMeans(n_clusters=k, random_state=42)
    model.fit(data)
    return model.cluster_centers_, model.labels_
def main():
    """Render the Streamlit K-means clustering simulator page.

    The user picks one or more of the synthetic datasets and a cluster count
    ``k``. K-means is run on the concatenation of the selected datasets and
    the result is drawn as a scatter plot: one marker shape per dataset, one
    colour per cluster, and the centroids as large red ``X`` markers.
    Nothing is plotted while no dataset is selected.
    """
    st.title("K-means 클러스터링 시뮬레이터")

    # The reset button below rebinds the module-level DataFrames.
    global traffic_df, nature_df, population_df

    if st.button("데이터셋 초기화"):
        # NOTE(review): Streamlit re-executes the script on every interaction,
        # so the module-level data is rebuilt on the next rerun and this fresh
        # sample appears to be visible only for the current run. Store it in
        # st.session_state if it is meant to persist -- confirm the intent.
        traffic_data = np.random.uniform(0, 100, (num_samples * len(traffic_centers), 2))
        nature_data = np.random.uniform(0, 100, (num_samples * len(nature_centers), 2))
        population_data = np.random.uniform(0, 100, (num_samples * len(population_centers), 2))
        traffic_df = pd.DataFrame(traffic_data, columns=["x", "y"])
        nature_df = pd.DataFrame(nature_data, columns=["x", "y"])
        population_df = pd.DataFrame(population_data, columns=["x", "y"])

    datasets = st.multiselect("데이터셋 선택:", ["교통접근성", "자연환경", "인구밀집도"])
    k_value = st.slider("k 값 선택:", 1, 10)

    # Dataset label -> (points, matplotlib marker used to draw that dataset).
    dataset_mapping = {
        "교통접근성": (traffic_df, 'o'),
        "자연환경": (nature_df, 'x'),
        "인구밀집도": (population_df, '^'),
    }

    if not datasets:
        return

    combined_data = pd.concat([dataset_mapping[dataset_name][0] for dataset_name in datasets])
    centroids, labels = apply_kmeans(combined_data.values, k_value)

    fig, ax = plt.subplots(figsize=(8, 8))

    # The labels follow the concatenation order, so walk them with a running
    # offset to pick out the slice that belongs to each dataset.
    start = 0
    for dataset_name in datasets:
        data, marker = dataset_mapping[dataset_name]
        stop = start + len(data)
        ax.scatter(
            data['x'],
            data['y'],
            c=labels[start:stop],
            cmap='viridis',
            # Pin the colour scale to the full label range; otherwise every
            # scatter call autoscales on its own subset and the same cluster
            # can be drawn in different colours for different datasets.
            vmin=0,
            vmax=max(k_value - 1, 1),
            marker=marker,
            label=dataset_name,
        )
        start = stop

    ax.scatter(centroids[:, 0], centroids[:, 1], s=200, c='red', marker='X')
    ax.set_xlim(0, 100)
    ax.set_ylim(0, 100)
    ax.set_title(f"K-means 클러스터링 결과 (k={k_value})", fontproperties=fontprop)
    ax.legend(prop=fontprop)
    st.pyplot(fig)
    # Release the figure; pyplot otherwise keeps one alive per script rerun.
    plt.close(fig)
# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|