File size: 2,569 Bytes
d21e97b
 
 
 
 
 
f33edaa
d21e97b
b73f6ae
126f1a6
 
 
d21e97b
77631e1
 
 
d21e97b
126f1a6
 
 
d21e97b
 
 
 
 
 
 
10981ed
d21e97b
f33edaa
d21e97b
10981ed
 
 
620c237
126f1a6
 
 
 
 
 
 
 
b46415a
d21e97b
 
b46415a
f33edaa
 
 
b46415a
f33edaa
 
 
 
 
 
7333f3f
f33edaa
7333f3f
f33edaa
 
 
 
 
 
d21e97b
 
f33edaa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# Data generation: the fixed seed makes the initial datasets reproducible.
np.random.seed(42)
num_samples = 30
traffic_centers = [(20, 20), (80, 80)]
nature_centers = [(0, 80), (80, 0)]
population_centers = [(0, 0), (50, 50), (100, 100)]

# Each dataset gets num_samples uniformly drawn points per listed centre;
# only the number of centres is used here, not their coordinates.
# The three draws are kept in this order so the seeded values do not change.
traffic_data = np.random.uniform(
    low=0, high=100, size=(len(traffic_centers) * num_samples, 2)
)
traffic_df = pd.DataFrame(data=traffic_data, columns=["x", "y"])

nature_data = np.random.uniform(
    low=0, high=100, size=(len(nature_centers) * num_samples, 2)
)
nature_df = pd.DataFrame(data=nature_data, columns=["x", "y"])

population_data = np.random.uniform(
    low=0, high=100, size=(len(population_centers) * num_samples, 2)
)
population_df = pd.DataFrame(data=population_data, columns=["x", "y"])

def apply_kmeans(data, k):
    """Cluster ``data`` into ``k`` groups with scikit-learn's ``KMeans``.

    The model is built with ``random_state=42``.

    Args:
        data: Samples to cluster, passed straight to ``KMeans.fit``.
        k: Number of clusters to fit.

    Returns:
        A ``(centroids, labels)`` tuple taken from the fitted model's
        ``cluster_centers_`` and ``labels_`` attributes.
    """
    model = KMeans(n_clusters=k, random_state=42)
    model.fit(data)
    return model.cluster_centers_, model.labels_


def _draw_datasets(rng):
    """Draw a fresh set of uniform 2-D point clouds, one per dataset.

    Args:
        rng: A ``numpy.random.Generator`` used for sampling.

    Returns:
        A ``(traffic, nature, population)`` tuple of DataFrames with columns
        ``x`` and ``y``, sized like the module-level datasets.
    """
    counts = (
        num_samples * len(traffic_centers),
        num_samples * len(nature_centers),
        num_samples * len(population_centers),
    )
    return tuple(
        pd.DataFrame(rng.uniform(0, 100, (count, 2)), columns=["x", "y"])
        for count in counts
    )


def main():
    """Render the Streamlit page: dataset picker, k slider and cluster plot.

    For every dataset chosen in the multiselect, k-means is run with the
    selected ``k`` and the points (coloured by cluster label) and the
    centroids (red ``X``) are drawn on one shared 100 x 100 axes.

    Pressing "Initialize Datasets" replaces the three datasets with newly
    sampled ones; the replacement is kept in ``st.session_state`` and also
    assigned to the module-level ``traffic_df``, ``nature_df`` and
    ``population_df``.
    """
    global traffic_df, nature_df, population_df

    st.title("K-means Clustering simulator \n k-means Clustering 시뮬레이션을 제공합니다.")

    if st.button("Initialize Datasets"):
        # The module-level seed is re-applied every time the script runs, so
        # drawing from np.random here would give the same "new" data on every
        # click. An unseeded Generator yields genuinely fresh samples.
        st.session_state["datasets"] = _draw_datasets(np.random.default_rng())

    # Streamlit re-executes the script on each interaction, which rebuilds
    # the globals; session_state keeps regenerated data across those reruns.
    if "datasets" in st.session_state:
        traffic_df, nature_df, population_df = st.session_state["datasets"]

    datasets = st.multiselect(
        "Choose datasets:", ["교통접근성", "자연환경", "인구밀집도"]
    )
    k_value = st.slider("Select k value:", 1, 10)

    # Dataset label -> (points, Matplotlib marker). '^' is the triangle-up
    # marker; a literal triangle glyph is not a valid marker string.
    dataset_mapping = {
        "교통접근성": (traffic_df, 'o'),
        "자연환경": (nature_df, 'x'),
        "인구밀집도": (population_df, '^'),
    }

    fig, ax = plt.subplots(figsize=(8, 8))

    for dataset_name in datasets:
        data, marker = dataset_mapping[dataset_name]
        centroids, labels = apply_kmeans(data.values, k_value)

        ax.scatter(data['x'], data['y'], c=labels, cmap='viridis', marker=marker, label=dataset_name)
        ax.scatter(centroids[:, 0], centroids[:, 1], s=200, c='red', marker='X')

    ax.set_xlim(0, 100)
    ax.set_ylim(0, 100)
    ax.set_title(f"K-means clustering result (k={k_value})")
    # With nothing selected there are no labelled artists, and legend()
    # would only emit a warning.
    if datasets:
        ax.legend()
    st.pyplot(fig)
    # Release the figure so pyplot does not accumulate one per rerun.
    plt.close(fig)

# Build the page only when this file is executed as the main module,
# not when it is imported.
if __name__ == "__main__":
    main()