File size: 3,025 Bytes
d21e97b
 
 
 
b74390d
d21e97b
3ff0b10
d21e97b
5d64f37
e18d4ac
5d64f37
b74390d
 
d21e97b
b73f6ae
126f1a6
 
 
d21e97b
77631e1
 
 
d21e97b
126f1a6
 
 
d21e97b
 
 
 
 
 
 
 
b74390d
d21e97b
10981ed
 
 
b74390d
126f1a6
 
 
 
 
 
 
 
b74390d
 
d21e97b
b46415a
b74390d
 
 
b46415a
706abd1
0e44054
 
7333f3f
0e44054
 
 
 
 
 
 
 
 
 
 
 
e18d4ac
 
0e44054
d21e97b
 
b74390d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
from sklearn.cluster import KMeans
import matplotlib.font_manager as fm

# Font properties handed to the plot title and legend in main().
# NOTE(review): absolute path -- presumably the deployment directory of the
# app; confirm the .ttf file is shipped there.
font_path = '/home/user/app/NanumGothic-Regular.ttf'
fontprop = fm.FontProperties(
    size=12,
    fname=font_path,
)


# Synthetic data setup.
# The global NumPy RNG is seeded so the three point clouds are reproducible;
# the draw order (traffic, nature, population) determines the values.
np.random.seed(42)
num_samples = 30  # points drawn per listed center

# Only the *number* of centers is used in this section (to size each
# dataset); the coordinates themselves are not read here.
traffic_centers = [(20, 20), (80, 80)]
nature_centers = [(0, 80), (80, 0)]
population_centers = [(0, 0), (50, 50), (100, 100)]


def _uniform_points(center_list):
    """Draw ``num_samples`` uniform 2-D points in [0, 100) per center."""
    row_count = num_samples * len(center_list)
    return np.random.uniform(0, 100, (row_count, 2))


def _as_xy_frame(points):
    """Wrap an (n, 2) array in a DataFrame with columns ``x`` and ``y``."""
    return pd.DataFrame(points, columns=["x", "y"])


traffic_data = _uniform_points(traffic_centers)
nature_data = _uniform_points(nature_centers)
population_data = _uniform_points(population_centers)

traffic_df = _as_xy_frame(traffic_data)
nature_df = _as_xy_frame(nature_data)
population_df = _as_xy_frame(population_data)

def apply_kmeans(data, k):
    """Cluster ``data`` into ``k`` groups with scikit-learn's KMeans.

    Args:
        data: 2-D array-like of samples, one row per point.
        k: Number of clusters to fit.

    Returns:
        A ``(centroids, labels)`` tuple: the fitted ``cluster_centers_``
        array and the per-row ``labels_`` array, in the row order of ``data``.
    """
    # n_init is pinned explicitly. scikit-learn changed its default from 10
    # to "auto" (and emits a FutureWarning in the transition releases), so
    # leaving it implicit makes the clustering depend on the installed
    # version. 10 is the historical default.
    model = KMeans(n_clusters=k, n_init=10, random_state=42).fit(data)
    return model.cluster_centers_, model.labels_

def main():
    """Render the Streamlit K-means demo page.

    Shows a dataset-reset button, a dataset multiselect and a ``k`` slider.
    When at least one dataset is selected, the selected datasets are
    concatenated, clustered with ``apply_kmeans`` and drawn on one scatter
    plot: each dataset uses its own marker, points are colored by cluster
    label, and the centroids are overlaid as red ``X`` markers.
    """
    st.title("K-means ํด๋Ÿฌ์Šคํ„ฐ๋ง ์‹œ๋ฎฌ๋ ˆ์ดํ„ฐ")

    # The module-level frames are rebound below so they always reflect the
    # most recently generated data.
    global traffic_df, nature_df, population_df

    # Streamlit re-executes the whole script on every widget interaction, so
    # data regenerated on a button click must be kept in session_state;
    # assigning it only to module globals loses it on the very next rerun.
    state_key = "kmeans_regenerated_frames"
    if st.button("๋ฐ์ดํ„ฐ์…‹ ์ดˆ๊ธฐํ™”"):
        # Draw order (traffic, nature, population) is kept so the values
        # produced by the global RNG are unchanged.
        st.session_state[state_key] = tuple(
            pd.DataFrame(
                np.random.uniform(0, 100, (num_samples * len(centers), 2)),
                columns=["x", "y"],
            )
            for centers in (traffic_centers, nature_centers, population_centers)
        )
    if state_key in st.session_state:
        traffic_df, nature_df, population_df = st.session_state[state_key]

    # Maps each selectable dataset name to its frame and scatter marker.
    dataset_mapping = {
        "๊ตํ†ต์ ‘๊ทผ์„ฑ": (traffic_df, 'o'),
        "์ž์—ฐํ™˜๊ฒฝ": (nature_df, 'x'),
        "์ธ๊ตฌ๋ฐ€์ง‘๋„": (population_df, '^')
    }

    # Options are taken from the mapping keys so the two cannot drift apart.
    datasets = st.multiselect("๋ฐ์ดํ„ฐ์…‹ ์„ ํƒ:", list(dataset_mapping))
    k_value = st.slider("k ๊ฐ’ ์„ ํƒ:", 1, 10)

    if not datasets:
        return

    combined_data = pd.concat([dataset_mapping[name][0] for name in datasets])
    centroids, labels = apply_kmeans(combined_data.values, k_value)

    fig, ax = plt.subplots(figsize=(8, 8))

    # All scatter calls share one color scale. Without explicit vmin/vmax each
    # call normalizes against its own subset of labels, so the same cluster
    # could be drawn in different colors for different datasets.
    color_max = max(k_value - 1, 1)

    # Labels come back in concatenation order; walk them with an offset to
    # recover the slice that belongs to each dataset.
    offset = 0
    for name in datasets:
        data, marker = dataset_mapping[name]
        subset_labels = labels[offset:offset + len(data)]
        offset += len(data)
        ax.scatter(
            data['x'],
            data['y'],
            c=subset_labels,
            cmap='viridis',
            vmin=0,
            vmax=color_max,
            marker=marker,
            label=name,
        )

    ax.scatter(centroids[:, 0], centroids[:, 1], s=200, c='red', marker='X')
    ax.set_xlim(0, 100)
    ax.set_ylim(0, 100)
    ax.set_title(f"K-means ํด๋Ÿฌ์Šคํ„ฐ๋ง ๊ฒฐ๊ณผ (k={k_value})", fontproperties=fontprop)
    ax.legend(prop=fontprop)
    st.pyplot(fig)

    # pyplot keeps a reference to every figure it creates; close this one so
    # figures do not accumulate across reruns of the script.
    plt.close(fig)

# Script entry point: build the page only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()