File size: 2,030 Bytes
f1a15ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# ========== (c) JP Hwang 30/9/2022  ==========

import logging
import pandas as pd
import app
from sklearn.decomposition import PCA, FastICA, LatentDirichletAllocation
from sklearn.manifold import TSNE
import plotly.express as px
from pathlib import Path

# ===== SET UP LOGGER =====
# Build the console handler first, then attach it to the root logger so that
# every logger in the process (including this module's) emits INFO and above
# through the same timestamped format.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
sh = logging.StreamHandler()
sh.setFormatter(formatter)

root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
root_logger.addHandler(sh)

# Module-level logger; it has no handler of its own and propagates to root.
logger = logging.getLogger(__name__)
# ===== END LOGGER SETUP =====

# Widen pandas' console output so wide data frames print without wrapping
# or column elision.
desired_width = 320
pd.options.display.width = desired_width
pd.options.display.max_columns = 20


def main():
    """Reduce the RGB columns to 2D with several algorithms and save one plot each.

    Loads the data frame from ``app.preproc_data()``, takes its ``r``, ``g``
    and ``b`` columns, and for each of PCA, FastICA,
    LatentDirichletAllocation and TSNE computes a two-column embedding,
    stores it in the ``red_a`` / ``red_b`` columns of the frame, and writes a
    scatter plot to ``temp/dim_red_<AlgorithmName>.jpg`` (relative to the
    current working directory). The ``temp`` directory is created if it does
    not exist yet.

    Returns:
        True once an image has been written for every algorithm.
    """
    df = app.preproc_data()
    rgb_arr = df[['r', 'g', 'b']].values

    # Make sure the output directory exists before doing any fitting, so a
    # fresh checkout does not fail with FileNotFoundError on the first write.
    out_dir = Path('temp')
    out_dir.mkdir(parents=True, exist_ok=True)

    for algo in [PCA, FastICA, LatentDirichletAllocation, TSNE]:
        reducer = algo(n_components=2)
        if algo is TSNE:
            # t-SNE has no components_; the embedding comes from fit_transform.
            vals = reducer.fit_transform(rgb_arr)
        else:
            # The decomposition estimators are fitted on the transposed array
            # and the per-row coordinates are read back from components_
            # (2 x n_rows after this fit), transposed to n_rows x 2.
            # NOTE(review): this is not the same as fit_transform(rgb_arr);
            # kept as-is so the generated images do not change.
            reducer.fit(rgb_arr.transpose())
            vals = reducer.components_.transpose()
        df['red_a'] = vals[:, 0]
        df['red_b'] = vals[:, 1]

        fig = px.scatter(df, x='red_a', y='red_b',
                         title=f'RGB values represented in 2D using {algo.__name__}',
                         template='plotly_white',
                         color=df['simple_name'],
                         color_discrete_sequence=df['rgb'],
                         size='size',
                         width=800, height=600,
                         hover_data=['name'])
        fig.update_layout(
            showlegend=False,
            margin=dict(l=5, r=5, t=40, b=5)
        )
        img = fig.to_image('jpg', scale=2)
        (out_dir / f'dim_red_{algo.__name__}.jpg').write_bytes(img)

    return True


# Run the full pipeline only when executed as a script, not on import.
if __name__ == '__main__':
    main()