# ========== (c) JP Hwang 30/9/2022 ==========

import logging
import pandas as pd
import app
from sklearn.decomposition import PCA, FastICA, LatentDirichletAllocation
from sklearn.manifold import TSNE
import plotly.express as px
from pathlib import Path

# ===== SET UP LOGGER =====
logger = logging.getLogger(__name__)
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
sh = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
sh.setFormatter(formatter)
root_logger.addHandler(sh)
# ===== END LOGGER SETUP =====

desired_width = 320
pd.set_option('display.max_columns', 20)
pd.set_option('display.width', desired_width)


def main():
    """Plot the RGB data in 2D with several reducers and save each plot as a JPG.

    Loads the data via ``app.preproc_data()`` (presumably a pandas DataFrame --
    confirm against ``app``), reads its ``r``/``g``/``b`` columns, and for each
    of PCA, FastICA, LatentDirichletAllocation and TSNE computes two values per
    row. Those values are stored in the ``red_a``/``red_b`` columns (overwritten
    on every iteration), drawn as a plotly scatter plot and written to
    ``temp/dim_red_<AlgorithmName>.jpg`` relative to the current working
    directory. The ``temp`` directory is created if it does not exist.

    The plot also reads the ``simple_name``, ``rgb``, ``size`` and ``name``
    columns of the DataFrame.

    Returns:
        bool: Always ``True`` once every plot has been written.
    """
    df = app.preproc_data()
    rgb_arr = df[['r', 'g', 'b']].values

    # Create the output directory up front so a missing folder fails (or is
    # fixed) before any expensive fitting/rendering work is done.
    out_dir = Path('temp')
    out_dir.mkdir(parents=True, exist_ok=True)

    for algo in [PCA, FastICA, LatentDirichletAllocation, TSNE]:
        reducer = algo(n_components=2)
        if algo is not TSNE:
            # The decomposition classes are fitted on the transposed array and
            # their components_ (n_components x n_rows here) are transposed
            # back, giving one pair of values per original row.
            reducer.fit(rgb_arr.transpose())
            vals = reducer.components_.transpose()
        else:
            # t-SNE embeds the rows directly.
            vals = reducer.fit_transform(rgb_arr)

        df['red_a'] = vals[:, 0]
        df['red_b'] = vals[:, 1]

        fig = px.scatter(
            df,
            x='red_a',
            y='red_b',
            title=f'RGB values represented in 2D using {algo.__name__}',
            template='plotly_white',
            color=df['simple_name'],
            color_discrete_sequence=df['rgb'],
            size='size',
            width=800,
            height=600,
            hover_data=['name'],
        )
        fig.update_layout(
            showlegend=False,
            margin=dict(l=5, r=5, t=40, b=5)
        )

        img = fig.to_image('jpg', scale=2)
        (out_dir / f'dim_red_{algo.__name__}.jpg').write_bytes(img)

    return True


if __name__ == '__main__':
    main()