Spaces:
Runtime error
Runtime error
File size: 2,030 Bytes
f1a15ae |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# ========== (c) JP Hwang 30/9/2022 ==========
import logging
import pandas as pd
import app
from sklearn.decomposition import PCA, FastICA, LatentDirichletAllocation
from sklearn.manifold import TSNE
import plotly.express as px
from pathlib import Path
# ===== SET UP LOGGER =====
# A module-level logger for this file, plus a stream handler attached to the
# root logger so that records at INFO level and above are emitted with a
# timestamp, logger name and level.
logger = logging.getLogger(__name__)

formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
sh = logging.StreamHandler()
sh.setFormatter(formatter)

root_logger = logging.getLogger()
root_logger.addHandler(sh)
root_logger.setLevel(logging.INFO)
# ===== END LOGGER SETUP =====

# Raise pandas' console display limits: up to 20 columns, 320 characters wide.
desired_width = 320
pd.options.display.max_columns = 20
pd.options.display.width = desired_width
def _embed_2d(algo, rgb_arr):
    """Reduce the rows of *rgb_arr* to two coordinates using *algo*.

    Args:
        algo: Estimator class (not an instance) that accepts an
            ``n_components`` keyword; ``main`` passes PCA, FastICA,
            LatentDirichletAllocation and TSNE.
        rgb_arr: 2-D array with one row per colour.

    Returns:
        A 2-D array with one row per input row; columns 0 and 1 hold the
        two reduced coordinates.
    """
    reducer = algo(n_components=2)
    if algo is TSNE:
        return reducer.fit_transform(rgb_arr)
    # The decomposition estimators are handled the other way round from
    # t-SNE: they are fitted on the transposed array, and the per-colour
    # coordinates are read back from `components_`, transposed again so that
    # each row corresponds to one colour.
    reducer.fit(rgb_arr.transpose())
    return reducer.components_.transpose()


def main():
    """Plot the RGB colour data in 2D with several reducers and save JPEGs.

    Loads the data frame from ``app.preproc_data()`` (it must provide the
    columns ``r``, ``g``, ``b``, ``simple_name``, ``rgb``, ``size`` and
    ``name``), reduces the ``r``/``g``/``b`` columns to two dimensions with
    PCA, FastICA, LatentDirichletAllocation and TSNE in turn, and writes one
    scatter plot per algorithm to ``temp/dim_red_<AlgorithmName>.jpg``.
    The ``temp`` directory is created if it does not exist yet.

    Returns:
        bool: Always ``True`` once every plot has been written.
    """
    df = app.preproc_data()
    rgb_arr = df[['r', 'g', 'b']].values

    # Writing into a missing directory raises FileNotFoundError, so make sure
    # the output folder exists before the first image is saved.
    out_dir = Path('temp')
    out_dir.mkdir(parents=True, exist_ok=True)

    for algo in [PCA, FastICA, LatentDirichletAllocation, TSNE]:
        vals = _embed_2d(algo, rgb_arr)
        # The two helper columns are overwritten on every pass of the loop.
        df['red_a'] = vals[:, 0]
        df['red_b'] = vals[:, 1]

        # NOTE(review): colours come from the `rgb` column used as a discrete
        # sequence, while points are grouped by `simple_name` - confirm the
        # two line up as intended.
        fig = px.scatter(
            df, x='red_a', y='red_b',
            title=f'RGB values represented in 2D using {algo.__name__}',
            template='plotly_white',
            color=df['simple_name'],
            color_discrete_sequence=df['rgb'],
            size='size',
            width=800, height=600,
            hover_data=['name'],
        )
        fig.update_layout(
            showlegend=False,
            margin=dict(l=5, r=5, t=40, b=5),
        )

        out_path = out_dir / f'dim_red_{algo.__name__}.jpg'
        out_path.write_bytes(fig.to_image('jpg', scale=2))
        logger.info('Saved %s projection to %s', algo.__name__, out_path)
    return True
# Run the plotting pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|