Spaces:
Running
Running
| import streamlit as st | |
| import pandas as pd | |
| import plotly.express as px | |
| from wordcloud import WordCloud, STOPWORDS | |
| import matplotlib.pyplot as plt | |
| # Caching data loading | |
| def load_data(): | |
| df = pd.read_csv("mafindo_mix_llm.csv") | |
| return df | |
| # Caching WordCloud generation | |
| def generate_wordcloud(text, colormap, stopwords): | |
| wordcloud = WordCloud(width=500, height=200, background_color='white', colormap=colormap, stopwords=stopwords).generate(text) | |
| return wordcloud | |
| def show_home(): | |
| # Load the dataset | |
| df = load_data() | |
| # Convert 'Tanggal' to datetime | |
| df['Tanggal'] = pd.to_datetime(df['Tanggal'], format='%d/%m/%Y') | |
| df['Year'] = df['Tanggal'].dt.year | |
| # Convert text columns to string to avoid type errors | |
| df['Content'] = df['Content'].astype(str) | |
| # Define additional stopwords | |
| additional_stopwords = {"dan", "di", "yang", "ke", "dari", "untuk", "pada", "adalah", "sebuah", "dengan", "tersebut", "ini", "itu", "atau", "dalam", "juga", "adalah", "yg", "tapi"} | |
| # Combine default stopwords with additional stopwords | |
| combined_stopwords = set(STOPWORDS).union(additional_stopwords) | |
| # Row with 4 visualizations | |
| col1, col2, col3, col4 = st.columns([1.5, 2.5, 1.5, 2.5]) | |
| # Visualization 1: Bar chart for Hoax vs Non-Hoax using Plotly | |
| with col1: | |
| st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Hoax vs Non-Hoax</h6>", unsafe_allow_html=True) | |
| df_label_counts = df['Label'].value_counts().reset_index() | |
| df_label_counts.columns = ['Label', 'Jumlah'] | |
| bar_chart_label = px.bar(df_label_counts, x='Label', y='Jumlah', color='Label', | |
| color_discrete_map={'HOAX': 'red', 'NON-HOAX': 'green'}) | |
| bar_chart_label.update_layout( | |
| width=200, height=150, xaxis_title='Label', yaxis_title='Jumlah', | |
| xaxis_title_font_size=10, yaxis_title_font_size=10, | |
| xaxis_tickfont_size=8, yaxis_tickfont_size=8, margin=dict(t=10, b=10, l=10, r=10), | |
| showlegend=False | |
| ) | |
| st.plotly_chart(bar_chart_label, use_container_width=False) | |
| # Visualization 2: Bar chart for Hoax vs Non-Hoax per Data Source using Plotly | |
| with col2: | |
| st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Hoax vs Non-Hoax per Data Source</h6>", unsafe_allow_html=True) | |
| datasource_label_counts = df.groupby(['Datasource', 'Label']).size().reset_index(name='counts') | |
| fig_datasource = px.bar(datasource_label_counts, x='Datasource', y='counts', color='Label', barmode='group', | |
| color_discrete_map={'HOAX': 'red', 'NON-HOAX': 'green'}) | |
| fig_datasource.update_layout( | |
| width=500, height=150, xaxis_title='Datasource', yaxis_title='Jumlah', | |
| xaxis_title_font_size=10, yaxis_title_font_size=10, | |
| xaxis_tickfont_size=6, yaxis_tickfont_size=8, xaxis_tickangle=0, | |
| margin=dict(t=10, b=10, l=10, r=50), | |
| legend=dict( | |
| font=dict(size=8), # Smaller font size for the legend | |
| traceorder='normal', | |
| orientation='v', # Vertical orientation of the legend | |
| title_text='Label', # Title for the legend | |
| yanchor='top', y=1, xanchor='left', x=1.05, # Adjust position of the legend | |
| bgcolor='rgba(255, 255, 255, 0)', # Transparent background for legend | |
| bordercolor='rgba(0, 0, 0, 0)' # No border color | |
| ), | |
| showlegend=True | |
| ) | |
| st.plotly_chart(fig_datasource, use_container_width=False) | |
| # Visualization 3: Line chart for Hoax per Year using Plotly | |
| with col3: | |
| st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Hoax per Tahun</h6>", unsafe_allow_html=True) | |
| # Filter data to include only years up to 2023 | |
| hoax_per_year = df[(df['Label'] == 'HOAX') & (df['Year'] <= 2023)].groupby('Year').size().reset_index(name='count') | |
| line_chart_hoax = px.line(hoax_per_year, x='Year', y='count', line_shape='linear', | |
| color_discrete_sequence=['red']) | |
| line_chart_hoax.update_layout( | |
| width=200, height=150, xaxis_title='Tahun', yaxis_title='Jumlah Hoax', | |
| xaxis_title_font_size=10, yaxis_title_font_size=10, | |
| xaxis_tickfont_size=8, yaxis_tickfont_size=8, margin=dict(t=10, b=10, l=10, r=10), | |
| showlegend=False | |
| ) | |
| st.plotly_chart(line_chart_hoax, use_container_width=False) | |
| # Visualization 4: Bar chart for Topics per Year using Plotly | |
| with col4: | |
| st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Topik per Tahun</h6>", unsafe_allow_html=True) | |
| df['Tanggal'] = pd.to_datetime(df['Tanggal'], format='%d/%m/%Y') | |
| df['Year'] = df['Tanggal'].dt.year | |
| # Filter the data to include only years up to 2023 | |
| df_mafindo_filtered = df[df['Year'] <= 2023] | |
| topics_per_year = df_mafindo_filtered.groupby(['Year', 'Topic']).size().reset_index(name='count') | |
| # Create the vertical bar chart | |
| bar_chart_topics = px.bar(topics_per_year, x='Year', y='count', color='Topic', | |
| color_continuous_scale=px.colors.sequential.Viridis) | |
| # Update layout to adjust the legend | |
| bar_chart_topics.update_layout( | |
| width=600, height=150, xaxis_title='Tahun', yaxis_title='Jumlah Topik', | |
| xaxis_title_font_size=10, yaxis_title_font_size=10, | |
| xaxis_tickfont_size=8, yaxis_tickfont_size=8, margin=dict(t=10, b=10, l=10, r=10), | |
| showlegend=True, | |
| legend=dict( | |
| yanchor="top", y=1, xanchor="left", x=1.02, # Adjust position of the legend | |
| bgcolor='rgba(255, 255, 255, 0)', # Transparent background for legend | |
| bordercolor='rgba(0, 0, 0, 0)', # No border color | |
| itemclick='toggleothers', # Allow toggling of legend items | |
| itemsizing='constant', # Consistent sizing for legend items | |
| font=dict(size=8), | |
| traceorder='normal', | |
| orientation='v', # Vertical orientation of legend | |
| title_text='Topic' | |
| ) | |
| ) | |
| st.plotly_chart(bar_chart_topics, use_container_width=True) | |
| # Create a new row for WordCloud visualizations | |
| col5, col6, col7 = st.columns([2, 2.5, 2.5]) | |
| # Wordcloud for Hoax | |
| with col5: | |
| st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Wordcloud for Hoax</h6>", unsafe_allow_html=True) | |
| hoax_text = ' '.join(df[df['Label'] == 'HOAX']['Content']) | |
| wordcloud_hoax = generate_wordcloud(hoax_text, 'Reds', combined_stopwords) | |
| fig_hoax = plt.figure(figsize=(5, 2.5)) | |
| plt.imshow(wordcloud_hoax, interpolation='bilinear') | |
| plt.axis('off') | |
| st.pyplot(fig_hoax) | |
| with col6: | |
| st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Klasifikasi</h6>", unsafe_allow_html=True) | |
| df_classification_counts = df['Classification'].value_counts().reset_index() | |
| df_classification_counts.columns = ['Classification', 'Count'] | |
| # Create the donut chart | |
| donut_chart_classification = px.pie(df_classification_counts, names='Classification', values='Count', | |
| hole=0.3, color_discrete_sequence=px.colors.qualitative.Set2) | |
| # Update layout to move the legend and adjust its size | |
| donut_chart_classification.update_layout( | |
| width=300, height=170, # Adjust the size of the chart | |
| margin=dict(t=20, b=20, l=20, r=120), # Adjust margins to make room for the legend | |
| legend=dict( | |
| yanchor="top", y=1, xanchor="left", x=1.07, # Adjust position of the legend | |
| bgcolor='rgba(255, 255, 255, 0)', # Transparent background for legend | |
| bordercolor='rgba(0, 0, 0, 0)', # No border color | |
| itemclick='toggleothers', # Allow toggling of legend items | |
| itemsizing='constant', # Consistent sizing for legend items | |
| font=dict(size=8), # Smaller font size for the legend | |
| traceorder='normal', | |
| orientation='v', # Vertical legend | |
| title_text='Classification' # Title for the legend | |
| ) | |
| ) | |
| st.plotly_chart(donut_chart_classification, use_container_width=True) | |
| with col7: | |
| st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Tone</h6>", unsafe_allow_html=True) | |
| df_tone_counts = df['Tone'].value_counts().reset_index() | |
| df_tone_counts.columns = ['Tone', 'Count'] | |
| # Create the donut chart | |
| donut_chart_tone = px.pie(df_tone_counts, names='Tone', values='Count', | |
| hole=0.3, color_discrete_sequence=px.colors.qualitative.Set2) | |
| # Update layout to move the legend and adjust its size | |
| donut_chart_tone.update_layout( | |
| width=250, height=170, # Adjust the size of the chart | |
| margin=dict(t=20, b=20, l=20, r=100), # Adjust margins to make room for the legend | |
| legend=dict( | |
| yanchor="top", y=1, xanchor="left", x=1.07, # Adjust position of the legend | |
| bgcolor='rgba(255, 255, 255, 0)', # Transparent background for legend | |
| bordercolor='rgba(0, 0, 0, 0)', # No border color | |
| itemclick='toggleothers', # Allow toggling of legend items | |
| itemsizing='constant', # Consistent sizing for legend items | |
| font=dict(size=8), # Smaller font size for the legend | |
| traceorder='normal', | |
| orientation='v', # Vertical legend | |
| title_text='Tone' # Title for the legend | |
| ) | |
| ) | |
| st.plotly_chart(donut_chart_tone, use_container_width=True) | |
| col8, col9 = st.columns([5, 1.5]) | |
| # Evaluation Metrics Table | |
| data = [ | |
| ["indobenchmark/indobert-base-p2", 0.6898, 0.9793, 0.8094, 0.8400, 0.1981, 0.3206, 0.7023], | |
| ["cahya/bert-base-indonesian-522M", 0.7545, 0.8756, 0.8106, 0.6800, 0.4811, 0.5635, 0.7358], | |
| ["indolem/indobert-base-uncased", 0.7536, 0.8238, 0.7871, 0.6136, 0.5094, 0.5567, 0.7124], | |
| ["mdhugol/indonesia-bert-sentiment-classification", 0.7444, 0.8601, 0.7981, 0.6447, 0.4623, 0.5385, 0.7191] | |
| ] | |
| highest_accuracy = max(data, key=lambda x: x[-1]) | |
| # Header Table | |
| html_table = """ | |
| <table style="width:100%; border-collapse: collapse; font-size: 12px; border-top: 1px solid black; border-bottom: 1px solid black;"> | |
| <tr style="border-bottom: 1px solid black; text-align: center; border-top: 1px solid black;"> | |
| <th rowspan="2" style="border: none; padding: 5px; font-size: 14px; text-align: left; border-top: 1px solid black;">Pre-trained Model</th> | |
| <th colspan="3" style="border: none; padding: 5px; font-size: 14px; text-align: center; border-top: 1px solid black;">NON-HOAX</th> | |
| <th colspan="3" style="border: none; padding: 5px; font-size: 14px; text-align: center; border-top: 1px solid black;">HOAX</th> | |
| <th rowspan="2" style="border: none; padding: 5px; font-size: 14px; text-align: center; border-top: 1px solid black;">Accuracy</th> | |
| </tr> | |
| <tr style="border-bottom: 1px solid black;"> | |
| <th style="border: none; padding: 5px; font-size: 12px; width:80px; text-align: center;">Precision</th> | |
| <th style="border: none; padding: 5px; font-size: 12px; width:80px; text-align: center;">Recall</th> | |
| <th style="border: none; padding: 5px; font-size: 12px; width:80px; text-align: center;">F1-Score</th> | |
| <th style="border: none; padding: 5px; font-size: 12px; width:80px; text-align: center;">Precision</th> | |
| <th style="border: none; padding: 5px; font-size: 12px; width:80px; text-align: center;">Recall</th> | |
| <th style="border: none; padding: 5px; font-size: 12px; width:80px; text-align: center;">F1-Score</th> | |
| </tr> | |
| """ | |
| # Isi Data | |
| for row in data: | |
| formatted_row = [f"{item:.4f}" if isinstance(item, float) else item for item in row] | |
| if row == highest_accuracy: | |
| html_table += "<tr style='background-color: #FC9576; font-size: 12px; text-align: center; border: 1px solid transparent;'>" | |
| else: | |
| html_table += "<tr style='font-size: 12px; text-align: center; border: 1px solid transparent;'>" | |
| # Left-align the first column (Pre-trained Model) | |
| html_table += f"<td style='border: none; padding: 5px; text-align: left; font-size: 12px;'>{row[0]}</td>" | |
| # Center-align the rest of the columns | |
| for item in formatted_row[1:]: | |
| html_table += f"<td style='border: none; padding: 5px; text-align: center; font-size: 12px;'>{item}</td>" | |
| html_table += "</tr>" | |
| # Add a border to the last row | |
| html_table += "<tr style='border-top: 1px solid black;'></tr>" | |
| html_table += "</table>" | |
| # Tampilkan Tabel di Streamlit | |
| with col8: | |
| st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Matriks Evaluasi</h6>", unsafe_allow_html=True) | |
| st.markdown(html_table, unsafe_allow_html=True) | |
| html_table_col9 = """ | |
| <table style="width:100%; border-collapse: collapse; font-size: 12px;"> | |
| <thead> | |
| <tr style="border-top: 1.5px solid #B2BABB; border-bottom: 1.5px solid #B2BABB;"> | |
| <th style="padding: 8px; border: 1px solid transparent; font-weight: bold; background-color: #f2f2f2; text-align: left;">Label</th> | |
| <th style="padding: 8px; border: 1px solid transparent; font-weight: bold; background-color: #f2f2f2; text-align: center;">Train</th> | |
| <th style="padding: 8px; border: 1px solid transparent; font-weight: bold; background-color: #f2f2f2; text-align: center;">Test</th> | |
| <th style="padding: 8px; border: 1px solid transparent; font-weight: bold; background-color: #f2f2f2; text-align: center;">Dev</th> | |
| </tr> | |
| </thead> | |
| <tbody> | |
| <tr style="border-bottom: 1px solid transparent;"> | |
| <td style="padding: 8px; border: 1px solid transparent; background-color: #f2f2f2;">HOAX</td> | |
| <td style="padding: 8px; border: 1px solid transparent; background-color: #f2f2f2; text-align: center;">11.563</td> | |
| <td style="padding: 8px; border: 1px solid transparent; background-color: #f2f2f2; text-align: center;">193</td> | |
| <td style="padding: 8px; border: 1px solid transparent; background-color: #f2f2f2; text-align: center;">193</td> | |
| </tr> | |
| <tr style="border-bottom: 1px solid black;"> | |
| <td style="padding: 8px; border: 1px solid transparent; background-color: #f2f2f2;">NON-HOAX</td> | |
| <td style="padding: 8px; border: 1px solid transparent; background-color: #f2f2f2; text-align: center;">789</td> | |
| <td style="padding: 8px; border: 1px solid transparent; background-color: #f2f2f2; text-align: center;">106</td> | |
| <td style="padding: 8px; border: 1px solid transparent; background-color: #f2f2f2; text-align: center;">106</td> | |
| </tr> | |
| <tr style="font-weight: bold; border-top: 1px solid transparent; border-bottom: 1.5px solid #B2BABB;"> | |
| <td style="padding: 8px; border: 1px solid transparent; background-color: #f2f2f2;">TOTAL</td> | |
| <td style="padding: 8px; border: 1px solid transparent; background-color: #f2f2f2; text-align: center;">12,352</td> | |
| <td style="padding: 8px; border: 1px solid transparent; background-color: #f2f2f2; text-align: center;">299</td> | |
| <td style="padding: 8px; border: 1px solid transparent; background-color: #f2f2f2; text-align: center;">299</td> | |
| </tr> | |
| </tbody> | |
| </table> | |
| """ | |
| # Display the table in col9 using HTML | |
| with col9: | |
| st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Statistik Data</h6>", unsafe_allow_html=True) | |
| st.markdown(html_table_col9, unsafe_allow_html=True) |