Spaces:

nlp-brin-id
/

deteksihoax

Running

App Files Files Community

Nakhwa committed on Aug 24, 2024

Commit

696314f

verified ·

1 Parent(s): 7cb4f6a

Upload 10 files

Browse files

Files changed (11) hide show

.gitattributes +1 -0
app.py +21 -0
deteksi_content.py +149 -0
deteksi_upload.py +170 -0
home.py +285 -0
inbound-source-431806-g7-e49e388ce0be.json +13 -0
load_model.py +19 -0
mafindo_mix_llm.csv +3 -0
requirements.txt +11 -0
styles.py +43 -0
test.py +63 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+mafindo_mix_llm.csv filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,21 @@

+import streamlit as st
+# Set page configuration
+st.set_page_config(page_title="Hoax Detection Dashboard", layout="wide")
+st.title("Dashboard Deteksi Berita Hoax")
+from home import show_home
+from deteksi_content import show_deteksi_konten
+from deteksi_upload import show_deteksi_upload
+# Create tabs
+tab1, tab2, tab3 = st.tabs(["Home", "Deteksi Konten", "Deteksi File"])
+with tab1:
+    show_home()
+with tab2:
+    show_deteksi_konten()
+with tab3:
+    show_deteksi_upload()

deteksi_content.py ADDED Viewed

	@@ -0,0 +1,149 @@

+import streamlit as st
+from datetime import datetime
+import pandas as pd
+from lime.lime_text import LimeTextExplainer
+from test import predict_hoax, predict_proba_for_lime
+import streamlit.components.v1 as components
+from load_model import load_model
+from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
+from styles import COMMON_CSS
+from google.cloud import storage
+import os
+from io import StringIO
+# Set environment variable for Google Cloud credentials
+os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "D:\DashboardHoax\inbound-source-431806-g7-e49e388ce0be.json"
+def save_corrections_to_gcs(bucket_name, file_name, correction_data):
+    client = storage.Client()  # Uses the credentials set by the environment variable
+    bucket = client.bucket("dashboardhoax-bucket")
+    blob = bucket.blob("koreksi_pengguna_content.csv")
+    # Check if the blob (file) exists
+    if blob.exists():
+        # Download existing CSV from GCS
+        existing_data = blob.download_as_string().decode('utf-8')
+        existing_df = pd.read_csv(StringIO(existing_data))
+    else:
+        # Create a new DataFrame if the file does not exist
+        existing_df = pd.DataFrame(columns=['Timestamp', 'Title', 'Content', 'Prediction', 'Correction'])
+    # Append the new data to the existing data
+    new_data_df = pd.DataFrame(correction_data)
+    updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)
+    # Convert the DataFrame back to CSV and upload
+    updated_csv_data = updated_df.to_csv(index=False)
+    blob.upload_from_string(updated_csv_data, content_type='text/csv')
+def show_deteksi_kontengcs():
+    st.markdown(COMMON_CSS, unsafe_allow_html=True)
+    if 'correction' not in st.session_state:
+        st.session_state.correction = None
+    if 'detection_result' not in st.session_state:
+        st.session_state.detection_result = None
+    if 'lime_explanation' not in st.session_state:
+        st.session_state.lime_explanation = None
+    if 'headline' not in st.session_state:
+        st.session_state.headline = ""
+    if 'content' not in st.session_state:
+        st.session_state.content = ""
+    if 'is_correct' not in st.session_state:
+        st.session_state.is_correct = None
+    # Dropdown for selecting a model
+    st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Pilih Model</h6>", unsafe_allow_html=True)
+    selected_model = st.selectbox(
+        "",
+        [
+            "cahya/bert-base-indonesian-522M",
+            "indobenchmark/indobert-base-p2",
+            "indolem/indobert-base-uncased",
+            "mdhugol/indonesia-bert-sentiment-classification"
+        ],
+        key="model_selector_content"
+    )
+    # Load the selected model
+    tokenizer, model = load_model(selected_model)
+    st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Masukkan Judul Berita :</h6>", unsafe_allow_html=True)
+    st.session_state.headline = st.text_input("", value=st.session_state.headline)
+    st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Masukkan Konten Berita :</h6>", unsafe_allow_html=True)
+    st.session_state.content = st.text_area("", value=st.session_state.content)
+    # Detection button
+    if st.button("Deteksi", key="detect_content"):
+        st.session_state.detection_result = predict_hoax(st.session_state.headline, st.session_state.content)
+        st.success(f"Prediksi: {st.session_state.detection_result}")
+        # Prepare the text for LIME
+        lime_texts = [f"{st.session_state.headline} [SEP] {st.session_state.content}"]
+        # Add a spinner and progress bar to indicate processing
+        with st.spinner("Sedang memproses LIME, harap tunggu..."):
+            # Explain the prediction
+            explainer = LimeTextExplainer(class_names=['NON-HOAX', 'HOAX'])
+            explanation = explainer.explain_instance(lime_texts[0], predict_proba_for_lime, num_features=5, num_samples=1000)
+            # Save the LIME explanation in session state
+            st.session_state.lime_explanation = explanation.as_html()
+    # Display the detection result and LIME explanation if available
+    if st.session_state.lime_explanation:
+        lime_html = st.session_state.lime_explanation
+        # Inject CSS for font size adjustment
+        lime_html = f"""
+        <style>
+        .lime-text-explanation, .lime-highlight, .lime-classification,
+        .lime-text-explanation * {{
+            font-size: 14px !important;
+        }}
+        </style>
+        <div class="lime-text-explanation">
+            {lime_html}
+        </div>
+        """
+        components.html(lime_html, height=200, scrolling=True)
+    # Display a radio button asking if the detection result is correct
+    if st.session_state.detection_result is not None:
+        st.markdown("<h6 style='font-size: 16px; margin-bottom: -150px;'>Apakah hasil deteksi sudah benar?</h6>", unsafe_allow_html=True)
+        st.session_state.is_correct = st.radio("", ("Ya", "Tidak"))
+        if st.session_state.is_correct == "Ya":
+            st.success("Deteksi sudah benar.")
+        else:
+            # Determine the correction based on the prediction
+            st.session_state.correction = "HOAX" if st.session_state.detection_result == "NON-HOAX" else "NON-HOAX"
+            # Display the correction DataFrame
+            correction_data = [{
+                'Title': st.session_state.headline,
+                'Content': st.session_state.content,
+                'Prediction': st.session_state.detection_result,
+                'Correction': st.session_state.correction,
+                'Timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+            }]
+            # Save button
+            if st.button("Simpan"):
+                # Save the correction data to GCS
+                save_corrections_to_gcs("your-bucket-name", "koreksi_pengguna.csv", correction_data)
+                # Create a formatted string with CSS for alignment and multi-line content handling
+                formatted_text = f"""
+                <div style='font-size: 14px;'>
+                    <p style='margin: 0;'><span style='display: inline-block; width: 120px; font-weight: bold;'>Title</span> : <span style='white-space: pre-wrap;'>{st.session_state.headline}</span></p>
+                    <p style='margin: 0;'><span style='display: inline-block; width: 120px; font-weight: bold;'>Content</span> : <span style='white-space: pre-wrap;'>{st.session_state.content}</span></p>
+                    <p style='margin: 0;'><span style='display: inline-block; width: 120px; font-weight: bold;'>Prediction</span> : {st.session_state.detection_result}</p>
+                    <p style='margin: 0;'><span style='display: inline-block; width: 120px; font-weight: bold;'>Correction</span> : {st.session_state.correction}</p>
+                </div>
+                """
+                # Display the correction as text
+                st.markdown(formatted_text, unsafe_allow_html=True)
+                st.success("Koreksi telah disimpan.")

deteksi_upload.py ADDED Viewed

	@@ -0,0 +1,170 @@

+import streamlit as st
+import pandas as pd
+from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
+from test import predict_hoax, evaluate_model_performance
+from load_model import load_model
+from styles import COMMON_CSS
+from google.cloud import storage
+from io import StringIO
+import os
+from datetime import datetime
+# Set environment variable for Google Cloud credentials
+os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "D:/DashboardHoax/inbound-source-431806-g7-e49e388ce0be.json"
+def save_corrections_to_gcs(bucket_name, file_name, correction_data):
+    client = storage.Client()
+    bucket = client.bucket("dashboardhoax-bucket")
+    blob = bucket.blob("koreksi_pengguna_file.csv")
+    # Check if the blob (file) exists
+    if blob.exists():
+        # Download existing CSV from GCS
+        existing_data = blob.download_as_string().decode('utf-8')
+        existing_df = pd.read_csv(StringIO(existing_data))
+    else:
+        # Create a new DataFrame if the file does not exist
+        existing_df = pd.DataFrame(columns=['Timestamp', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction'])
+    # Append the new data to the existing data
+    new_data_df = pd.DataFrame(correction_data)
+    updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)
+    # Convert the DataFrame back to CSV and upload
+    updated_csv_data = updated_df.to_csv(index=False)
+    blob.upload_from_string(updated_csv_data, content_type='text/csv')
+def load_data(file):
+    return pd.read_csv(file)
+def show_deteksi_uploadgcs():
+    st.markdown(COMMON_CSS, unsafe_allow_html=True)
+    st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Pilih Model</h6>", unsafe_allow_html=True)
+    selected_model = st.selectbox(
+        "",
+        [
+            "cahya/bert-base-indonesian-522M",
+            "indobenchmark/indobert-base-p2",
+            "indolem/indobert-base-uncased",
+            "mdhugol/indonesia-bert-sentiment-classification"
+        ],
+        key="model_selector_upload"
+    )
+    tokenizer, model = load_model(selected_model)
+    st.markdown("<h6 style='font-size: 14px; margin-bottom: -200px;'>Unggah File Disini</h6>", unsafe_allow_html=True)
+    uploaded_file = st.file_uploader("", type="csv")
+    if 'df' not in st.session_state:
+        st.session_state.df = None
+    if uploaded_file is not None:
+        df = load_data(uploaded_file)
+        df.index = df.index + 1
+        st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Diunggah</h6>", unsafe_allow_html=True)
+        grid_options = GridOptionsBuilder.from_dataframe(df)
+        grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
+        gridOptions = grid_options.build()
+        AgGrid(
+            df,
+            gridOptions=gridOptions,
+            update_mode=GridUpdateMode.VALUE_CHANGED,
+            use_container_width=True
+        )
+        if st.button("Deteksi", key="detect_upload"):
+            try:
+                df['Result_Detection'] = df.apply(lambda row: predict_hoax(row['Title'], row['Content']), axis=1)
+                df['Correction'] = False
+                st.session_state.df = df.copy()
+            except Exception as e:
+                st.error(f"Terjadi kesalahan saat deteksi: {e}")
+    if st.session_state.df is not None:
+        accuracy, precision, recall, f1 = evaluate_model_performance(st.session_state.df, tokenizer, model)
+        performance_text = (
+            f"*Performansi Model*\n\n"
+            f"*Accuracy:* {round(accuracy, 2)}&nbsp;&nbsp;"
+            f"*Precision:* {round(precision, 2)}&nbsp;&nbsp;"
+            f"*Recall:* {round(recall, 2)}&nbsp;&nbsp;"
+            f"*F1 Score:* {round(f1, 2)}"
+        )
+        st.success(performance_text)
+        st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Hasil Deteksi</h6>", unsafe_allow_html=True)
+        cols = ['Correction', 'Result_Detection'] + [col for col in st.session_state.df.columns if col not in ['Correction', 'Result_Detection', 'Label_id']]
+        df_reordered = st.session_state.df[cols]
+        grid_options = GridOptionsBuilder.from_dataframe(df_reordered)
+        grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
+        grid_options.configure_default_column(editable=True, groupable=True)
+        gridOptions = grid_options.build()
+        grid_response = AgGrid(
+            st.session_state.df,
+            gridOptions=gridOptions,
+            update_mode=GridUpdateMode.VALUE_CHANGED
+        )
+        if grid_response['data'] is not None:
+            edited_df = pd.DataFrame(grid_response['data'])
+            st.session_state.df = edited_df.copy()
+            corrected_df = edited_df[edited_df['Correction']].copy()
+            edited_df['Result_Correction'] = edited_df.apply(lambda row:
+                'HOAX' if (row['Result_Detection'] == 'NON-HOAX' and row['Correction']) else
+                ('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
+                axis=1
+            )
+            st.session_state.df = edited_df.copy()
+            if not corrected_df.empty:
+                corrected_df['Result_Correction'] = corrected_df.apply(lambda row:
+                    'HOAX' if (row['Result_Detection'] == 'NON-HOAX' and row['Correction']) else
+                    ('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
+                    axis=1
+                )
+                # Add Timestamp only for saving
+                corrected_df['Timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+                cols = ['Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction']
+                corrected_df_to_display = corrected_df[cols]
+                st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Dikoreksi</h6>", unsafe_allow_html=True)
+                st.dataframe(corrected_df_to_display, use_container_width=True, hide_index=True)
+            else:
+                st.write("Tidak ada data yang dikoreksi.")
+        if st.button("Simpan", key="corrected_data"):
+            if 'df' in st.session_state:
+                corrected_df = st.session_state.df[st.session_state.df['Correction']].copy()
+                corrected_df['Timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+                corrected_df = corrected_df.drop(columns=['Correction'])
+                if not corrected_df.empty:
+                    # Define GCS bucket and file name
+                    bucket_name = "your-bucket-name"
+                    file_name = "corrected_upload_data.csv"
+                    # Convert DataFrame to list of dicts for GCS
+                    correction_data = corrected_df.to_dict(orient='records')
+                    # Save corrected data to GCS
+                    save_corrections_to_gcs(bucket_name, file_name, correction_data)
+                    st.success("Data telah disimpan.")
+                    st.session_state.corrected_df = corrected_df
+                else:
+                    st.warning("Tidak ada data yang dikoreksi untuk disimpan.")
+            else:
+                st.warning("Data deteksi tidak ditemukan.")

home.py ADDED Viewed

	@@ -0,0 +1,285 @@

+import streamlit as st
+import pandas as pd
+import plotly.express as px
+from wordcloud import WordCloud, STOPWORDS
+import matplotlib.pyplot as plt
+# Caching data loading
+@st.cache_data
+def load_data():
+    df = pd.read_csv("mafindo_mix_llm.csv")
+    return df
+# Caching WordCloud generation
+@st.cache_resource
+def generate_wordcloud(text, colormap, stopwords):
+    wordcloud = WordCloud(width=500, height=200, background_color='white', colormap=colormap, stopwords=stopwords).generate(text)
+    return wordcloud
+def show_home():
+    # Load the dataset
+    df = load_data()
+    # Convert 'Tanggal' to datetime
+    df['Tanggal'] = pd.to_datetime(df['Tanggal'], format='%d/%m/%Y')
+    df['Year'] = df['Tanggal'].dt.year
+    # Convert text columns to string to avoid type errors
+    df['Content'] = df['Content'].astype(str)
+    # Define additional stopwords
+    additional_stopwords = {"dan", "di", "yang", "ke", "dari", "untuk", "pada", "adalah", "sebuah", "dengan", "tersebut", "ini", "itu", "atau", "dalam", "juga", "adalah", "yg", "tapi"}
+    # Combine default stopwords with additional stopwords
+    combined_stopwords = set(STOPWORDS).union(additional_stopwords)
+    # Row with 4 visualizations
+    col1, col2, col3, col4 = st.columns([1.5, 2.5, 1.5, 2.5])
+    # Visualization 1: Bar chart for Hoax vs Non-Hoax using Plotly
+    with col1:
+        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Hoax vs Non-Hoax</h6>", unsafe_allow_html=True)
+        df_label_counts = df['Label'].value_counts().reset_index()
+        df_label_counts.columns = ['Label', 'Jumlah']
+        bar_chart_label = px.bar(df_label_counts, x='Label', y='Jumlah', color='Label',
+                                color_discrete_map={'HOAX': 'red', 'NON-HOAX': 'green'})
+        bar_chart_label.update_layout(
+            width=200, height=150, xaxis_title='Label', yaxis_title='Jumlah',
+            xaxis_title_font_size=10, yaxis_title_font_size=10,
+            xaxis_tickfont_size=8, yaxis_tickfont_size=8, margin=dict(t=10, b=10, l=10, r=10),
+            showlegend=False
+        )
+        st.plotly_chart(bar_chart_label, use_container_width=False)
+    # Visualization 2: Bar chart for Hoax vs Non-Hoax per Data Source using Plotly
+    with col2:
+        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Hoax vs Non-Hoax per Data Source</h6>", unsafe_allow_html=True)
+        datasource_label_counts = df.groupby(['Datasource', 'Label']).size().reset_index(name='counts')
+        fig_datasource = px.bar(datasource_label_counts, x='Datasource', y='counts', color='Label', barmode='group',
+                               color_discrete_map={'HOAX': 'red', 'NON-HOAX': 'green'})
+        fig_datasource.update_layout(
+            width=500, height=150, xaxis_title='Datasource', yaxis_title='Jumlah',
+            xaxis_title_font_size=10, yaxis_title_font_size=10,
+            xaxis_tickfont_size=6, yaxis_tickfont_size=8, xaxis_tickangle=0,
+            margin=dict(t=10, b=10, l=10, r=50),
+            legend=dict(
+                font=dict(size=8),  # Smaller font size for the legend
+                traceorder='normal',
+                orientation='v',  # Vertical orientation of the legend
+                title_text='Label',  # Title for the legend
+                yanchor='top', y=1, xanchor='left', x=1.05,  # Adjust position of the legend
+                bgcolor='rgba(255, 255, 255, 0)',  # Transparent background for legend
+                bordercolor='rgba(0, 0, 0, 0)'  # No border color
+            ),
+            showlegend=True
+        )
+        st.plotly_chart(fig_datasource, use_container_width=False)
+    # Visualization 3: Line chart for Hoax per Year using Plotly
+    with col3:
+        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Hoax per Tahun</h6>", unsafe_allow_html=True)
+    # Filter data to include only years up to 2023
+        hoax_per_year = df[(df['Label'] == 'HOAX') & (df['Year'] <= 2023)].groupby('Year').size().reset_index(name='count')
+        line_chart_hoax = px.line(hoax_per_year, x='Year', y='count', line_shape='linear',
+                              color_discrete_sequence=['red'])
+        line_chart_hoax.update_layout(
+            width=200, height=150, xaxis_title='Tahun', yaxis_title='Jumlah Hoax',
+            xaxis_title_font_size=10, yaxis_title_font_size=10,
+            xaxis_tickfont_size=8, yaxis_tickfont_size=8, margin=dict(t=10, b=10, l=10, r=10),
+            showlegend=False
+        )
+        st.plotly_chart(line_chart_hoax, use_container_width=False)
+    # Visualization 4: Bar chart for Topics per Year using Plotly
+    with col4:
+        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Topik per Tahun</h6>", unsafe_allow_html=True)
+        df['Tanggal'] = pd.to_datetime(df['Tanggal'], format='%d/%m/%Y')
+        df['Year'] = df['Tanggal'].dt.year
+        # Filter the data to include only years up to 2023
+        df_mafindo_filtered = df[df['Year'] <= 2023]
+        topics_per_year = df_mafindo_filtered.groupby(['Year', 'Topic']).size().reset_index(name='count')
+        # Create the vertical bar chart
+        bar_chart_topics = px.bar(topics_per_year, x='Year', y='count', color='Topic',
+                                  color_continuous_scale=px.colors.sequential.Viridis)
+        # Update layout to adjust the legend
+        bar_chart_topics.update_layout(
+            width=600, height=150, xaxis_title='Tahun', yaxis_title='Jumlah Topik',
+            xaxis_title_font_size=10, yaxis_title_font_size=10,
+            xaxis_tickfont_size=8, yaxis_tickfont_size=8, margin=dict(t=10, b=10, l=10, r=10),
+            showlegend=True,
+            legend=dict(
+                yanchor="top", y=1, xanchor="left", x=1.02,  # Adjust position of the legend
+                bgcolor='rgba(255, 255, 255, 0)',  # Transparent background for legend
+                bordercolor='rgba(0, 0, 0, 0)',  # No border color
+                itemclick='toggleothers',  # Allow toggling of legend items
+                itemsizing='constant',  # Consistent sizing for legend items
+                font=dict(size=8),
+                traceorder='normal',
+                orientation='v',  # Vertical orientation of legend
+                title_text='Topic'
+            )
+        )
+        st.plotly_chart(bar_chart_topics, use_container_width=True)
+    # Create a new row for WordCloud visualizations
+    col5, col6, col7 = st.columns([2, 2.5, 2.5])
+    # Wordcloud for Hoax
+    with col5:
+        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Wordcloud for Hoax</h6>", unsafe_allow_html=True)
+        hoax_text = ' '.join(df[df['Label'] == 'HOAX']['Content'])
+        wordcloud_hoax = generate_wordcloud(hoax_text, 'Reds', combined_stopwords)
+        fig_hoax = plt.figure(figsize=(5, 2.5))
+        plt.imshow(wordcloud_hoax, interpolation='bilinear')
+        plt.axis('off')
+        st.pyplot(fig_hoax)
+    with col6:
+        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Klasifikasi</h6>", unsafe_allow_html=True)
+        df_classification_counts = df['Classification'].value_counts().reset_index()
+        df_classification_counts.columns = ['Classification', 'Count']
+        # Create the donut chart
+        donut_chart_classification = px.pie(df_classification_counts, names='Classification', values='Count',
+                                        hole=0.3, color_discrete_sequence=px.colors.qualitative.Set2)
+        # Update layout to move the legend and adjust its size
+        donut_chart_classification.update_layout(
+            width=300, height=170,  # Adjust the size of the chart
+            margin=dict(t=20, b=20, l=20, r=120),  # Adjust margins to make room for the legend
+            legend=dict(
+                yanchor="top", y=1, xanchor="left", x=1.07,  # Adjust position of the legend
+                bgcolor='rgba(255, 255, 255, 0)',  # Transparent background for legend
+                bordercolor='rgba(0, 0, 0, 0)',  # No border color
+                itemclick='toggleothers',  # Allow toggling of legend items
+                itemsizing='constant',  # Consistent sizing for legend items
+                font=dict(size=8),  # Smaller font size for the legend
+                traceorder='normal',
+                orientation='v',  # Vertical legend
+                title_text='Classification'  # Title for the legend
+            )
+        )
+        st.plotly_chart(donut_chart_classification, use_container_width=True)
+    with col7:
+        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Tone</h6>", unsafe_allow_html=True)
+        df_tone_counts = df['Tone'].value_counts().reset_index()
+        df_tone_counts.columns = ['Tone', 'Count']
+        # Create the donut chart
+        donut_chart_tone = px.pie(df_tone_counts, names='Tone', values='Count',
+                                        hole=0.3, color_discrete_sequence=px.colors.qualitative.Set2)
+        # Update layout to move the legend and adjust its size
+        donut_chart_tone.update_layout(
+            width=250, height=170,  # Adjust the size of the chart
+            margin=dict(t=20, b=20, l=20, r=100),  # Adjust margins to make room for the legend
+            legend=dict(
+                yanchor="top", y=1, xanchor="left", x=1.07,  # Adjust position of the legend
+                bgcolor='rgba(255, 255, 255, 0)',  # Transparent background for legend
+                bordercolor='rgba(0, 0, 0, 0)',  # No border color
+                itemclick='toggleothers',  # Allow toggling of legend items
+                itemsizing='constant',  # Consistent sizing for legend items
+                font=dict(size=8),  # Smaller font size for the legend
+                traceorder='normal',
+                orientation='v',  # Vertical legend
+                title_text='Tone'  # Title for the legend
+            )
+        )
+        st.plotly_chart(donut_chart_tone, use_container_width=True)
+    # Evaluation Metrics Table
+    data = [
+        ["indobenchmark/indobert-base-p2", 0.6898, 0.9793, 0.8094, 0.8400, 0.1981, 0.3206, 0.7023],
+        ["cahya/bert-base-indonesian-522M", 0.7545, 0.8756, 0.8106, 0.6800, 0.4811, 0.5635, 0.7358],
+        ["indolem/indobert-base-uncased", 0.7536, 0.8238, 0.7871, 0.6136, 0.5094, 0.5567, 0.7124],
+        ["mdhugol/indonesia-bert-sentiment-classification", 0.7444, 0.8601, 0.7981, 0.6447, 0.4623, 0.5385, 0.7191]
+    ]
+    highest_accuracy = max(data, key=lambda x: x[-1])
+    # Header Table
+    html_table = """
+    <table style="width:100%; border-collapse: collapse; font-size: 12px;">
+    <tr>
+        <th rowspan="2" style="border: 1px solid black; padding: 5px; font-size: 14px; text-align: center;">Pre-trained Model</th>
+        <th colspan="3" style="border: 1px solid black; padding: 5px; font-size: 14px; text-align: center;">NON-HOAX</th>
+        <th colspan="3" style="border: 1px solid black; padding: 5px; font-size: 14px; text-align: center;">HOAX</th>
+        <th rowspan="2" style="border: 1px solid black; padding: 5px; font-size: 14px; text-align: center;">Accuracy</th>
+    </tr>
+    <tr>
+        <th style="border: 1px solid black; padding: 5px; font-size: 12px; width:80px; text-align: center;">Precision</th>
+        <th style="border: 1px solid black; padding: 5px; font-size: 12px; width:80px; text-align: center;">Recall</th>
+        <th style="border: 1px solid black; padding: 5px; font-size: 12px; width:80px; text-align: center;">F1-Score</th>
+        <th style="border: 1px solid black; padding: 5px; font-size: 12px; width:80px; text-align: center;">Precision</th>
+        <th style="border: 1px solid black; padding: 5px; font-size: 12px; width:80px; text-align: center;">Recall</th>
+        <th style="border: 1px solid black; padding: 5px; font-size: 12px; width:80px; text-align: center;">F1-Score</th>
+    </tr>
+    """
+    # Isi Data
+    for row in data:
+        if row == highest_accuracy:
+            html_table += "<tr style='background-color: #41B3A2; font-size: 12px;'>"
+        else:
+            html_table += "<tr style= ' font-size: 12px;'>"
+        for item in row:
+            html_table += f"<td style='border: 1px solid black; padding: 5px; font-size: 12px;'>{item}</td>"
+        html_table += "</tr>"
+    html_table += "</table>"
+    # Tampilkan Tabel di Streamlit
+    col8 = st.columns([5])
+    with col8[0]:
+        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Evaluation Metrics</h6>", unsafe_allow_html=True)
+        st.markdown(html_table, unsafe_allow_html=True)
+    html_table_col9 = """
+    <div style='text-align: center;'>
+        <table style="width: 100%; margin: -5px 0; font-size: 12px; border-collapse: collapse; border: 1px solid black;">
+            <thead>
+                <tr style="background-color: #e0e0e0;">
+                    <th style="padding: 8px; border: 1px solid black; font-weight: bold;">Label</th>
+                    <th style="padding: 8px; border: 1px solid black; font-weight: bold;">Train</th>
+                    <th style="padding: 8px; border: 1px solid black; font-weight: bold;">Test</th>
+                    <th style="padding: 8px; border: 1px solid black; font-weight: bold;">Dev</th>
+                </tr>
+            </thead>
+            <tbody>
+                <tr style="border-bottom: 1px solid black;">
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">HOAX</td>
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">11,563</td>
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">193</td>
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">193</td>
+                </tr>
+                <tr style="border-bottom: 1px solid black;">
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">NON-HOAX</td>
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">789</td>
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">106</td>
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">106</td>
+                </tr>
+                <tr style="font-weight: bold; border-top: 1px solid black;">
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">TOTAL</td>
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">12,352</td>
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">299</td>
+                    <td style="padding: 8px; border: 1px solid black; text-align: center;">299</td>
+                </tr>
+            </tbody>
+        </table>
+    </div>
+    """
+    # Display the table in col9 using HTML
+    col9 = st.columns([1])  # Adjust the number and width of columns as needed
+    with col9[0]:
+        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Statistik Data</h6>", unsafe_allow_html=True)
+        st.markdown(html_table_col9, unsafe_allow_html=True)

inbound-source-431806-g7-e49e388ce0be.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "type": "service_account",
+  "project_id": "inbound-source-431806-g7",
+  "private_key_id": "e49e388ce0bed9704aedad42a56d8e3982e0120f",
+  "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQC2eXeqNjR8Gaeb\npUekLAbieWiqvxGak71OFj4t1/fBemDduW1tsjN1biZzosJR3KERpIWQ0z0vbDwt\n3zOuvdf7XxEZ09Iopp3TRqk/qrrAQzanLAhkAJ8K8czNAlkaZ3KYW2j9bU+xk4P8\nFNpoHZwFZJczLujFxULxIU90KGqigXdvkdvyevfH1mxLlCuXL6F6bFsHuF8ckt2Q\nWKQ4bVnHW8w6CymhmJVgFxX68HxoTbObeoaRzkd5kjJvdJ+A4MQbdzLyHxlPxxZn\nme1LICIQKlsUrk6MSHzMcrl8BN0lMj8k6DgIO2WD/uCXRwemAJl3YoJc5BZN2Luz\nI0DTsDB1AgMBAAECggEANEVga6BicYhR1IrIlnVMNZUM0BiyvMKEkHlbr3s1zDU3\nyVwkRi+tgP6gQjDGFHgspaao4j84wDxzkrplDjHwzF/DwM/GXIG6JTsRIZ1RKOE4\nJzQ8ZRUueg6hGbsJ9j/a+lz5Gtu04Av/W3dHx1pwBrV1gKJ36KtkzTk7Du3C+jC6\n4fQeTim1ebrIkj3Hu88lV4cDMJENWDnHFXj0ww8SygaNDoT7X9E96iRwubMiE7AK\nI8JNJqFsO1S7nQPDbp4KceQwjVOWE1djTxtxYhcFAGOSacKVKcLz0mcQstXvUzns\nUfIj1+2l8dxRsHMRkg+bllD99aXJ2PrLKBp13ia90wKBgQDqKAVBtlATOFWtsdmG\nPJ6i1+SzzokuGoQO7UwweLtiGXMelr1rdmGTbtsg+/OQC2sFfDJtyKM2xhcXc07U\nxGQZGrPaGmVEAMDujayxgEyw46Dm8H9phGUekoAO8dsSRHynZ4KIGULtZZ/jasYp\nnHJOFVUeL9libv2hdyBC2zPWfwKBgQDHfzZdBKI9/OVo9S69CCoJ+lXs+n7H0/H5\n1wXLYcVfurVs4p+AGXA+F+bZJGFnrYWUwTS8DbB4cTISCURyanxd/IU22qfjp646\nJPTpMLefdqRf01x5jxOHt3NbWTwOWQL/jCoC10VaIeY0jAWRcpYpGj/lbNenyQB6\nQWO8GyeHCwKBgQC1EgOWoBvl8P9YVRqoEoJ93MNvQ/yS2VBblqb/KK2Gm7WI5vpN\nenrUHrp3FD5xmlLFKBh7CtcjySUcLj+8iq35N8vykczTPF31Wzs6+8LSWwQW8c0l\nVIs5jAJZDC/jPXDDp2iqRBacK6TroKrijKdbuGVc9ZV95+RcExmweX/pkwKBgQCI\nSyry5cWKIAsDZ+6kir1dz7+Ahaq0DuLUU8jLqGJWApMMbs+VjsuWQHIgi7BYSr5m\nYJEMoTWdM4iHtfkjSgjplSnVzhDBgb+QTctcvUHWGhI2vYoCKnOnVvfiwtY63ykj\nOblB85yX9Wz3HWp4chaQwjRBI9k58iL3Y1EmJE8e/QKBgF1HJKXaaXNogVHN/O5+\nh5YvWAQlWkmfL9sD89Gt1regkd+DM/Vfx+0yPuCgfopmOc72WO5gMQ6TlcH+MRQS\nPc2O7cHbit2IxsKfYYANOLjfhXAiIYC+yvArdzTwn53Wni+USnFH1YD1XUV8wTGj\nT0XsKoxnGUq4twTDK6re0oRl\n-----END PRIVATE KEY-----\n",
+  "client_email": "dashboardhoax-service-account@inbound-source-431806-g7.iam.gserviceaccount.com",
+  "client_id": "110233701696815226341",
+  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+  "token_uri": "https://oauth2.googleapis.com/token",
+  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/dashboardhoax-service-account%40inbound-source-431806-g7.iam.gserviceaccount.com",
+  "universe_domain": "googleapis.com"
+}

load_model.py ADDED Viewed

	@@ -0,0 +1,19 @@

+from transformers import BertTokenizer, BertForSequenceClassification
+import streamlit as st
+# Dictionary to map model names to their paths
+# Keys are the names passed to load_model(); values are the identifiers handed
+# to from_pretrained(). NOTE(review): the values look like Hugging Face Hub
+# repo IDs of fine-tuned checkpoints -- confirm.
+model_paths = {
+    "cahya/bert-base-indonesian-522M": "Nakhwa/cahyabert",
+    "indobenchmark/indobert-base-p2": "Nakhwa/indobenchmark",
+    "indolem/indobert-base-uncased": "Nakhwa/indolem",
+    "mdhugol/indonesia-bert-sentiment-classification": "Nakhwa/mdhugol"
+}
+# Function to load the selected model
+@st.cache_resource
+def load_model(model_name):
+    path = model_paths[model_name]
+    tokenizer = BertTokenizer.from_pretrained(path)
+    model = BertForSequenceClassification.from_pretrained(path)
+    model.eval()
+    return tokenizer, model

mafindo_mix_llm.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7cf40365bdcaf731eadc84f7c1622c75763d7277631749bed31adad6e90ff8e6
+size 19922497

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+streamlit==1.37.1
+pandas==2.2.2
+plotly==5.13.0
+wordcloud==1.9.3
+matplotlib==3.9.2
+lime==0.2.0.1
+torch==2.3.1
+numpy==1.26.4
+transformers==4.41.2
+streamlit-aggrid==1.0.5
+scikit-learn==1.5.1
+# Needed by deteksi_content.py (`from google.cloud import storage`).
+# TODO: pin to the version the app was tested with.
+google-cloud-storage

styles.py ADDED Viewed

	@@ -0,0 +1,43 @@

+# styles.py
+# Shared CSS overrides for Streamlit widgets (select boxes, text inputs, text
+# areas, file uploader, buttons and radio groups). The string is a complete
+# <style> element. NOTE(review): presumably injected by the page modules via
+# st.markdown(..., unsafe_allow_html=True) -- confirm against the importers.
+COMMON_CSS = """
+<style>
+.stSelectbox div[data-baseweb="select"] {
+    margin-top: -35px;
+}
+.stTextInput div[data-baseweb="input"] {
+    margin-top: -35px;
+}
+.stTextArea div[data-baseweb="textarea"] {
+    margin-top: -35px;
+}
+.stFileUploader div[data-baseweb="input"] {
+    margin-top: -100px;
+}
+.stSelectbox {
+    max-width: 300px;
+}
+.stTextInput, .stTextArea {
+    max-width: 1400px;
+}
+.stSelectbox div, .stTextInput input, .stTextArea textarea {
+    font-size: 14px;
+}
+.stButton > button {
+    font-size: 6px;
+    padding: 2px 8px;
+    border-radius: 10px;
+    background-color: #1560BD;
+    color: white;
+}
+.stButton > button:hover {
+    background-color: #1560BD;
+    border: none;
+    outline: none;
+}
+.stRadio div[data-baseweb="radio"] {
+    font-size: 14px; /* Ensure font size for the entire radio button group */
+    margin-top: -100px; /* Reduce margin between label and radio button */
+}
+</style>
+"""

test.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import torch
+from torch.nn.functional import softmax
+from load_model import load_model  # Import the load_model function
+import numpy as np
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
+import streamlit as st
+@st.cache_resource
+def get_model_and_tokenizer(model_name):
+    return load_model(model_name)
+# Initialize default model (could be anything, or even load dynamically)
+# This runs at import time. The resulting module-level `tokenizer` and `model`
+# are the objects predict_hoax() and predict_proba_for_lime() read.
+default_model_name = "cahya/bert-base-indonesian-522M"
+tokenizer, model = load_model(default_model_name)
+# Prediction function
+def predict_hoax(title, content):
+    if tokenizer is None or model is None:
+        raise ValueError("Model and tokenizer must be loaded before prediction.")
+    print(f"Using model: {model}")
+    print(f"Using tokenizer: {tokenizer}")
+    text = f"{title} [SEP] {content}"
+    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=256)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    probs = softmax(outputs.logits, dim=1)
+    pred = torch.argmax(probs, dim=1).item()
+    label = 'HOAX' if pred == 1 else 'NON-HOAX'
+    return label
+# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# model.to(device)
+# LIME prediction function
+def predict_proba_for_lime(texts):
+    results = []
+    for text in texts:
+        inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=256)
+        with torch.no_grad():
+            outputs = model(**inputs)
+        probs = softmax(outputs.logits, dim=1).detach().cpu().numpy()
+        results.append(probs[0])
+    return np.array(results)
+def evaluate_model_performance(df, tokenizer, model):
+    true_labels = []
+    pred_labels = []
+    for index, row in df.iterrows():
+        true_label = row['Label']  # Menggunakan 'Title' sebagai label sebenarnya karena tidak ada 'Final_Result'
+        pred_label = predict_hoax(row['Title'], row['Content'])
+        true_labels.append(1 if true_label == 'HOAX' else 0)
+        pred_labels.append(1 if pred_label == 'HOAX' else 0)
+    accuracy = accuracy_score(true_labels, pred_labels)
+    precision = precision_score(true_labels, pred_labels, average='binary')
+    recall = recall_score(true_labels, pred_labels, average='binary')
+    f1 = f1_score(true_labels, pred_labels, average='binary')
+    return accuracy, precision, recall, f1