# ui_generators.py
"""
Generates HTML content and Matplotlib plots for the Gradio UI tabs.

Every public function receives ``token_state``, a mapping that is read with
``.get`` for the keys ``"token"``, ``"bubble_posts_df"``,
``"bubble_mentions_df"`` and ``"bubble_follower_stats_df"``.

NOTE(review): the HTML tags inside this file's string literals were lost when
the source was extracted (the literals had collapsed to bare text and line
breaks). The heading / paragraph / separator tags used below are a
reconstruction; the visible wording is unchanged. Confirm the markup against
the rendered UI.
"""
import logging

import matplotlib

# Select the non-interactive Agg backend *before* pyplot is imported, so that
# no GUI backend is ever initialised (required for Gradio compatibility).
matplotlib.use('Agg')

import matplotlib.pyplot as plt  # noqa: E402
import pandas as pd  # noqa: E402

# Assuming config.py contains all necessary constants
from config import (  # noqa: E402
    BUBBLE_POST_DATE_COLUMN_NAME,
    BUBBLE_MENTIONS_DATE_COLUMN_NAME,
    BUBBLE_MENTIONS_ID_COLUMN_NAME,
    FOLLOWER_STATS_TYPE_COLUMN,
    FOLLOWER_STATS_CATEGORY_COLUMN,
    FOLLOWER_STATS_ORGANIC_COLUMN,
    FOLLOWER_STATS_PAID_COLUMN,
    FOLLOWER_STATS_CATEGORY_COLUMN_DT,
    UI_DATE_FORMAT,
    UI_MONTH_FORMAT
)

# Value of FOLLOWER_STATS_TYPE_COLUMN that marks a monthly-gain row.
_MONTHLY_GAINS_TYPE = 'follower_gains_monthly'
# Bar colours for the sentiment plot (matplotlib cycles them if there are more bars).
_SENTIMENT_COLORS = ['#4CAF50', '#FFC107', '#F44336', '#9E9E9E', '#2196F3']


def _has_token(token_state):
    """Return True when token_state is non-empty and carries a truthy "token"."""
    return bool(token_state) and bool(token_state.get("token"))


def _get_dataframe(token_state, key):
    """Return token_state[key] if it is a DataFrame, otherwise an empty DataFrame.

    Guards against the key being absent *or* present with a non-DataFrame
    value such as None, which would otherwise break ``.empty`` / ``len``.
    """
    value = token_state.get(key)
    return value if isinstance(value, pd.DataFrame) else pd.DataFrame()


def _sort_and_format_dates(df, date_column, log_context):
    """Return (copy of df sorted newest-first with date_column as UI text, ok flag).

    Rows are ordered on the parsed datetimes and only then formatted with
    UI_DATE_FORMAT; sorting the formatted strings would be lexicographic and
    therefore wrong for any non-ISO display format. Unparseable dates sort
    last. If date_column is absent the copy is returned untouched. On any
    error the problem is logged as "<log_context>: <error>" and an unmodified
    copy is returned with ok=False.
    """
    if date_column not in df.columns:
        return df.copy(), True
    try:
        result = df.copy()
        result[date_column] = pd.to_datetime(result[date_column], errors='coerce')
        result = result.sort_values(by=date_column, ascending=False, na_position='last')
        result[date_column] = result[date_column].dt.strftime(UI_DATE_FORMAT)
        return result, True
    except Exception as e:
        logging.error(f"{log_context}: {e}")
        return df.copy(), False


def _rotate_xtick_labels(ax):
    """Rotate the x tick labels of ax by 45 degrees, right-aligned.

    Works on the given Axes directly instead of pyplot's "current figure".
    """
    for label in ax.get_xticklabels():
        label.set_rotation(45)
        label.set_horizontalalignment('right')


def _select_stat_rows(stats_df, stat_type, required_columns):
    """Return a copy of the rows of stat_type with non-null required_columns.

    Returns an empty frame (and logs a warning) when the type column or any
    required column is missing, so callers can show their "no data" message
    instead of failing with a KeyError.
    """
    needed = [FOLLOWER_STATS_TYPE_COLUMN, *required_columns]
    missing = [col for col in needed if col not in stats_df.columns]
    if missing:
        logging.warning("Follower stats: missing column(s) %s; skipping '%s'.", missing, stat_type)
        return stats_df.iloc[0:0].copy()
    mask = stats_df[FOLLOWER_STATS_TYPE_COLUMN] == stat_type
    for col in required_columns:
        mask = mask & stats_df[col].notna()
    return stats_df[mask].copy()


def _dashboard_recent_section(df, date_column, candidate_columns, noun):
    """Build the dashboard HTML fragments for the five newest posts or mentions.

    noun is "post" or "mention" and is only used in the user/log messages.
    """
    if df.empty:
        return [f"<p>No {noun}s loaded from Bubble.</p>"]

    columns = [col for col in candidate_columns if col in df.columns]
    if not columns:
        return [f"<p>No relevant {noun} columns found to display.</p>"]

    parts = []
    display_df, dates_ok = _sort_and_format_dates(
        df, date_column, f"Error formatting {noun} dates for display"
    )
    if not dates_ok:
        parts.append(f"<p>Error formatting {noun} dates.</p>")
    # escape=True: the text columns hold externally authored content, which
    # must not be injected into the page as raw HTML.
    # NOTE(review): if upstream deliberately stores pre-rendered HTML in these
    # columns it will now show literally - confirm.
    parts.append(
        display_df[columns].head().to_html(
            escape=True, index=False, classes="table table-striped table-sm"
        )
    )
    return parts


def _dashboard_follower_section(stats_df):
    """Build the dashboard HTML fragments summarising follower statistics."""
    parts = [f"<h4>Follower Statistics ({len(stats_df)} entries in Bubble):</h4>"]
    if stats_df.empty:
        parts.append("<p>No follower statistics loaded from Bubble.</p>")
        return parts

    if FOLLOWER_STATS_TYPE_COLUMN in stats_df.columns:
        is_monthly = stats_df[FOLLOWER_STATS_TYPE_COLUMN] == _MONTHLY_GAINS_TYPE
    else:
        # Without a type column nothing can be classified as a monthly gain.
        logging.warning("Dashboard display: follower stats have no '%s' column.", FOLLOWER_STATS_TYPE_COLUMN)
        is_monthly = pd.Series(False, index=stats_df.index)

    monthly_gains = stats_df[is_monthly].copy()
    gain_columns = [
        FOLLOWER_STATS_CATEGORY_COLUMN,
        FOLLOWER_STATS_ORGANIC_COLUMN,
        FOLLOWER_STATS_PAID_COLUMN,
    ]
    if not monthly_gains.empty and all(col in monthly_gains.columns for col in gain_columns):
        try:
            # FOLLOWER_STATS_CATEGORY_COLUMN for monthly gains is 'YYYY-MM-DD'
            monthly_gains[FOLLOWER_STATS_CATEGORY_COLUMN_DT] = pd.to_datetime(
                monthly_gains[FOLLOWER_STATS_CATEGORY_COLUMN], errors='coerce'
            )
            # Rows whose date could not be parsed cannot be the "latest" one.
            latest_gain = (
                monthly_gains.dropna(subset=[FOLLOWER_STATS_CATEGORY_COLUMN_DT])
                .sort_values(by=FOLLOWER_STATS_CATEGORY_COLUMN_DT, ascending=False)
                .head(1)
                .copy()
            )
            if not latest_gain.empty:
                latest_gain[FOLLOWER_STATS_CATEGORY_COLUMN] = (
                    latest_gain[FOLLOWER_STATS_CATEGORY_COLUMN_DT].dt.strftime(UI_DATE_FORMAT)
                )  # or UI_MONTH_FORMAT
                parts.append("<h5>Latest Monthly Follower Gain:</h5>")
                parts.append(
                    latest_gain[gain_columns].to_html(escape=True, index=False, classes="table table-sm")
                )
            else:
                parts.append("<p>No valid monthly follower gain data to display after processing.</p>")
        except Exception as e:
            logging.error(f"Error formatting follower gain dates for display: {e}", exc_info=True)
            parts.append("<p>Error displaying monthly follower gain data.</p>")
    else:
        parts.append("<p>No monthly follower gain data or required columns are missing.</p>")

    # Everything that is not a monthly-gain row counts as a demographic entry.
    demographics_count = int((~is_monthly).sum())
    parts.append(f"<p>Total demographic entries (seniority, industry, etc.): {demographics_count}</p>")
    return parts


def display_main_dashboard(token_state):
    """Generates HTML for the main dashboard display using data from token_state.

    Returns a single HTML string with three sections (recent posts, recent
    mentions, follower statistics summary), or an access-denied message when
    token_state is empty or has no "token".
    """
    if not _has_token(token_state):
        logging.warning("Dashboard display: Access denied. No token available.")
        return "❌ Access denied. No token available for dashboard."

    html_parts = ["<h3>Dashboard Overview</h3>"]

    # Display Recent Posts
    posts_df = _get_dataframe(token_state, "bubble_posts_df")
    html_parts.append(f"<h4>Recent Posts ({len(posts_df)} in Bubble):</h4>")
    html_parts.extend(
        _dashboard_recent_section(
            posts_df,
            BUBBLE_POST_DATE_COLUMN_NAME,
            [BUBBLE_POST_DATE_COLUMN_NAME, 'text', 'sentiment', 'summary_text', 'li_eb_label'],
            "post",
        )
    )
    html_parts.append("<br/>")

    # Display Recent Mentions
    mentions_df = _get_dataframe(token_state, "bubble_mentions_df")
    html_parts.append(f"<h4>Recent Mentions ({len(mentions_df)} in Bubble):</h4>")
    html_parts.extend(
        _dashboard_recent_section(
            mentions_df,
            BUBBLE_MENTIONS_DATE_COLUMN_NAME,
            [BUBBLE_MENTIONS_DATE_COLUMN_NAME, "mention_text", "sentiment_label"],
            "mention",
        )
    )
    html_parts.append("<br/>")

    # Display Follower Statistics Summary
    follower_stats_df = _get_dataframe(token_state, "bubble_follower_stats_df")
    html_parts.extend(_dashboard_follower_section(follower_stats_df))
    html_parts.append("<br/>")

    return "".join(html_parts)


def _plot_sentiment_distribution(sentiment_labels):
    """Return a bar-chart figure of sentiment label counts, or None if there are none.

    The figure is closed before re-raising if plotting fails, so a failed
    attempt does not leave an orphaned figure registered with pyplot.
    """
    sentiment_counts = sentiment_labels.value_counts()
    if sentiment_counts.empty:
        logging.info("Mentions tab: Not enough data or 'sentiment_label' column missing for plot.")
        return None

    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        sentiment_counts.plot(kind='bar', ax=ax, color=_SENTIMENT_COLORS)
        ax.set_title("Mention Sentiment Distribution")
        ax.set_ylabel("Count")
        _rotate_xtick_labels(ax)
        fig.tight_layout()
    except Exception:
        plt.close(fig)
        raise
    return fig


def run_mentions_tab_display(token_state):
    """Generates HTML and a plot for the Mentions tab.

    Returns a tuple ``(html, figure)``: the HTML lists up to 20 of the newest
    mentions; the figure is a sentiment-count bar chart, or None when it
    cannot be produced. The caller owns the returned figure.
    """
    logging.info("Updating Mentions Tab display.")
    if not _has_token(token_state):
        logging.warning("Mentions tab: Access denied. No token.")
        return "❌ Access denied. No token available for mentions.", None

    mentions_df = _get_dataframe(token_state, "bubble_mentions_df")
    if mentions_df.empty:
        logging.info("Mentions tab: No mentions data in Bubble.")
        return "<p>No mentions data in Bubble. Try syncing.</p>", None

    html_parts = ["<h3>Recent Mentions</h3>"]
    display_columns = [
        col
        for col in [
            BUBBLE_MENTIONS_DATE_COLUMN_NAME,
            "mention_text",
            "sentiment_label",
            BUBBLE_MENTIONS_ID_COLUMN_NAME,
        ]
        if col in mentions_df.columns
    ]

    mentions_df_display, dates_ok = _sort_and_format_dates(
        mentions_df,
        BUBBLE_MENTIONS_DATE_COLUMN_NAME,
        "Error formatting mention dates for tab display",
    )
    if not dates_ok:
        html_parts.append("<p>Error formatting mention dates.</p>")

    if not display_columns or mentions_df_display[display_columns].empty:
        html_parts.append("<p>Required columns for mentions display are missing or no data after processing.</p>")
    else:
        # escape=True: mention text is written by third parties.
        html_parts.append(
            mentions_df_display[display_columns].head(20).to_html(
                escape=True, index=False, classes="table table-sm"
            )
        )

    mentions_html_output = "\n".join(html_parts)

    fig = None
    if "sentiment_label" in mentions_df.columns:
        try:
            fig = _plot_sentiment_distribution(mentions_df["sentiment_label"])
            if fig is not None:
                logging.info("Mentions tab: Sentiment distribution plot generated.")
        except Exception as e:
            logging.error(f"Error generating mentions plot: {e}", exc_info=True)
            fig = None
    else:
        logging.info("Mentions tab: Not enough data or 'sentiment_label' column missing for plot.")

    return mentions_html_output, fig


def _plot_monthly_gains(gains_df):
    """Return a line-chart figure of organic and paid gains summed per month.

    gains_df must already carry parsed datetimes in
    FOLLOWER_STATS_CATEGORY_COLUMN_DT. Months are ordered by a fixed
    '%Y-%m' key and only labelled with UI_MONTH_FORMAT afterwards, so the
    x-axis stays chronological whatever the display format is.
    """
    month_key = gains_df[FOLLOWER_STATS_CATEGORY_COLUMN_DT].dt.strftime('%Y-%m')
    # Group by month so multiple entries in one month collapse to one x tick.
    plot_data = (
        gains_df.assign(_plot_month=month_key)
        .groupby('_plot_month')
        .agg(
            organic=(FOLLOWER_STATS_ORGANIC_COLUMN, 'sum'),
            paid=(FOLLOWER_STATS_PAID_COLUMN, 'sum'),
            month_start=(FOLLOWER_STATS_CATEGORY_COLUMN_DT, 'min'),
        )
        .sort_index()
    )
    month_labels = plot_data['month_start'].dt.strftime(UI_MONTH_FORMAT).tolist()

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.plot(month_labels, plot_data['organic'].to_numpy(), marker='o', linestyle='-', label='Organic Gain')
        ax.plot(month_labels, plot_data['paid'].to_numpy(), marker='x', linestyle='--', label='Paid Gain')
        ax.set_title("Monthly Follower Gains Over Time")
        ax.set_ylabel("Follower Count")
        ax.set_xlabel("Month (YYYY-MM)")
        _rotate_xtick_labels(ax)
        ax.legend()
        ax.grid(True, linestyle='--', alpha=0.7)
        fig.tight_layout()
    except Exception:
        plt.close(fig)
        raise
    return fig


def _monthly_gains_section(stats_df):
    """Return (html fragments, figure or None) for the monthly gains table and plot."""
    gain_columns = [
        FOLLOWER_STATS_CATEGORY_COLUMN,
        FOLLOWER_STATS_ORGANIC_COLUMN,
        FOLLOWER_STATS_PAID_COLUMN,
    ]
    no_data = "<p>No monthly follower gain data available or required columns missing.</p>"

    monthly_gains_df = _select_stat_rows(stats_df, _MONTHLY_GAINS_TYPE, gain_columns)
    if monthly_gains_df.empty:
        return [no_data], None

    parts = []
    fig = None
    try:
        monthly_gains_df[FOLLOWER_STATS_CATEGORY_COLUMN_DT] = pd.to_datetime(
            monthly_gains_df[FOLLOWER_STATS_CATEGORY_COLUMN], errors='coerce'
        )
        # Rows with unparseable dates can be neither ordered nor labelled.
        monthly_gains_df = monthly_gains_df.dropna(subset=[FOLLOWER_STATS_CATEGORY_COLUMN_DT])
        if monthly_gains_df.empty:
            return [no_data], None

        table_display_df = monthly_gains_df.sort_values(
            by=FOLLOWER_STATS_CATEGORY_COLUMN_DT, ascending=False
        ).head(13).copy()
        table_display_df[FOLLOWER_STATS_CATEGORY_COLUMN] = (
            table_display_df[FOLLOWER_STATS_CATEGORY_COLUMN_DT].dt.strftime(UI_MONTH_FORMAT)
        )  # Use YYYY-MM for table
        parts.append("<h4>Monthly Follower Gains (Last 13 Months):</h4>")
        parts.append(
            table_display_df[gain_columns].to_html(escape=True, index=False, classes="table table-sm")
        )

        fig = _plot_monthly_gains(monthly_gains_df)
        logging.info("Follower stats tab: Monthly gains plot generated.")
    except Exception as e:
        logging.error(f"Error processing or plotting monthly gains: {e}", exc_info=True)
        parts.append("<p>Error displaying monthly follower gain data.</p>")
    return parts, fig


def _plot_top_categories(top_rows, title, bar_color):
    """Return a bar-chart figure of organic follower counts per category."""
    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        ax.bar(
            top_rows[FOLLOWER_STATS_CATEGORY_COLUMN],
            top_rows[FOLLOWER_STATS_ORGANIC_COLUMN],
            color=bar_color,
        )
        ax.set_title(title)
        ax.set_ylabel("Organic Follower Count")
        _rotate_xtick_labels(ax)
        ax.grid(axis='y', linestyle='--', alpha=0.7)
        fig.tight_layout()
    except Exception:
        plt.close(fig)
        raise
    return fig


def _demographic_section(stats_df, stat_type, label, bar_color):
    """Return (html fragments, figure or None) for one demographic breakdown.

    label is the capitalised display name ("Seniority" / "Industry"); its
    lower-case form is used in the user and log messages. Table and plot are
    built from the same top-10 rows by organic count.
    """
    name = label.lower()
    rows = _select_stat_rows(
        stats_df, stat_type, [FOLLOWER_STATS_CATEGORY_COLUMN, FOLLOWER_STATS_ORGANIC_COLUMN]
    )
    if rows.empty:
        return [f"<p>No follower {name} data available or required columns missing.</p>"], None

    parts = []
    fig = None
    try:
        top_rows = rows.sort_values(by=FOLLOWER_STATS_ORGANIC_COLUMN, ascending=False).head(10)
        # The paid column is optional here; show it only when it exists.
        table_columns = [
            col
            for col in (
                FOLLOWER_STATS_CATEGORY_COLUMN,
                FOLLOWER_STATS_ORGANIC_COLUMN,
                FOLLOWER_STATS_PAID_COLUMN,
            )
            if col in top_rows.columns
        ]
        parts.append(f"<h4>Followers by {label} (Top 10 Organic):</h4>")
        parts.append(top_rows[table_columns].to_html(escape=True, index=False, classes="table table-sm"))

        fig = _plot_top_categories(
            top_rows, f"Follower Distribution by {label} (Top 10 Organic)", bar_color
        )
        logging.info(f"Follower stats tab: {label} distribution plot generated.")
    except Exception as e:
        logging.error(f"Error processing or plotting {name} data: {e}", exc_info=True)
        parts.append(f"<p>Error displaying follower {name} data.</p>")
    return parts, fig


def run_follower_stats_tab_display(token_state):
    """Generates HTML and plots for the Follower Stats tab.

    Returns a tuple ``(html, monthly_gains_fig, seniority_fig, industry_fig)``.
    Each figure is None when its data is missing or plotting failed. The
    caller owns the returned figures.
    """
    logging.info("Updating Follower Stats Tab display.")
    if not _has_token(token_state):
        logging.warning("Follower stats tab: Access denied. No token.")
        return "❌ Access denied. No token available for follower stats.", None, None, None

    follower_stats_df = _get_dataframe(token_state, "bubble_follower_stats_df")
    if follower_stats_df.empty:
        logging.info("Follower stats tab: No follower stats data in Bubble.")
        return "<p>No follower stats data in Bubble. Try syncing.</p>", None, None, None

    html_parts = ["<h3>Follower Statistics Overview</h3>"]

    # --- Monthly Gains Table & Plot ---
    section_parts, plot_monthly_gains = _monthly_gains_section(follower_stats_df)
    html_parts.extend(section_parts)
    html_parts.append("<br/>")

    # --- Seniority Table & Plot ---
    section_parts, plot_seniority_dist = _demographic_section(
        follower_stats_df, 'follower_seniority', "Seniority", 'skyblue'
    )
    html_parts.extend(section_parts)
    html_parts.append("<br/>")

    # --- Industry Table & Plot ---
    section_parts, plot_industry_dist = _demographic_section(
        follower_stats_df, 'follower_industry', "Industry", 'lightcoral'
    )
    html_parts.extend(section_parts)
    html_parts.append("<br/>")

    follower_html_output = "\n".join(html_parts)
    return follower_html_output, plot_monthly_gains, plot_seniority_dist, plot_industry_dist