Spaces:
Running
Running
# ui_generators.py
"""
Generates HTML content and Matplotlib plots for the Gradio UI tabs.
"""
import logging

import matplotlib

# Select the non-interactive Agg backend (for Gradio compatibility) *before*
# pyplot is imported, so the choice is in effect regardless of the installed
# Matplotlib version or of which module touches pyplot first.
matplotlib.use('Agg')

import matplotlib.pyplot as plt  # noqa: E402  (must follow matplotlib.use)
import pandas as pd  # noqa: E402

# config.py is expected to define every constant imported below.
from config import (  # noqa: E402
    BUBBLE_POST_DATE_COLUMN_NAME, BUBBLE_MENTIONS_DATE_COLUMN_NAME, BUBBLE_MENTIONS_ID_COLUMN_NAME,
    FOLLOWER_STATS_TYPE_COLUMN, FOLLOWER_STATS_CATEGORY_COLUMN, FOLLOWER_STATS_ORGANIC_COLUMN,
    FOLLOWER_STATS_PAID_COLUMN, FOLLOWER_STATS_CATEGORY_COLUMN_DT, UI_DATE_FORMAT, UI_MONTH_FORMAT
)
def _dashboard_frame(token_state, key):
    """Return ``token_state[key]`` when it is a DataFrame, else an empty DataFrame.

    Covers both a missing key and a key explicitly stored as ``None`` so the
    callers can rely on ``len()`` / ``.empty`` without extra checks.
    """
    candidate = token_state.get(key)
    return candidate if isinstance(candidate, pd.DataFrame) else pd.DataFrame()


def _dashboard_recent_rows_html(frame, date_column, wanted_columns, noun):
    """Render the five most recent rows of *frame* as a list of HTML fragments.

    Args:
        frame: Non-empty DataFrame to display.
        date_column: Name of the column used for ordering and date formatting.
        wanted_columns: Column names to show, in display order; names absent
            from *frame* are skipped.
        noun: Singular word ("post" / "mention") inserted into user and log
            messages.

    Returns:
        A list of HTML strings (possibly an error notice followed by a table).
    """
    columns = [col for col in wanted_columns if col in frame.columns]
    if not columns:
        return [f"<p>No relevant {noun} columns found to display.</p>"]

    fragments = []
    view = frame.copy()
    if date_column in view.columns:
        try:
            parsed = pd.to_datetime(view[date_column], errors='coerce')
            formatted = parsed.dt.strftime(UI_DATE_FORMAT)
            # Order by the real timestamps (not the formatted text) so the
            # result is chronological whatever UI_DATE_FORMAT looks like.
            # Positions are used instead of labels to tolerate duplicate
            # index values; mergesort keeps ties in their original order.
            newest_first = (
                parsed.reset_index(drop=True)
                .sort_values(ascending=False, na_position='last', kind='mergesort')
                .index.to_numpy()
            )
            # Nothing is mutated until both steps above have succeeded, so a
            # failure leaves the table showing the untouched source values.
            view[date_column] = formatted
            view = view.iloc[newest_first]
        except Exception as exc:
            logging.error("Error formatting %s dates for display: %s", noun, exc)
            fragments.append(f"<p>Error formatting {noun} dates.</p>")

    # escape=True: the text columns hold externally authored content and must
    # not be interpreted as markup by the HTML component.
    fragments.append(
        view[columns].head().to_html(escape=True, index=False, classes="table table-striped table-sm")
    )
    return fragments


def _dashboard_follower_summary_html(stats):
    """Return HTML fragments summarising follower statistics for the dashboard.

    Shows the latest monthly gain row (when one with a parseable date exists)
    and the number of rows whose type is not 'follower_gains_monthly'.
    """
    missing_message = "<p>No monthly follower gain data or required columns are missing.</p>"
    if FOLLOWER_STATS_TYPE_COLUMN not in stats.columns:
        # Without the type column rows cannot be classified at all.
        return [missing_message]

    required = (
        FOLLOWER_STATS_CATEGORY_COLUMN,
        FOLLOWER_STATS_ORGANIC_COLUMN,
        FOLLOWER_STATS_PAID_COLUMN,
    )
    monthly = stats[stats[FOLLOWER_STATS_TYPE_COLUMN] == 'follower_gains_monthly'].copy()

    fragments = []
    if monthly.empty or any(col not in monthly.columns for col in required):
        fragments.append(missing_message)
    else:
        try:
            monthly[FOLLOWER_STATS_CATEGORY_COLUMN_DT] = pd.to_datetime(
                monthly[FOLLOWER_STATS_CATEGORY_COLUMN], errors='coerce'
            )
            # Rows whose date could not be parsed cannot be "the latest".
            dated = monthly.dropna(subset=[FOLLOWER_STATS_CATEGORY_COLUMN_DT])
            latest = dated.sort_values(
                by=FOLLOWER_STATS_CATEGORY_COLUMN_DT, ascending=False
            ).head(1).copy()
            if latest.empty:
                fragments.append("<p>No valid monthly follower gain data to display after processing.</p>")
            else:
                latest[FOLLOWER_STATS_CATEGORY_COLUMN] = (
                    latest[FOLLOWER_STATS_CATEGORY_COLUMN_DT].dt.strftime(UI_DATE_FORMAT)
                )
                fragments.append("<h5>Latest Monthly Follower Gain:</h5>")
                fragments.append(
                    latest[list(required)].to_html(escape=True, index=False, classes="table table-sm")
                )
        except Exception as exc:
            logging.error("Error formatting follower gain dates for display: %s", exc, exc_info=True)
            fragments.append("<p>Error displaying monthly follower gain data.</p>")

    demographics_count = len(stats[stats[FOLLOWER_STATS_TYPE_COLUMN] != 'follower_gains_monthly'])
    fragments.append(f"<p>Total demographic entries (seniority, industry, etc.): {demographics_count}</p>")
    return fragments


def display_main_dashboard(token_state):
    """Generate the HTML for the main dashboard from the data in *token_state*.

    Args:
        token_state: Mapping read with ``.get``. Keys used: ``"token"``
            (must be truthy), ``"bubble_posts_df"``, ``"bubble_mentions_df"``
            and ``"bubble_follower_stats_df"`` (pandas DataFrames; missing or
            non-DataFrame values are treated as empty).

    Returns:
        A single HTML string. When no token is available an access-denied
        message is returned instead of the dashboard.
    """
    if not token_state or not token_state.get("token"):
        logging.warning("Dashboard display: Access denied. No token available.")
        return "❌ Access denied. No token available for dashboard."

    html_parts = ["<div style='padding:10px;'><h3>Dashboard Overview</h3>"]

    # --- Recent posts ---
    posts_df = _dashboard_frame(token_state, "bubble_posts_df")
    html_parts.append(f"<h4>Recent Posts ({len(posts_df)} in Bubble):</h4>")
    if posts_df.empty:
        html_parts.append("<p>No posts loaded from Bubble.</p>")
    else:
        html_parts.extend(_dashboard_recent_rows_html(
            posts_df,
            BUBBLE_POST_DATE_COLUMN_NAME,
            [BUBBLE_POST_DATE_COLUMN_NAME, 'text', 'sentiment', 'summary_text', 'li_eb_label'],
            "post",
        ))
    html_parts.append("<hr/>")

    # --- Recent mentions ---
    mentions_df = _dashboard_frame(token_state, "bubble_mentions_df")
    html_parts.append(f"<h4>Recent Mentions ({len(mentions_df)} in Bubble):</h4>")
    if mentions_df.empty:
        html_parts.append("<p>No mentions loaded from Bubble.</p>")
    else:
        html_parts.extend(_dashboard_recent_rows_html(
            mentions_df,
            BUBBLE_MENTIONS_DATE_COLUMN_NAME,
            [BUBBLE_MENTIONS_DATE_COLUMN_NAME, "mention_text", "sentiment_label"],
            "mention",
        ))
    html_parts.append("<hr/>")

    # --- Follower statistics summary ---
    follower_stats_df = _dashboard_frame(token_state, "bubble_follower_stats_df")
    html_parts.append(f"<h4>Follower Statistics ({len(follower_stats_df)} entries in Bubble):</h4>")
    if follower_stats_df.empty:
        html_parts.append("<p>No follower statistics loaded from Bubble.</p>")
    else:
        html_parts.extend(_dashboard_follower_summary_html(follower_stats_df))

    html_parts.append("</div>")
    return "".join(html_parts)
def _mentions_sentiment_figure(sentiment_labels):
    """Build a bar chart of how often each sentiment label occurs.

    The figure is created directly from ``matplotlib.figure.Figure`` rather
    than through pyplot, so it is not registered in pyplot's global figure
    list (which would keep every figure alive until explicitly closed) and no
    call depends on pyplot's process-wide "current figure".
    """
    from matplotlib.figure import Figure

    sentiment_counts = sentiment_labels.value_counts()
    fig = Figure(figsize=(6, 4))
    ax = fig.subplots()
    sentiment_counts.plot(
        kind='bar', ax=ax,
        color=['#4CAF50', '#FFC107', '#F44336', '#9E9E9E', '#2196F3'],
    )
    ax.set_title("Mention Sentiment Distribution")
    ax.set_ylabel("Count")
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(45)
        tick_label.set_horizontalalignment('right')
    fig.tight_layout()
    return fig


def run_mentions_tab_display(token_state):
    """Generate the HTML table and sentiment plot for the Mentions tab.

    Args:
        token_state: Mapping read with ``.get``. Keys used: ``"token"``
            (must be truthy) and ``"bubble_mentions_df"`` (pandas DataFrame;
            a missing or non-DataFrame value is treated as "no data").

    Returns:
        A ``(html, figure)`` tuple. ``figure`` is a Matplotlib figure, or
        ``None`` when access is denied, there is no data, the
        ``sentiment_label`` column is absent, or plotting failed.
    """
    logging.info("Updating Mentions Tab display.")
    if not token_state or not token_state.get("token"):
        logging.warning("Mentions tab: Access denied. No token.")
        return "❌ Access denied. No token available for mentions.", None

    mentions_df = token_state.get("bubble_mentions_df")
    if not isinstance(mentions_df, pd.DataFrame) or mentions_df.empty:
        logging.info("Mentions tab: No mentions data in Bubble.")
        return "<p style='text-align:center;'>No mentions data in Bubble. Try syncing.</p>", None

    html_parts = ["<h3 style='text-align:center;'>Recent Mentions</h3>"]
    display_columns = [
        col
        for col in [BUBBLE_MENTIONS_DATE_COLUMN_NAME, "mention_text", "sentiment_label", BUBBLE_MENTIONS_ID_COLUMN_NAME]
        if col in mentions_df.columns
    ]

    mentions_view = mentions_df.copy()
    if BUBBLE_MENTIONS_DATE_COLUMN_NAME in mentions_view.columns:
        try:
            parsed = pd.to_datetime(mentions_view[BUBBLE_MENTIONS_DATE_COLUMN_NAME], errors='coerce')
            formatted = parsed.dt.strftime(UI_DATE_FORMAT)
            # Sort on the parsed timestamps, not on the formatted text, so
            # "recent" is chronological for any UI_DATE_FORMAT. Positional
            # ordering tolerates duplicate index labels.
            newest_first = (
                parsed.reset_index(drop=True)
                .sort_values(ascending=False, na_position='last', kind='mergesort')
                .index.to_numpy()
            )
            mentions_view[BUBBLE_MENTIONS_DATE_COLUMN_NAME] = formatted
            mentions_view = mentions_view.iloc[newest_first]
        except Exception as exc:
            logging.error("Error formatting mention dates for tab display: %s", exc)
            html_parts.append("<p>Error formatting mention dates.</p>")

    if not display_columns:
        html_parts.append("<p>Required columns for mentions display are missing or no data after processing.</p>")
    else:
        # escape=True: mention text is written by third parties and must not
        # be rendered as markup.
        html_parts.append(
            mentions_view[display_columns].head(20).to_html(escape=True, index=False, classes="table table-sm")
        )
    mentions_html_output = "\n".join(html_parts)

    fig = None
    if "sentiment_label" in mentions_df.columns:
        try:
            fig = _mentions_sentiment_figure(mentions_df["sentiment_label"])
            logging.info("Mentions tab: Sentiment distribution plot generated.")
        except Exception as exc:
            # Plotting is best-effort: the table is still returned.
            logging.error("Error generating mentions plot: %s", exc, exc_info=True)
            fig = None
    else:
        logging.info("Mentions tab: Not enough data or 'sentiment_label' column missing for plot.")

    return mentions_html_output, fig
def _follower_rows_of_type(stats, type_value, required_columns):
    """Return a copy of the rows of one stat type with all required values present.

    An empty frame is returned (instead of raising ``KeyError``) when the type
    column or any of *required_columns* is absent from *stats*.
    """
    needed = [FOLLOWER_STATS_TYPE_COLUMN, *required_columns]
    if any(col not in stats.columns for col in needed):
        return stats.iloc[0:0].copy()
    keep = stats[FOLLOWER_STATS_TYPE_COLUMN] == type_value
    for col in required_columns:
        keep = keep & stats[col].notna()
    return stats[keep].copy()


def _follower_new_axes(figsize):
    """Create a figure/axes pair without going through pyplot.

    Figures built this way are not stored in pyplot's global registry, so they
    are released once the caller drops them, and no later call relies on
    pyplot's process-wide "current figure".
    """
    from matplotlib.figure import Figure

    fig = Figure(figsize=figsize)
    return fig, fig.subplots()


def _follower_rotate_xticks(ax):
    """Rotate the x tick labels of *ax* by 45 degrees, right-aligned."""
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(45)
        tick_label.set_horizontalalignment('right')


def _follower_monthly_section(stats):
    """Build the monthly-gains table and line plot.

    Returns:
        ``(html_fragments, figure_or_None)``.
    """
    no_data_message = "<p>No monthly follower gain data available or required columns missing.</p>"
    value_columns = [
        FOLLOWER_STATS_CATEGORY_COLUMN,
        FOLLOWER_STATS_ORGANIC_COLUMN,
        FOLLOWER_STATS_PAID_COLUMN,
    ]
    monthly = _follower_rows_of_type(stats, 'follower_gains_monthly', value_columns)
    if monthly.empty:
        return [no_data_message], None

    fragments = []
    figure = None
    try:
        monthly[FOLLOWER_STATS_CATEGORY_COLUMN_DT] = pd.to_datetime(
            monthly[FOLLOWER_STATS_CATEGORY_COLUMN], errors='coerce'
        )
        # Rows whose date cannot be parsed have no month to report.
        dated = monthly.dropna(subset=[FOLLOWER_STATS_CATEGORY_COLUMN_DT])
        if dated.empty:
            return [no_data_message], None

        fragments.append("<h4>Monthly Follower Gains (Last 13 Months):</h4>")
        recent = dated.sort_values(by=FOLLOWER_STATS_CATEGORY_COLUMN_DT, ascending=False).head(13)
        table = recent[value_columns].copy()
        table[FOLLOWER_STATS_CATEGORY_COLUMN] = (
            recent[FOLLOWER_STATS_CATEGORY_COLUMN_DT].dt.strftime(UI_MONTH_FORMAT)
        )
        fragments.append(table.to_html(escape=True, index=False, classes="table table-sm"))

        # Aggregate per month label in case several rows share a month.
        # Rows are put in chronological order first and groupby(sort=False)
        # keeps first-appearance order, so the x axis is chronological even
        # when the formatted label does not sort lexically.
        chronological = dated.sort_values(by=FOLLOWER_STATS_CATEGORY_COLUMN_DT, ascending=True)
        month_labels = chronological[FOLLOWER_STATS_CATEGORY_COLUMN_DT].dt.strftime(UI_MONTH_FORMAT)
        plot_data = (
            chronological.assign(_plot_month=month_labels)
            .groupby('_plot_month', sort=False)
            .agg(
                organic=(FOLLOWER_STATS_ORGANIC_COLUMN, 'sum'),
                paid=(FOLLOWER_STATS_PAID_COLUMN, 'sum'),
            )
            .reset_index()
        )

        fig, ax = _follower_new_axes((10, 5))
        ax.plot(plot_data['_plot_month'], plot_data['organic'], marker='o', linestyle='-', label='Organic Gain')
        ax.plot(plot_data['_plot_month'], plot_data['paid'], marker='x', linestyle='--', label='Paid Gain')
        ax.set_title("Monthly Follower Gains Over Time")
        ax.set_ylabel("Follower Count")
        ax.set_xlabel("Month (YYYY-MM)")
        _follower_rotate_xticks(ax)
        ax.legend()
        ax.grid(True, linestyle='--', alpha=0.7)
        fig.tight_layout()
        figure = fig
        logging.info("Follower stats tab: Monthly gains plot generated.")
    except Exception as exc:
        # Best-effort: keep whatever HTML was produced and report the failure.
        logging.error("Error processing or plotting monthly gains: %s", exc, exc_info=True)
        fragments.append("<p>Error displaying monthly follower gain data.</p>")
    return fragments, figure


def _follower_category_section(stats, dimension, bar_color):
    """Build the top-10 table and bar chart for one demographic dimension.

    Args:
        stats: Follower statistics DataFrame.
        dimension: Lower-case dimension name ("seniority" or "industry"); the
            rows selected are those whose type equals ``"follower_<dimension>"``.
        bar_color: Matplotlib colour used for the bars.

    Returns:
        ``(html_fragments, figure_or_None)``.
    """
    title = dimension.capitalize()
    rows = _follower_rows_of_type(
        stats,
        f"follower_{dimension}",
        [FOLLOWER_STATS_CATEGORY_COLUMN, FOLLOWER_STATS_ORGANIC_COLUMN],
    )
    if rows.empty:
        return [f"<p>No follower {dimension} data available or required columns missing.</p>"], None

    fragments = []
    figure = None
    try:
        # The same ten rows feed both the table and the chart.
        top_rows = rows.sort_values(by=FOLLOWER_STATS_ORGANIC_COLUMN, ascending=False).head(10)
        fragments.append(f"<h4>Followers by {title} (Top 10 Organic):</h4>")
        fragments.append(
            top_rows[[FOLLOWER_STATS_CATEGORY_COLUMN, FOLLOWER_STATS_ORGANIC_COLUMN, FOLLOWER_STATS_PAID_COLUMN]]
            .to_html(escape=True, index=False, classes="table table-sm")
        )

        fig, ax = _follower_new_axes((8, 5))
        ax.bar(top_rows[FOLLOWER_STATS_CATEGORY_COLUMN], top_rows[FOLLOWER_STATS_ORGANIC_COLUMN], color=bar_color)
        ax.set_title(f"Follower Distribution by {title} (Top 10 Organic)")
        ax.set_ylabel("Organic Follower Count")
        _follower_rotate_xticks(ax)
        ax.grid(axis='y', linestyle='--', alpha=0.7)
        fig.tight_layout()
        figure = fig
        logging.info("Follower stats tab: %s distribution plot generated.", title)
    except Exception as exc:
        logging.error("Error processing or plotting %s data: %s", dimension, exc, exc_info=True)
        fragments.append(f"<p>Error displaying follower {dimension} data.</p>")
    return fragments, figure


def run_follower_stats_tab_display(token_state):
    """Generate the HTML and the three plots for the Follower Stats tab.

    Args:
        token_state: Mapping read with ``.get``. Keys used: ``"token"``
            (must be truthy) and ``"bubble_follower_stats_df"`` (pandas
            DataFrame; a missing or non-DataFrame value is treated as
            "no data").

    Returns:
        ``(html, monthly_gains_fig, seniority_fig, industry_fig)``. Each
        figure is ``None`` when its data is unavailable or plotting failed;
        all three are ``None`` when access is denied or there is no data.
    """
    logging.info("Updating Follower Stats Tab display.")
    if not token_state or not token_state.get("token"):
        logging.warning("Follower stats tab: Access denied. No token.")
        return "❌ Access denied. No token available for follower stats.", None, None, None

    follower_stats_df_orig = token_state.get("bubble_follower_stats_df")
    if not isinstance(follower_stats_df_orig, pd.DataFrame) or follower_stats_df_orig.empty:
        logging.info("Follower stats tab: No follower stats data in Bubble.")
        return "<p style='text-align:center;'>No follower stats data in Bubble. Try syncing.</p>", None, None, None

    # Work on a copy so the frame held in token_state is never modified.
    follower_stats_df = follower_stats_df_orig.copy()
    html_parts = ["<div style='padding:10px;'><h3 style='text-align:center;'>Follower Statistics Overview</h3>"]

    monthly_html, plot_monthly_gains = _follower_monthly_section(follower_stats_df)
    html_parts.extend(monthly_html)
    html_parts.append("<hr/>")

    seniority_html, plot_seniority_dist = _follower_category_section(follower_stats_df, "seniority", 'skyblue')
    html_parts.extend(seniority_html)
    html_parts.append("<hr/>")

    industry_html, plot_industry_dist = _follower_category_section(follower_stats_df, "industry", 'lightcoral')
    html_parts.extend(industry_html)

    html_parts.append("</div>")
    follower_html_output = "\n".join(html_parts)
    return follower_html_output, plot_monthly_gains, plot_seniority_dist, plot_industry_dist