Spaces:
Running
Running
import pandas as pd | |
import matplotlib.pyplot as plt | |
import logging | |
from io import BytesIO | |
import base64 | |
import numpy as np | |
# Module-wide logging setup: INFO level, each record prefixed with the
# timestamp, level name and originating module.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(module)s - %(message)s',
    level=logging.INFO,
)
def create_placeholder_plot(title="No Data or Plot Error", message="Data might be empty or an error occurred."):
    """Create a text-only Matplotlib figure used when a real plot cannot be drawn.

    Args:
        title: Heading shown on the first line of the placeholder text.
        message: Explanation shown underneath the title.

    Returns:
        A Matplotlib figure with hidden axes and the text centred. If building
        the styled placeholder fails, a minimal figure reading
        "Plot generation error" is returned instead.

    Note:
        The returned figure is not closed here; cleanup is left to the caller.
    """
    fig = None
    try:
        fig, ax = plt.subplots(figsize=(8, 4))
        ax.text(0.5, 0.5, f"{title}\n{message}", ha='center', va='center', fontsize=10, wrap=True)
        ax.axis('off')
        # Lay out this specific figure instead of relying on pyplot's
        # implicit "current figure".
        fig.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error creating placeholder plot: {e}", exc_info=True)
        if fig is not None:
            # Discard the half-built figure so it does not stay registered
            # with pyplot once we hand back the fallback instead.
            plt.close(fig)
        fallback_fig, fallback_ax = plt.subplots()
        fallback_ax.text(0.5, 0.5, "Plot generation error", ha='center', va='center')
        fallback_ax.axis('off')
        return fallback_fig
def generate_posts_activity_plot(df, date_column='published_at'):
    """Plot the number of posts per calendar day as a line chart.

    Rows are counted per day by resampling on ``date_column``. Values that
    cannot be parsed as dates are coerced to NaT and dropped first.

    Args:
        df: DataFrame with one row per post, or None. The input is copied and
            never modified.
        date_column: Name of the column holding each post's timestamp.

    Returns:
        A Matplotlib figure. When the input is None/empty, the column is
        absent, no valid dates remain, or plotting raises, a placeholder
        figure describing the problem is returned instead.

    Note:
        ``plt.close('all')`` runs on every exit path, including the early
        "no data" returns, so figures do not pile up in pyplot across calls.
    """
    logging.info(f"Generating posts activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
    try:
        # Input validation lives inside the outer try so that the placeholder
        # figures it returns go through the same cleanup as real plots.
        if df is None or df.empty:
            logging.warning("Posts activity: DataFrame is empty.")
            return create_placeholder_plot(title="Posts Activity Over Time", message="No data available for the selected period.")
        if date_column not in df.columns:
            logging.warning(f"Posts activity: Date column '{date_column}' is missing from DataFrame columns: {df.columns.tolist()}.")
            return create_placeholder_plot(title="Posts Activity Over Time", message=f"Date column '{date_column}' not found.")

        try:
            df_copy = df.copy()
            if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
                df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
            df_copy = df_copy.dropna(subset=[date_column])
            if df_copy.empty:
                logging.info("Posts activity: DataFrame empty after NaNs dropped from date column.")
                return create_placeholder_plot(title="Posts Activity Over Time", message="No valid date entries found.")

            # One bucket per day; days without posts show up as zero.
            posts_over_time = df_copy.set_index(date_column).resample('D').size()
            if posts_over_time.empty:
                logging.info("Posts activity: No posts after resampling by day.")
                return create_placeholder_plot(title="Posts Activity Over Time", message="No posts in the selected period.")

            fig, ax = plt.subplots(figsize=(10, 5))
            posts_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-')
            ax.set_title('Posts Activity Over Time')
            ax.set_xlabel('Date')
            ax.set_ylabel('Number of Posts')
            ax.grid(True, linestyle='--', alpha=0.7)
            # Address the axes/figure directly rather than pyplot's implicit
            # "current" axes and figure.
            ax.tick_params(axis='x', labelrotation=45)
            fig.tight_layout()
            logging.info("Successfully generated posts activity plot.")
            return fig
        except Exception as e:
            logging.error(f"Error generating posts activity plot: {e}", exc_info=True)
            return create_placeholder_plot(title="Posts Activity Error", message=str(e))
    finally:
        plt.close('all')
def generate_engagement_type_plot(df, likes_col='likes_count', comments_col='comments_count', shares_col='shares_count'):
    """Draw a bar chart of total likes, comments and shares.

    Each column is coerced to numeric (unparseable or missing values count as
    0) and summed. The input is expected to be pre-filtered by date if needed.

    Args:
        df: DataFrame with one row per post, or None. It is only read.
        likes_col: Name of the column holding like counts.
        comments_col: Name of the column holding comment counts.
        shares_col: Name of the column holding share counts.

    Returns:
        A Matplotlib figure. When the input is None/empty, a required column
        is absent, all three totals are zero, or plotting raises, a
        placeholder figure describing the problem is returned instead.

    Note:
        ``plt.close('all')`` runs on every exit path, including the early
        "no data" returns, so figures do not pile up in pyplot across calls.
    """
    logging.info(f"Generating engagement type plot. Input df rows: {len(df) if df is not None else 'None'}")
    required_cols = [likes_col, comments_col, shares_col]
    try:
        # Input validation lives inside the outer try so that the placeholder
        # figures it returns go through the same cleanup as real plots.
        if df is None or df.empty:
            logging.warning("Engagement type: DataFrame is empty.")
            return create_placeholder_plot(title="Post Engagement Types", message="No data available for the selected period.")
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            msg = f"Engagement type: Columns missing: {missing_cols}. Available: {df.columns.tolist()}"
            logging.warning(msg)
            return create_placeholder_plot(title="Post Engagement Types", message=msg)

        try:
            # Coerce and sum each column directly; pd.to_numeric returns a new
            # Series, so the caller's frame is untouched and no full copy of
            # the DataFrame is needed.
            engagement_data = {
                label: pd.to_numeric(df[col], errors='coerce').fillna(0).sum()
                for label, col in zip(('Likes', 'Comments', 'Shares'), required_cols)
            }
            if all(total == 0 for total in engagement_data.values()):
                logging.info("Engagement type: All engagement counts are zero.")
                return create_placeholder_plot(title="Post Engagement Types", message="No engagement data (likes, comments, shares) in the selected period.")

            fig, ax = plt.subplots(figsize=(8, 5))
            bars = ax.bar(
                list(engagement_data.keys()),
                list(engagement_data.values()),
                color=['skyblue', 'lightgreen', 'salmon'],
            )
            ax.set_title('Total Post Engagement Types')
            ax.set_xlabel('Engagement Type')
            ax.set_ylabel('Total Count')
            ax.grid(axis='y', linestyle='--', alpha=0.7)

            # Lift each value label 1% of the tallest bar above its bar top.
            label_offset = 0.01 * max(engagement_data.values())
            for bar in bars:
                yval = bar.get_height()
                ax.text(bar.get_x() + bar.get_width() / 2.0, yval + label_offset, str(int(yval)), ha='center', va='bottom')

            fig.tight_layout()
            logging.info("Successfully generated engagement type plot.")
            return fig
        except Exception as e:
            logging.error(f"Error generating engagement type plot: {e}", exc_info=True)
            return create_placeholder_plot(title="Engagement Type Error", message=str(e))
    finally:
        plt.close('all')
def generate_mentions_activity_plot(df, date_column='date'):
    """Plot the number of mentions per calendar day as a purple line chart.

    Rows are counted per day by resampling on ``date_column``. Values that
    cannot be parsed as dates are coerced to NaT and dropped first.

    Args:
        df: DataFrame with one row per mention, or None. The input is copied
            and never modified.
        date_column: Name of the column holding each mention's timestamp.

    Returns:
        A Matplotlib figure. When the input is None/empty, the column is
        absent, no valid dates remain, or plotting raises, a placeholder
        figure describing the problem is returned instead.

    Note:
        ``plt.close('all')`` runs on every exit path, including the early
        "no data" returns, so figures do not pile up in pyplot across calls.
    """
    logging.info(f"Generating mentions activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
    try:
        # Input validation lives inside the outer try so that the placeholder
        # figures it returns go through the same cleanup as real plots.
        if df is None or df.empty:
            logging.warning("Mentions activity: DataFrame is empty.")
            return create_placeholder_plot(title="Mentions Activity Over Time", message="No data available for the selected period.")
        if date_column not in df.columns:
            logging.warning(f"Mentions activity: Date column '{date_column}' is missing from DataFrame columns: {df.columns.tolist()}.")
            return create_placeholder_plot(title="Mentions Activity Over Time", message=f"Date column '{date_column}' not found.")

        try:
            df_copy = df.copy()
            if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
                df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
            df_copy = df_copy.dropna(subset=[date_column])
            if df_copy.empty:
                logging.info("Mentions activity: DataFrame empty after NaNs dropped from date column.")
                return create_placeholder_plot(title="Mentions Activity Over Time", message="No valid date entries found.")

            # One bucket per day; days without mentions show up as zero.
            mentions_over_time = df_copy.set_index(date_column).resample('D').size()
            if mentions_over_time.empty:
                logging.info("Mentions activity: No mentions after resampling by day.")
                return create_placeholder_plot(title="Mentions Activity Over Time", message="No mentions in the selected period.")

            fig, ax = plt.subplots(figsize=(10, 5))
            mentions_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-', color='purple')
            ax.set_title('Mentions Activity Over Time')
            ax.set_xlabel('Date')
            ax.set_ylabel('Number of Mentions')
            ax.grid(True, linestyle='--', alpha=0.7)
            # Address the axes/figure directly rather than pyplot's implicit
            # "current" axes and figure.
            ax.tick_params(axis='x', labelrotation=45)
            fig.tight_layout()
            logging.info("Successfully generated mentions activity plot.")
            return fig
        except Exception as e:
            logging.error(f"Error generating mentions activity plot: {e}", exc_info=True)
            return create_placeholder_plot(title="Mentions Activity Error", message=str(e))
    finally:
        plt.close('all')
def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
    """Draw a pie chart of how mentions are distributed across sentiment labels.

    The input is expected to be pre-filtered by date if needed.

    Args:
        df: DataFrame with one row per mention, or None. It is only read.
        sentiment_column: Name of the column holding the sentiment label.

    Returns:
        A Matplotlib figure. When the input is None/empty, the column is
        absent, the column yields no counts, or plotting raises, a placeholder
        figure describing the problem is returned instead.

    Note:
        ``plt.close('all')`` runs on every exit path, including the early
        "no data" returns, so figures do not pile up in pyplot across calls.
    """
    logging.info(f"Generating mention sentiment plot. Sentiment column: '{sentiment_column}'. Input df rows: {len(df) if df is not None else 'None'}")
    try:
        # Input validation lives inside the outer try so that the placeholder
        # figures it returns go through the same cleanup as real plots.
        if df is None or df.empty:
            logging.warning("Mention sentiment: DataFrame is empty.")
            return create_placeholder_plot(title="Mention Sentiment Distribution", message="No data available for the selected period.")
        if sentiment_column not in df.columns:
            msg = f"Mention sentiment: Column '{sentiment_column}' is missing. Available: {df.columns.tolist()}"
            logging.warning(msg)
            return create_placeholder_plot(title="Mention Sentiment Distribution", message=msg)

        try:
            # value_counts() only reads the column, so no defensive copy of
            # the frame is required.
            sentiment_counts = df[sentiment_column].value_counts()
            if sentiment_counts.empty:
                logging.info("Mention sentiment: No sentiment data after value_counts.")
                return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")

            fig, ax = plt.subplots(figsize=(8, 5))
            colors = {'Positive': 'lightgreen', 'Negative': 'salmon', 'Neutral': 'lightskyblue', 'Mixed': 'gold'}
            # Labels outside the known set fall back to a neutral grey.
            pie_colors = [colors.get(label, '#cccccc') for label in sentiment_counts.index]
            ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
            ax.set_title('Mention Sentiment Distribution')
            # Equal aspect ratio keeps the pie circular.
            ax.axis('equal')
            fig.tight_layout()
            logging.info("Successfully generated mention sentiment plot.")
            return fig
        except Exception as e:
            logging.error(f"Error generating mention sentiment plot: {e}", exc_info=True)
            return create_placeholder_plot(title="Mention Sentiment Error", message=str(e))
    finally:
        plt.close('all')
def generate_follower_growth_plot(df, date_column='date', count_column='total_followers'):
    """Plot follower totals against time as a green line chart.

    Dates are parsed and counts coerced to numeric; rows where either value is
    invalid are dropped, and the remainder is sorted by date before plotting.

    Args:
        df: Follower DataFrame, or None. The input is copied and never
            modified.
        date_column: Name of the column holding the observation date.
        count_column: Name of the column holding the follower total.

    Returns:
        A Matplotlib figure. When the input is None/empty, a required column
        is absent, no valid rows remain, or plotting raises, a placeholder
        figure describing the problem is returned instead.

    Note:
        ``plt.close('all')`` runs on every exit path, including the early
        "no data" returns, so figures do not pile up in pyplot across calls.
    """
    logging.info(f"Generating follower growth plot. Date col: '{date_column}', Count col: '{count_column}'. Input df rows: {len(df) if df is not None else 'None'}")
    try:
        # Input validation lives inside the outer try so that the placeholder
        # figures it returns go through the same cleanup as real plots.
        if df is None or df.empty:
            logging.warning("Follower growth: DataFrame is empty.")
            return create_placeholder_plot(title="Follower Growth Over Time", message="No follower data available.")
        missing = [col for col in (date_column, count_column) if col not in df.columns]
        if missing:
            msg = f"Follower growth: Columns missing: {missing}. Available: {df.columns.tolist()}"
            logging.warning(msg)
            return create_placeholder_plot(title="Follower Growth Over Time", message=msg)

        try:
            df_copy = df.copy()
            if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
                df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
            df_copy[count_column] = pd.to_numeric(df_copy[count_column], errors='coerce')
            df_copy = df_copy.dropna(subset=[date_column, count_column])
            if df_copy.empty:
                logging.info("Follower growth: DataFrame empty after NaNs dropped from date/count columns.")
                return create_placeholder_plot(title="Follower Growth Over Time", message="No valid data for follower growth.")

            # Sort chronologically so the line is drawn left to right.
            df_copy = df_copy.sort_values(by=date_column)

            fig, ax = plt.subplots(figsize=(10, 5))
            ax.plot(df_copy[date_column], df_copy[count_column], marker='o', linestyle='-', color='green')
            ax.set_title('Follower Growth Over Time')
            ax.set_xlabel('Date')
            ax.set_ylabel('Total Followers')
            ax.grid(True, linestyle='--', alpha=0.7)
            # Address the axes/figure directly rather than pyplot's implicit
            # "current" axes and figure.
            ax.tick_params(axis='x', labelrotation=45)
            fig.tight_layout()
            logging.info("Successfully generated follower growth plot.")
            return fig
        except Exception as e:
            logging.error(f"Error generating follower growth plot: {e}", exc_info=True)
            return create_placeholder_plot(title="Follower Growth Error", message=str(e))
    finally:
        plt.close('all')
if __name__ == '__main__':
    # Manual smoke test: build small sample frames, run every plot generator
    # once, then exercise the placeholder paths. Results are only logged.
    sample_posts_df = pd.DataFrame({
        'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03']),
        'likes_count': [10, 5, 12, 8, 15, 3],
        'comments_count': [2, 1, 3, 1, 4, 0],
        'shares_count': [1, 0, 1, 1, 2, 0],
    })
    sample_mentions_df = pd.DataFrame({
        'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-02', '2023-01-03']),
        'sentiment_label': ['Positive', 'Negative', 'Positive', 'Neutral'],
    })
    sample_follower_stats_df = pd.DataFrame({
        'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']),
        'total_followers': [100, 105, 115, 120, 118],
    })
    # A frame that has the expected column but no rows, and one that has rows
    # but lacks the expected column.
    empty_df = pd.DataFrame(columns=['published_at'])
    df_no_col = pd.DataFrame({'some_other_date': pd.to_datetime(['2023-01-01'])})

    # Each entry pairs a zero-argument builder with the message logged when
    # the builder hands back a figure.
    plot_checks = [
        (lambda: generate_posts_activity_plot(sample_posts_df.copy(), date_column='published_at'),
         "Posts activity plot generated."),
        (lambda: generate_engagement_type_plot(sample_posts_df.copy()),
         "Engagement type plot generated."),
        (lambda: generate_mentions_activity_plot(sample_mentions_df.copy(), date_column='date'),
         "Mentions activity plot generated."),
        (lambda: generate_mention_sentiment_plot(sample_mentions_df.copy()),
         "Mention sentiment plot generated."),
        (lambda: generate_follower_growth_plot(sample_follower_stats_df.copy(), date_column='date', count_column='total_followers'),
         "Follower growth plot generated."),
    ]
    placeholder_checks = [
        (lambda: create_placeholder_plot(),
         "Placeholder plot generated."),
        (lambda: generate_posts_activity_plot(empty_df, date_column='published_at'),
         "Empty posts activity plot (placeholder) generated."),
        (lambda: generate_posts_activity_plot(df_no_col, date_column='published_at'),
         "Posts activity with missing column (placeholder) generated."),
    ]

    logging.info("--- Testing Plot Generations ---")
    for build_figure, success_message in plot_checks:
        if build_figure():
            logging.info(success_message)

    logging.info("--- Testing Placeholders ---")
    for build_figure, success_message in placeholder_checks:
        if build_figure():
            logging.info(success_message)

    logging.info("Test script finished.")