File size: 15,878 Bytes
f20ee95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
import pandas as pd
import matplotlib.pyplot as plt
import logging
from io import BytesIO
import base64
import numpy as np

# Root logging setup for this module: INFO level; each record shows the
# timestamp, level name, originating module and message.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(module)s - %(message)s',
    level=logging.INFO,
)

def create_placeholder_plot(title="No Data or Plot Error", message="Data might be empty or an error occurred."):
    """Creates a placeholder Matplotlib figure indicating no data or an error.

    Args:
        title: First line of the centred text.
        message: Text placed on the line below the title.

    Returns:
        A Matplotlib figure with hidden axes showing ``title`` and ``message``.
        If building it raises, the error is logged and a minimal figure
        reading "Plot generation error" is returned instead.

    The returned figure is created with ``plt.subplots`` and is not closed
    here, so it stays registered with pyplot until a caller closes it.
    """
    fig = None
    try:
        fig, ax = plt.subplots(figsize=(8, 4))
        ax.text(0.5, 0.5, f"{title}\n{message}", ha='center', va='center', fontsize=10, wrap=True)
        ax.axis('off')
        # Lay out this specific figure rather than pyplot's implicit "current" one.
        fig.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error creating placeholder plot: {e}", exc_info=True)
        if fig is not None:
            # Discard the half-built figure so it does not linger in pyplot's registry.
            plt.close(fig)
        fallback_fig, fallback_ax = plt.subplots()
        fallback_ax.text(0.5, 0.5, "Plot generation error", ha='center', va='center')
        fallback_ax.axis('off')
        return fallback_fig


def generate_posts_activity_plot(df, date_column='published_at'):
    """
    Generates a line plot of the number of posts per day.

    Every row of ``df`` is counted as one post. Values in ``date_column`` that
    are not already datetimes are parsed with ``pd.to_datetime``; rows whose
    date cannot be parsed are dropped before the daily resample.

    Args:
        df: DataFrame holding the posts. May be None or empty.
        date_column: Name of the column holding each post's date.

    Returns:
        A Matplotlib figure. A placeholder figure from
        ``create_placeholder_plot`` is returned when there is no usable data,
        when ``date_column`` is missing, or when plotting raises.
    """
    logging.info(f"Generating posts activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
    if df is None or df.empty:
        logging.warning("Posts activity: DataFrame is empty.")
        return create_placeholder_plot(title="Posts Activity Over Time", message="No data available for the selected period.")
    if date_column not in df.columns:
        logging.warning(f"Posts activity: Date column '{date_column}' is missing from DataFrame columns: {df.columns.tolist()}.")
        return create_placeholder_plot(title="Posts Activity Over Time", message=f"Date column '{date_column}' not found.")

    try:
        # Work on a copy so the caller's frame is never modified by the date parsing.
        df_copy = df.copy()
        if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
            df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')

        # Unparseable dates became NaT above; they cannot be placed on the time axis.
        df_copy = df_copy.dropna(subset=[date_column])
        if df_copy.empty:
            logging.info("Posts activity: DataFrame empty after NaNs dropped from date column.")
            return create_placeholder_plot(title="Posts Activity Over Time", message="No valid date entries found.")

        # One bucket per calendar day; the bucket size is the number of posts that day.
        posts_over_time = df_copy.set_index(date_column).resample('D').size()

        if posts_over_time.empty:
            logging.info("Posts activity: No posts after resampling by day.")
            return create_placeholder_plot(title="Posts Activity Over Time", message="No posts in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        posts_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-')
        ax.set_title('Posts Activity Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Number of Posts')
        ax.grid(True, linestyle='--', alpha=0.7)
        # Use the Axes/Figure objects directly instead of pyplot's implicit
        # "current figure", which may no longer be this one if figures are
        # closed elsewhere while this plot is being built.
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()
        logging.info("Successfully generated posts activity plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating posts activity plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Posts Activity Error", message=str(e))
    finally:
        # Runs on every exit from the try block, after the return value has
        # been computed. NOTE(review): this deregisters *every* pyplot figure
        # in the process, including the one being returned.
        plt.close('all')

def generate_engagement_type_plot(df, likes_col='likes_count', comments_col='comments_count', shares_col='shares_count'):
    """
    Generates a bar plot of total likes, comments and shares.

    Each of the three columns is coerced to numeric (non-numeric and missing
    values count as 0) and summed over all rows. The input is expected to be
    pre-filtered by date if necessary.

    Args:
        df: DataFrame holding per-post engagement counts. May be None or empty.
        likes_col: Name of the likes column.
        comments_col: Name of the comments column.
        shares_col: Name of the shares column.

    Returns:
        A Matplotlib figure. A placeholder figure from
        ``create_placeholder_plot`` is returned when the frame is empty, a
        column is missing, all three totals are zero, or plotting raises.
    """
    logging.info(f"Generating engagement type plot. Input df rows: {len(df) if df is not None else 'None'}")

    required_cols = [likes_col, comments_col, shares_col]
    if df is None or df.empty:
        logging.warning("Engagement type: DataFrame is empty.")
        return create_placeholder_plot(title="Post Engagement Types", message="No data available for the selected period.")

    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        msg = f"Engagement type: Columns missing: {missing_cols}. Available: {df.columns.tolist()}"
        logging.warning(msg)
        return create_placeholder_plot(title="Post Engagement Types", message=msg)

    try:
        # Work on a copy so the numeric coercion never touches the caller's frame.
        df_copy = df.copy()
        for col in required_cols:
            df_copy[col] = pd.to_numeric(df_copy[col], errors='coerce').fillna(0)

        total_likes = df_copy[likes_col].sum()
        total_comments = df_copy[comments_col].sum()
        total_shares = df_copy[shares_col].sum()

        if total_likes == 0 and total_comments == 0 and total_shares == 0:
            logging.info("Engagement type: All engagement counts are zero.")
            return create_placeholder_plot(title="Post Engagement Types", message="No engagement data (likes, comments, shares) in the selected period.")

        engagement_data = {
            'Likes': total_likes,
            'Comments': total_comments,
            'Shares': total_shares
        }
        bar_labels = list(engagement_data.keys())
        bar_heights = list(engagement_data.values())

        fig, ax = plt.subplots(figsize=(8, 5))
        bars = ax.bar(bar_labels, bar_heights, color=['skyblue', 'lightgreen', 'salmon'])
        ax.set_title('Total Post Engagement Types')
        ax.set_xlabel('Engagement Type')
        ax.set_ylabel('Total Count')
        ax.grid(axis='y', linestyle='--', alpha=0.7)

        # Gap between a bar's top and its value label: 1% of the tallest bar.
        # It is the same for every bar, so compute it once outside the loop.
        label_offset = 0.01 * max(bar_heights)
        for bar in bars:
            yval = bar.get_height()
            ax.text(bar.get_x() + bar.get_width()/2.0, yval + label_offset, str(int(yval)), ha='center', va='bottom')

        # Lay out this specific figure rather than pyplot's implicit "current" one.
        fig.tight_layout()
        logging.info("Successfully generated engagement type plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating engagement type plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Engagement Type Error", message=str(e))
    finally:
        # Runs on every exit from the try block, after the return value has
        # been computed. NOTE(review): this deregisters *every* pyplot figure
        # in the process, including the one being returned.
        plt.close('all')

def generate_mentions_activity_plot(df, date_column='date'):
    """
    Generates a line plot of the number of mentions per day.

    Every row of ``df`` is counted as one mention. Values in ``date_column``
    that are not already datetimes are parsed with ``pd.to_datetime``; rows
    whose date cannot be parsed are dropped before the daily resample.

    Args:
        df: DataFrame holding the mentions. May be None or empty.
        date_column: Name of the column holding each mention's date.

    Returns:
        A Matplotlib figure. A placeholder figure from
        ``create_placeholder_plot`` is returned when there is no usable data,
        when ``date_column`` is missing, or when plotting raises.
    """
    logging.info(f"Generating mentions activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
    if df is None or df.empty:
        logging.warning("Mentions activity: DataFrame is empty.")
        return create_placeholder_plot(title="Mentions Activity Over Time", message="No data available for the selected period.")
    if date_column not in df.columns:
        logging.warning(f"Mentions activity: Date column '{date_column}' is missing from DataFrame columns: {df.columns.tolist()}.")
        return create_placeholder_plot(title="Mentions Activity Over Time", message=f"Date column '{date_column}' not found.")

    try:
        # Work on a copy so the caller's frame is never modified by the date parsing.
        df_copy = df.copy()
        if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
            df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')

        # Unparseable dates became NaT above; they cannot be placed on the time axis.
        df_copy = df_copy.dropna(subset=[date_column])
        if df_copy.empty:
            logging.info("Mentions activity: DataFrame empty after NaNs dropped from date column.")
            return create_placeholder_plot(title="Mentions Activity Over Time", message="No valid date entries found.")

        # One bucket per calendar day; the bucket size is the number of mentions that day.
        mentions_over_time = df_copy.set_index(date_column).resample('D').size()

        if mentions_over_time.empty:
            logging.info("Mentions activity: No mentions after resampling by day.")
            return create_placeholder_plot(title="Mentions Activity Over Time", message="No mentions in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        mentions_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-', color='purple')
        ax.set_title('Mentions Activity Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Number of Mentions')
        ax.grid(True, linestyle='--', alpha=0.7)
        # Use the Axes/Figure objects directly instead of pyplot's implicit
        # "current figure", which may no longer be this one if figures are
        # closed elsewhere while this plot is being built.
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()
        logging.info("Successfully generated mentions activity plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating mentions activity plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Mentions Activity Error", message=str(e))
    finally:
        # Runs on every exit from the try block, after the return value has
        # been computed. NOTE(review): this deregisters *every* pyplot figure
        # in the process, including the one being returned.
        plt.close('all')

def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
    """
    Generates a pie chart of how mentions are distributed across sentiment labels.

    Slices are sized by ``value_counts`` of ``sentiment_column``. The labels
    'Positive', 'Negative', 'Neutral' and 'Mixed' (exact spelling) get fixed
    colours; any other label is drawn in grey. The input is expected to be
    pre-filtered by date if necessary.

    Args:
        df: DataFrame holding the mentions. May be None or empty.
        sentiment_column: Name of the column holding each mention's sentiment label.

    Returns:
        A Matplotlib figure. A placeholder figure from
        ``create_placeholder_plot`` is returned when the frame is empty, the
        column is missing, no label can be counted, or plotting raises.
    """
    logging.info(f"Generating mention sentiment plot. Sentiment column: '{sentiment_column}'. Input df rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        logging.warning("Mention sentiment: DataFrame is empty.")
        return create_placeholder_plot(title="Mention Sentiment Distribution", message="No data available for the selected period.")
    if sentiment_column not in df.columns:
        msg = f"Mention sentiment: Column '{sentiment_column}' is missing. Available: {df.columns.tolist()}"
        logging.warning(msg)
        return create_placeholder_plot(title="Mention Sentiment Distribution", message=msg)

    try:
        # value_counts() does not modify the frame, so no defensive copy is needed.
        sentiment_counts = df[sentiment_column].value_counts()
        if sentiment_counts.empty:
            logging.info("Mention sentiment: No sentiment data after value_counts.")
            return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")

        fig, ax = plt.subplots(figsize=(8, 5))
        colors = {'Positive': 'lightgreen', 'Negative': 'salmon', 'Neutral': 'lightskyblue', 'Mixed': 'gold'}
        # Labels outside the known set fall back to a neutral grey.
        pie_colors = [colors.get(label, '#cccccc') for label in sentiment_counts.index]

        ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
        ax.set_title('Mention Sentiment Distribution')
        # Equal aspect ratio keeps the pie circular.
        ax.axis('equal')
        # Lay out this specific figure rather than pyplot's implicit "current" one.
        fig.tight_layout()
        logging.info("Successfully generated mention sentiment plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating mention sentiment plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Mention Sentiment Error", message=str(e))
    finally:
        # Runs on every exit from the try block, after the return value has
        # been computed. NOTE(review): this deregisters *every* pyplot figure
        # in the process, including the one being returned.
        plt.close('all')

def generate_follower_growth_plot(df, date_column='date', count_column='total_followers'):
    """
    Generates a line plot of the follower count over time.

    ``date_column`` is parsed to datetimes and ``count_column`` to numbers;
    rows where either conversion fails are dropped, and the remaining rows are
    plotted in date order. Per the original note, this function receives the
    *unfiltered* follower DataFrame.

    Args:
        df: DataFrame holding follower statistics. May be None or empty.
        date_column: Name of the column holding the observation date.
        count_column: Name of the column holding the follower total.

    Returns:
        A Matplotlib figure. A placeholder figure from
        ``create_placeholder_plot`` is returned when the frame is empty, a
        column is missing, no valid rows remain, or plotting raises.
    """
    logging.info(f"Generating follower growth plot. Date col: '{date_column}', Count col: '{count_column}'. Input df rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        logging.warning("Follower growth: DataFrame is empty.")
        return create_placeholder_plot(title="Follower Growth Over Time", message="No follower data available.")

    missing = [col for col in (date_column, count_column) if col not in df.columns]
    if missing:
        msg = f"Follower growth: Columns missing: {missing}. Available: {df.columns.tolist()}"
        logging.warning(msg)
        return create_placeholder_plot(title="Follower Growth Over Time", message=msg)

    try:
        # Work on a copy so the conversions below never touch the caller's frame.
        df_copy = df.copy()
        if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
            df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')

        df_copy[count_column] = pd.to_numeric(df_copy[count_column], errors='coerce')
        # Failed conversions became NaT/NaN above; such rows cannot be plotted.
        df_copy = df_copy.dropna(subset=[date_column, count_column])

        if df_copy.empty:
            logging.info("Follower growth: DataFrame empty after NaNs dropped from date/count columns.")
            return create_placeholder_plot(title="Follower Growth Over Time", message="No valid data for follower growth.")

        # The line connects points in row order, so sort chronologically first.
        df_copy = df_copy.sort_values(by=date_column)

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(df_copy[date_column], df_copy[count_column], marker='o', linestyle='-', color='green')
        ax.set_title('Follower Growth Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Followers')
        ax.grid(True, linestyle='--', alpha=0.7)
        # Use the Axes/Figure objects directly instead of pyplot's implicit
        # "current figure", which may no longer be this one if figures are
        # closed elsewhere while this plot is being built.
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()
        logging.info("Successfully generated follower growth plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating follower growth plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Follower Growth Error", message=str(e))
    finally:
        # Runs on every exit from the try block, after the return value has
        # been computed. NOTE(review): this deregisters *every* pyplot figure
        # in the process, including the one being returned.
        plt.close('all')

if __name__ == '__main__':
    # Manual smoke test: build small sample frames, run every plot generator
    # once, then exercise the placeholder paths (no data / missing column).

    def _report(figure, text):
        """Log ``text`` when ``figure`` is truthy."""
        if figure:
            logging.info(text)

    # --- Sample data -------------------------------------------------------
    posts_frame = pd.DataFrame({
        'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03']),
        'likes_count': [10, 5, 12, 8, 15, 3],
        'comments_count': [2, 1, 3, 1, 4, 0],
        'shares_count': [1, 0, 1, 1, 2, 0],
    })

    mentions_frame = pd.DataFrame({
        'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-02', '2023-01-03']),
        'sentiment_label': ['Positive', 'Negative', 'Positive', 'Neutral'],
    })

    followers_frame = pd.DataFrame({
        'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']),
        'total_followers': [100, 105, 115, 120, 118],  # Example data
    })

    # --- Regular plots -----------------------------------------------------
    logging.info("--- Testing Plot Generations ---")

    _report(
        generate_posts_activity_plot(posts_frame.copy(), date_column='published_at'),
        "Posts activity plot generated.",
    )
    _report(
        generate_engagement_type_plot(posts_frame.copy()),
        "Engagement type plot generated.",
    )
    _report(
        generate_mentions_activity_plot(mentions_frame.copy(), date_column='date'),
        "Mentions activity plot generated.",
    )
    _report(
        generate_mention_sentiment_plot(mentions_frame.copy()),
        "Mention sentiment plot generated.",
    )
    _report(
        generate_follower_growth_plot(followers_frame.copy(), date_column='date', count_column='total_followers'),
        "Follower growth plot generated.",
    )

    # --- Placeholder paths -------------------------------------------------
    logging.info("--- Testing Placeholders ---")
    _report(create_placeholder_plot(), "Placeholder plot generated.")

    # Frame that has the expected column but no rows.
    no_rows_frame = pd.DataFrame(columns=['published_at'])
    _report(
        generate_posts_activity_plot(no_rows_frame, date_column='published_at'),
        "Empty posts activity plot (placeholder) generated.",
    )

    # Frame that has rows but lacks the expected date column.
    wrong_column_frame = pd.DataFrame({'some_other_date': pd.to_datetime(['2023-01-01'])})
    _report(
        generate_posts_activity_plot(wrong_column_frame, date_column='published_at'),
        "Posts activity with missing column (placeholder) generated.",
    )

    logging.info("Test script finished.")