GuglielmoTor committed on
Commit
9981a7c
·
verified ·
1 Parent(s): b62b1ee

Update insight_and_tasks/agents/follower_agent.py

Browse files
insight_and_tasks/agents/follower_agent.py CHANGED
@@ -0,0 +1,502 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # agents/follower_agent.py
2
+ import pandas as pd
3
+ from typing import Dict, List, Any, Optional
4
+ import logging
5
+ import pandasai as pai # Assuming pandasai is imported as pai globally or configured
6
+
7
+ from google.adk.agents import LlmAgent # Assuming this is the correct import path
8
+
9
+ # Project-specific imports
10
+ from utils.retry_mechanism import RetryMechanism
11
+ from data_models.metrics import AgentMetrics, TimeSeriesMetric
12
+
13
+ # Configure logger for this module
14
+ logger = logging.getLogger(__name__)
15
+
16
+ # Define the model globally or pass it as a parameter. For now, using a constant.
17
+ # Consider moving this to a shared config or environment variable.
18
+ DEFAULT_AGENT_MODEL = "gemini-1.5-flash-latest" # Or your specific model like "gemini-1.5-flash-preview-05-20"
19
+
20
class EnhancedFollowerAnalysisAgent:
    """
    Enhanced follower analysis agent with proper handling of different follower count types
    and structured metric extraction.

    The free-text summary is produced by a PandasAI chat call; the structured metrics
    (time series, aggregates, demographic distributions) are computed locally with pandas
    from the rows of the input DataFrame, split by their 'follower_count_type' value.
    """

    AGENT_NAME = "follower_analyst"
    AGENT_DESCRIPTION = "Expert analyst specializing in follower growth patterns and demographic analysis."
    AGENT_INSTRUCTION = """
    You are a specialized LinkedIn follower analytics expert focused on temporal patterns and demographic trends.

    Your role includes:

    1. FOLLOWER TREND ANALYSIS:
    - Analyze follower growth trends over time (monthly data from 'follower_gains_monthly' type).
    - Identify growth acceleration/deceleration periods.
    - Calculate growth rates and velocity changes.
    - Detect seasonal patterns and anomalies.
    - Analyze organic vs paid follower counts over time.

    2. DEMOGRAPHIC ANALYSIS (based on 'follower_industry', 'follower_seniority', etc.):
    - Analyze follower distribution by industry, seniority, function, and geography.
    - Compare organic vs paid followers across these demographic segments.
    - Identify high-value audience segments based on counts and potential engagement.

    3. TIME-BASED INSIGHTS:
    - Provide month-over-month comparisons for growth data.
    - Identify critical inflection points in follower growth.
    - Calculate trend momentum and acceleration.

    4. METRIC EXTRACTION (for the AgentMetrics structure):
    - Extract time-series data for total, organic, and paid follower counts, and growth rates.
    - Provide aggregate metrics like average monthly growth, total organic/paid followers.
    - Provide demographic breakdowns as categorical metrics (e.g., top N industries by follower count).

    Focus on separating temporal analysis (monthly) from demographic analysis.
    When analyzing demographics, consider the top N segments (e.g., top 10 industries) for conciseness.
    Ensure your analysis summary is comprehensive and insightful.
    """

    # 'follower_count_type' value holding the monthly time-series rows.
    _MONTHLY_TYPE = 'follower_gains_monthly'
    # 'follower_count_type' values holding demographic breakdown rows.
    _DEMOGRAPHIC_TYPES = (
        'follower_industry',
        'follower_seniority',
        'follower_function',
        'follower_geo',
    )
    # Columns carrying the organic and paid follower counts.
    _COUNT_COLUMNS = ('follower_count_organic', 'follower_count_paid')

    def __init__(self, api_key: str, model_name: Optional[str] = None):
        """
        Initializes the Follower Analysis Agent.

        Args:
            api_key: API key for LLM and potentially PandasAI.
            model_name: Name of the language model to use. Defaults to DEFAULT_AGENT_MODEL.
        """
        self.api_key = api_key  # Passed to configure_pandasai when PandasAI has no LLM set
        self.model_name = model_name or DEFAULT_AGENT_MODEL

        self.agent = LlmAgent(
            name=self.AGENT_NAME,
            model=self.model_name,
            description=self.AGENT_DESCRIPTION,
            instruction=self.AGENT_INSTRUCTION
        )
        self.retry_mechanism = RetryMechanism()
        logger.info(f"{self.AGENT_NAME} initialized with model {self.model_name}.")

    @staticmethod
    def _count_series(df: pd.DataFrame, column: str) -> pd.Series:
        """
        Return `column` of `df` as a numeric Series aligned to `df.index`.

        Unparseable or missing values become 0; if the column does not exist at all,
        a Series of zeros is returned. `df` itself is never modified.
        """
        if column not in df.columns:
            return pd.Series(0, index=df.index)
        return pd.to_numeric(df[column], errors='coerce').fillna(0)

    def _separate_follower_data_by_type(self, df: pd.DataFrame) -> Dict[str, pd.DataFrame]:
        """
        Separate follower data by follower_count_type and process appropriately.

        Returns:
            Mapping of follower_count_type -> processed DataFrame. Monthly rows go through
            `_process_monthly_data`; demographic rows are reduced to their top 10 segments.
            Types with no rows are omitted. An empty dict is returned when the input is
            None/empty or lacks the 'follower_count_type' column.
        """
        separated_data: Dict[str, pd.DataFrame] = {}

        if df is None or df.empty or 'follower_count_type' not in df.columns:
            logger.warning("Input DataFrame is empty or 'follower_count_type' column is missing.")
            return separated_data

        # These should match the 'follower_count_type' values in your Bubble data
        for ftype in (self._MONTHLY_TYPE, *self._DEMOGRAPHIC_TYPES):
            type_data = df[df['follower_count_type'] == ftype].copy()
            if type_data.empty:
                logger.info(f"No data found for follower_count_type: {ftype}")
                continue

            if ftype == self._MONTHLY_TYPE:
                separated_data[ftype] = self._process_monthly_data(type_data)
            else:  # Demographic data
                separated_data[ftype] = self._get_top_demographic_segments(type_data, top_n=10)

        return separated_data

    def _get_top_demographic_segments(self, demo_df: pd.DataFrame, top_n: int = 10) -> pd.DataFrame:
        """
        Get top N demographic segments by total follower count (organic + paid).

        The count columns are coerced to numeric (invalid values -> 0) and created as
        zeros when absent, so downstream code can rely on both being present. The input
        DataFrame is not modified. If 'category_name' is missing, the cleaned rows are
        returned without any top-N filtering.
        """
        if demo_df.empty:
            return demo_df

        demo_df = demo_df.copy()  # Work on a copy
        for col in self._COUNT_COLUMNS:
            # A missing column previously crashed here (to_numeric(None).fillna);
            # _count_series yields zeros instead.
            demo_df[col] = self._count_series(demo_df, col)

        # 'category_name' usually holds the demographic label (e.g., industry name)
        if 'category_name' not in demo_df.columns:
            logger.warning("'_get_top_demographic_segments' expects 'category_name' column for grouping.")
            return demo_df

        # Temporary ranking column; dropped again before returning.
        demo_df['total_followers'] = demo_df['follower_count_organic'] + demo_df['follower_count_paid']
        top_segments = demo_df.nlargest(top_n, 'total_followers')

        return top_segments.drop(columns=['total_followers'], errors='ignore')

    def _process_monthly_data(self, monthly_df: pd.DataFrame) -> pd.DataFrame:
        """
        Process monthly follower data: parse dates, sort.

        Adds 'date_for_analysis' (parsed from 'category_name'), 'year_month' ('YYYY-MM')
        and 'month_name' ('Month YYYY'); drops rows whose date cannot be parsed; coerces
        the count columns to numeric (adding them as 0 when absent). The result is sorted
        by date. If the frame is empty or has no 'category_name', it is returned as-is.
        """
        if monthly_df.empty or 'category_name' not in monthly_df.columns:
            logger.warning("Monthly data DataFrame is empty or 'category_name' column is missing.")
            return monthly_df

        df_processed = monthly_df.copy()

        # 'category_name' for monthly data is expected to be a date string like 'YYYY-MM-DD'
        df_processed['date_for_analysis'] = pd.to_datetime(df_processed['category_name'], errors='coerce')

        # Drop rows where date conversion failed
        df_processed = df_processed.dropna(subset=['date_for_analysis'])

        if df_processed.empty:
            logger.warning("No valid dates found in 'category_name' for monthly data after processing.")
            return df_processed

        df_processed['year_month'] = df_processed['date_for_analysis'].dt.strftime('%Y-%m')
        df_processed['month_name'] = df_processed['date_for_analysis'].dt.strftime('%B %Y')

        # Ensure numeric types for follower counts (zeros if a column is missing)
        for col in self._COUNT_COLUMNS:
            df_processed[col] = self._count_series(df_processed, col)

        return df_processed.sort_values('date_for_analysis')

    def _extract_time_series_metrics(self, monthly_df: pd.DataFrame) -> "List[TimeSeriesMetric]":
        """
        Extract time-series metrics from processed monthly follower data.

        Produces total/organic/paid count series and, when more than one data point
        exists, a period-over-period growth-rate series for the total. Timestamps are
        the 'year_month' strings in chronological order.
        """
        ts_metrics = []
        if monthly_df.empty or 'date_for_analysis' not in monthly_df.columns:
            logger.info("Cannot extract time-series metrics: monthly DataFrame is empty or lacks 'date_for_analysis'.")
            return ts_metrics

        # Ensure data is sorted by date for correct growth rate calculation
        ordered = monthly_df.sort_values('date_for_analysis')
        timestamps = ordered['year_month'].tolist()

        organic = self._count_series(ordered, 'follower_count_organic')
        paid = self._count_series(ordered, 'follower_count_paid')
        total = organic + paid

        metric_definitions = {
            "total_follower_count": total,
            "organic_follower_count": organic,
            "paid_follower_count": paid,
        }

        for name, values_series in metric_definitions.items():
            ts_metrics.append(TimeSeriesMetric(
                metric_name=name,
                values=values_series.tolist(),
                timestamps=timestamps,
                metric_type="time_series",
                time_granularity="monthly"
            ))

        # Calculate growth rate for total followers
        if len(ordered) > 1:
            # pct_change gives NaN for the first element (and for 0 -> 0) and +/-inf when
            # the previous value is 0; all of these are undefined rates, reported as 0.
            growth_rates = (
                total.pct_change()
                .replace([float('inf'), float('-inf')], 0.0)
                .fillna(0)
                .tolist()
            )
            # NOTE(review): values are fractions (0.2 == 20%) although unit is "%" —
            # confirm what consumers of this metric expect before rescaling.
            ts_metrics.append(TimeSeriesMetric(
                metric_name="total_follower_growth_rate",
                values=growth_rates,
                timestamps=timestamps,  # Timestamps align; first growth rate has no previous point (so 0)
                metric_type="time_series",
                time_granularity="monthly",
                unit="%"
            ))
        else:
            logger.info("Not enough data points (<=1) to calculate growth rate.")

        return ts_metrics

    def _calculate_aggregate_metrics(self, separated_data: Dict[str, pd.DataFrame]) -> Dict[str, float]:
        """
        Calculate aggregate metrics from all follower data.

        Monthly data yields period totals, organic/paid ratios (only when the overall
        total is positive) and avg/max/min monthly gain. Each non-empty demographic type
        contributes the number of segments it holds. The DataFrames in `separated_data`
        are read-only here; nothing is added to them.
        """
        agg_metrics: Dict[str, float] = {}

        monthly_df = separated_data.get(self._MONTHLY_TYPE)
        if monthly_df is not None and not monthly_df.empty:
            # Assuming 'follower_count_organic/paid' in 'follower_gains_monthly' are GAINS for that month
            organic = self._count_series(monthly_df, 'follower_count_organic')
            paid = self._count_series(monthly_df, 'follower_count_paid')
            monthly_total_gain = organic + paid  # kept local: do not mutate the caller's frame

            total_organic = organic.sum()
            total_paid = paid.sum()
            total_all_followers = total_organic + total_paid

            agg_metrics['total_organic_followers_gained_period'] = float(total_organic)
            agg_metrics['total_paid_followers_gained_period'] = float(total_paid)
            agg_metrics['overall_total_followers_gained_period'] = float(total_all_followers)

            if total_all_followers > 0:
                agg_metrics['overall_organic_follower_ratio_gained'] = float(total_organic / total_all_followers)
                agg_metrics['overall_paid_follower_ratio_gained'] = float(total_paid / total_all_followers)

            agg_metrics['avg_monthly_follower_gain'] = float(monthly_total_gain.mean())
            agg_metrics['max_monthly_follower_gain'] = float(monthly_total_gain.max())
            agg_metrics['min_monthly_follower_gain'] = float(monthly_total_gain.min())

        # Count of demographic segments identified (top N for each)
        for demo_type in self._DEMOGRAPHIC_TYPES:
            demo_df = separated_data.get(demo_type)
            if demo_df is not None and not demo_df.empty:
                agg_metrics[f'distinct_{demo_type}_segments_analyzed'] = float(len(demo_df))

        return agg_metrics

    def _extract_demographic_metrics(self, separated_data: Dict[str, pd.DataFrame]) -> Dict[str, Any]:
        """
        Extract demographic distributions (categorical metrics).

        For each demographic type present, returns '<name>_distribution' (category ->
        total/organic/paid followers and organic ratio, ordered by total descending) and
        '<name>_distribution_summary'. Rows sharing the same 'category_name' are summed
        rather than overwriting one another.
        """
        cat_metrics: Dict[str, Any] = {}
        demographic_types_map = {
            'follower_industry': 'industry_distribution',
            'follower_seniority': 'seniority_distribution',
            'follower_function': 'function_distribution',
            'follower_geo': 'geographic_distribution'
        }

        for demo_type_key, metric_name_prefix in demographic_types_map.items():
            demo_df = separated_data.get(demo_type_key)
            if demo_df is None or demo_df.empty or 'category_name' not in demo_df.columns:
                continue

            # category -> [organic, paid], accumulated across duplicate category rows
            counts_by_category: Dict[Any, List[float]] = {}
            rows = zip(
                demo_df['category_name'],
                self._count_series(demo_df, 'follower_count_organic'),
                self._count_series(demo_df, 'follower_count_paid'),
            )
            for category, organic, paid in rows:
                bucket = counts_by_category.setdefault(category, [0.0, 0.0])
                bucket[0] += float(organic)
                bucket[1] += float(paid)

            distribution = {}
            for category, (organic, paid) in counts_by_category.items():
                total = organic + paid
                distribution[category] = {
                    'total_followers': total,
                    'organic_followers': organic,
                    'paid_followers': paid,
                    'organic_ratio': organic / total if total > 0 else 0.0
                }

            # Sort by total followers descending for the distribution
            sorted_distribution = dict(
                sorted(distribution.items(), key=lambda item: item[1]['total_followers'], reverse=True)
            )
            cat_metrics[metric_name_prefix] = sorted_distribution

            # Summary for this demographic type
            cat_metrics[f'{metric_name_prefix}_summary'] = {
                'total_followers_in_top_segments': sum(
                    item['total_followers'] for item in distribution.values()
                ),
                'number_of_segments_reported': len(distribution),
                'top_segment': next(iter(sorted_distribution), "N/A")
            }
        return cat_metrics

    def _extract_time_periods(self, monthly_df: Optional[pd.DataFrame]) -> List[str]:
        """
        Extract unique year-month time periods covered by the monthly data.

        Returns up to 12 'YYYY-MM' strings, most recent first, or a single placeholder
        entry when no monthly data with a 'year_month' column is available.
        """
        if monthly_df is None or monthly_df.empty or 'year_month' not in monthly_df.columns:
            return ["Data period not available or N/A"]

        periods = sorted(monthly_df['year_month'].dropna().unique().tolist(), reverse=True)
        return periods[:12]  # Return up to the last 12 months if available

    def _run_pandasai_analysis(self, pandas_ai_df: Any, analysis_query: str) -> str:
        """
        Run the PandasAI chat (with retries) and return its answer as text.

        Never raises: any failure is logged and converted into an explanatory string so
        that structured metric extraction can still proceed.
        """
        def chat_operation():
            # Ensure the LLM for PandasAI is configured before the call; configure it
            # lazily from this agent's credentials if it is not.
            if not pai.config.llm:  # Check if LLM is set for PandasAI
                logger.warning("PandasAI LLM not configured. Attempting to configure now.")
                # This assumes configure_pandasai is available and sets pai.config.llm
                from utils.pandasai_setup import configure_pandasai
                configure_pandasai(self.api_key, self.model_name)
                if not pai.config.llm:
                    raise RuntimeError("PandasAI LLM could not be configured for chat operation.")

            logger.info(f"Executing PandasAI chat for follower analysis with LLM: {pai.config.llm}")
            return pandas_ai_df.chat(analysis_query)

        try:
            analysis_result_raw = self.retry_mechanism.retry_with_backoff(
                func=chat_operation,
                max_retries=2,  # Adjusted retries
                base_delay=2.0,
                exceptions=(Exception,)  # Catch broader exceptions for PandasAI calls
            )
        except Exception as e:
            logger.error(f"Follower analysis with PandasAI failed after retries: {e}", exc_info=True)
            return f"Follower analysis using PandasAI failed. Error: {str(e)[:200]}"

        # Avoid bare truthiness here: a pandas object as the answer would raise
        # "truth value is ambiguous" and be misreported as a failed analysis.
        is_blank = analysis_result_raw is None or (
            isinstance(analysis_result_raw, str) and not analysis_result_raw
        )
        logger.info("Follower analysis via PandasAI completed.")
        if is_blank:
            return "No textual analysis generated by PandasAI."
        return str(analysis_result_raw)

    def analyze_follower_data(self, follower_stats_df: pd.DataFrame) -> "AgentMetrics":
        """
        Generate comprehensive follower analysis using PandasAI and structured metric extraction.

        Args:
            follower_stats_df: Follower statistics with 'follower_count_type',
                'category_name', 'follower_count_organic' and 'follower_count_paid'.

        Returns:
            AgentMetrics with the (truncated) textual summary plus time-series, aggregate
            and categorical metrics. For None/empty input, or when the PandasAI DataFrame
            cannot be created, a summary-only AgentMetrics is returned.
        """
        if follower_stats_df is None or follower_stats_df.empty:
            logger.warning("Follower statistics DataFrame is empty. Returning empty metrics.")
            return AgentMetrics(
                agent_name=self.AGENT_NAME,
                analysis_summary="No follower data provided for analysis.",
                time_periods_covered=["N/A"]
            )

        # 1. Pre-process and separate data
        separated_data = self._separate_follower_data_by_type(follower_stats_df)

        # The original (unseparated) DataFrame is handed to PandasAI: it contains all
        # types and the query instructs the LLM how to differentiate them.
        df_description = "LinkedIn follower statistics. Contains 'follower_count_type' indicating data category (e.g., 'follower_gains_monthly', 'follower_industry'), 'category_name' (e.g., date for monthly, industry name for industry type), 'follower_count_organic', 'follower_count_paid'."

        # Create PandasAI DataFrame
        # Check if pai.DataFrame is the correct way to initialize based on your pandasai version
        try:
            pandas_ai_df = pai.DataFrame(follower_stats_df, description=df_description)
        except Exception as e:
            logger.error(f"Failed to create PandasAI DataFrame: {e}", exc_info=True)
            return AgentMetrics(
                agent_name=self.AGENT_NAME,
                analysis_summary=f"Error initializing PandasAI: {e}",
                time_periods_covered=self._extract_time_periods(separated_data.get(self._MONTHLY_TYPE))
            )

        # 2. Generate textual analysis with a direct PandasAI chat call
        # (the LlmAgent itself does not use PandasAI's .chat() method).
        analysis_query = f"""
        Analyze the provided LinkedIn follower statistics. The DataFrame contains various 'follower_count_type' values.
        Focus on:
        1. For 'follower_gains_monthly': Analyze monthly follower growth trends (total, organic, paid). Identify key periods of growth or decline.
        2. For demographic types (industry, seniority, function, geo): Describe the distribution of followers. Which are the top segments? How do organic vs paid compare?
        3. Synthesize these findings into an overall summary of follower dynamics.

        Consider the data structure: 'category_name' holds the date for monthly data or the demographic label.
        'follower_count_organic' and 'follower_count_paid' are the key metrics.
        """

        analysis_result_text = self._run_pandasai_analysis(pandas_ai_df, analysis_query)

        # 3. Extract structured metrics using the separated and processed data
        monthly_data_for_metrics = separated_data.get(self._MONTHLY_TYPE, pd.DataFrame())

        time_series_metrics = self._extract_time_series_metrics(monthly_data_for_metrics)
        aggregate_metrics = self._calculate_aggregate_metrics(separated_data)  # Uses all separated types
        categorical_metrics = self._extract_demographic_metrics(separated_data)  # Uses demographic types
        time_periods = self._extract_time_periods(monthly_data_for_metrics)

        return AgentMetrics(
            agent_name=self.AGENT_NAME,
            analysis_summary=analysis_result_text[:2000],  # Truncate if too long
            time_series_metrics=time_series_metrics,
            aggregate_metrics=aggregate_metrics,
            categorical_metrics=categorical_metrics,
            time_periods_covered=time_periods,
            data_sources_used=[f"follower_stats_df (shape: {follower_stats_df.shape})"]
        )
407
+
408
if __name__ == '__main__':
    # Example / manual smoke test for EnhancedFollowerAnalysisAgent.
    # Runs the agent on a small in-memory sample and prints the resulting metrics.
    # Local import: the demo reads GOOGLE_API_KEY from the environment, and `os` is not
    # imported at module level (the original code raised NameError here).
    import os

    try:
        from utils.logging_config import setup_logging
        setup_logging()
        logger.info("Logging setup for EnhancedFollowerAnalysisAgent test.")
    except ImportError:
        logging.basicConfig(level=logging.INFO)
        logger.warning("Could not import setup_logging. Using basicConfig.")

    # Mock API Key and Model for testing
    # IMPORTANT: For PandasAI to run, a valid API key and model setup are needed.
    # This example might not fully execute PandasAI chat without proper environment setup.
    MOCK_API_KEY = os.environ.get("GOOGLE_API_KEY", "test_api_key_followers")
    MODEL_NAME = DEFAULT_AGENT_MODEL

    class MockPandasAIDataFrame:
        """Stand-in for pai.DataFrame that echoes the query instead of calling an LLM."""

        def __init__(self, df, description):
            self.df = df
            self.description = description

        def chat(self, query):
            return f"Mock PandasAI response to: {query}"

    # Configure PandasAI (essential for the .chat() part); fall back to the mock
    # whenever no real key or no setup helper is available.
    try:
        from utils.pandasai_setup import configure_pandasai
        if MOCK_API_KEY != "test_api_key_followers":  # Only configure if a real key might be present
            configure_pandasai(MOCK_API_KEY, MODEL_NAME)
            logger.info("PandasAI configured for testing EnhancedFollowerAnalysisAgent.")
        else:
            logger.warning("Using mock API key. PandasAI chat will likely fail or use a default/mock LLM if available.")
            pai.DataFrame = MockPandasAIDataFrame
    except ImportError:
        logger.error("utils.pandasai_setup not found. PandasAI will not be configured.")
        pai.DataFrame = MockPandasAIDataFrame

    # Sample Data
    sample_follower_data = {
        'follower_count_type': [
            'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly',
            'follower_industry', 'follower_industry', 'follower_industry', 'follower_industry',
            'follower_seniority', 'follower_seniority'
        ],
        'category_name': [  # Dates for monthly, names for demographics
            '2023-01-01', '2023-02-01', '2023-03-01',
            'Technology', 'Finance', 'Healthcare', 'Retail',
            'Senior', 'Entry-Level'
        ],
        'follower_count_organic': [
            100, 120, 110,  # Monthly gains
            500, 300, 200, 150,  # Industry organic
            600, 400  # Seniority organic
        ],
        'follower_count_paid': [
            10, 15, 12,  # Monthly gains
            50, 30, 20, 10,  # Industry paid
            60, 40  # Seniority paid
        ]
    }
    sample_df = pd.DataFrame(sample_follower_data)

    # Initialize agent
    follower_agent = EnhancedFollowerAnalysisAgent(api_key=MOCK_API_KEY, model_name=MODEL_NAME)

    logger.info("Analyzing sample follower data...")
    metrics_result = follower_agent.analyze_follower_data(sample_df)

    print("\n--- EnhancedFollowerAnalysisAgent Results ---")
    print(f"Agent Name: {metrics_result.agent_name}")
    print(f"Analysis Summary: {metrics_result.analysis_summary}")
    print("\nTime Series Metrics:")
    for ts_metric in metrics_result.time_series_metrics:
        print(f" - {ts_metric.metric_name}: {len(ts_metric.values)} data points, e.g., {ts_metric.values[:3]} for ts {ts_metric.timestamps[:3]}")
    print("\nAggregate Metrics:")
    for key, value in metrics_result.aggregate_metrics.items():
        print(f" - {key}: {value}")
    print("\nCategorical Metrics:")
    for key, value in metrics_result.categorical_metrics.items():
        print(f" - {key}: (details below)")
        if isinstance(value, dict):
            for sub_key, sub_value in list(value.items())[:2]:  # Print first 2 items for brevity
                print(f" - {sub_key}: {sub_value}")
        else:
            print(f" {value}")

    print(f"\nTime Periods Covered: {metrics_result.time_periods_covered}")
    print(f"Data Sources Used: {metrics_result.data_sources_used}")
    print(f"Generated Timestamp: {metrics_result.generation_timestamp}")

    # Test with empty DataFrame
    logger.info("\n--- Testing with empty DataFrame ---")
    empty_metrics_result = follower_agent.analyze_follower_data(pd.DataFrame())
    print(f"Empty DF Analysis Summary: {empty_metrics_result.analysis_summary}")