GuglielmoTor committed on
Commit
4761ff7
·
verified ·
1 Parent(s): 11bf9d9

Create formulas.py

Browse files
Files changed (1) hide show
  1. formulas.py +257 -0
formulas.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# formulas.py
#
# Static, human-readable explanations for each analytics plot.
#
# PLOT_FORMULAS maps a plot identifier (e.g. "posts_activity") to a dict with
# exactly three keys:
#   "title"             - display title of the plot (str).
#   "description"       - short summary of what the plot shows (str). It is
#                         built from adjacent string literals, which Python
#                         joins with NO separator, so every fragment that is
#                         followed by another one must end with a space.
#   "calculation_steps" - ordered list of numbered, plain-language steps
#                         (list of str).
#
# NOTE(review): the column names and default values quoted in these texts
# describe plotting code that is not part of this file; presumably they must be
# kept in sync with that code by hand - confirm against the plot generators.

PLOT_FORMULAS = {
    "posts_activity": {
        "title": "Posts Activity Over Time",
        "description": (
            "This chart displays the number of posts made each day over the selected period. "
            "It helps in understanding posting consistency and identifying trends in content output."
        ),
        "calculation_steps": [
            "1. Takes the posts dataset and a date column (default: 'published_at').",
            "2. Converts the date column to a proper datetime format.",
            "3. Removes any entries where the date is invalid.",
            "4. Groups the posts by day.",
            "5. Counts the number of posts for each day.",
            "6. Displays the daily post count as a line graph over time.",
        ],
    },
    "mentions_activity": {
        "title": "Mentions Activity Over Time",
        "description": (
            "This chart shows the number of mentions received each day over the selected period. "
            "It's useful for tracking brand presence and engagement spikes related to mentions."
        ),
        "calculation_steps": [
            "1. Takes the mentions dataset and a date column (default: 'date').",
            "2. Converts the date column to a proper datetime format.",
            "3. Removes any entries where the date is invalid.",
            "4. Groups the mentions by day.",
            "5. Counts the number of mentions for each day.",
            "6. Displays the daily mention count as a line graph over time.",
        ],
    },
    "mention_sentiment": {
        "title": "Mention Sentiment Distribution",
        "description": (
            "This pie chart illustrates the proportion of mentions categorized by sentiment (e.g., positive, negative, neutral). "
            "It provides a quick overview of the overall tone surrounding your brand mentions."
        ),
        "calculation_steps": [
            "1. Takes the mentions dataset and a sentiment column (default: 'sentiment_label').",
            "2. Counts the occurrences of each unique sentiment value (e.g., 'Positive', 'Negative', 'Neutral').",
            "3. Displays these counts as a pie chart, with each slice representing the percentage of a sentiment category.",
        ],
    },
    "followers_count_over_time": {
        "title": "Followers Count Over Time",
        "description": (
            "This chart tracks the cumulative number of organic and paid followers over time for a specific follower metric (e.g., 'follower_gains_monthly'). "
            # Fix: trailing space added so the next fragment does not fuse
            # into "sources.The specific metric ...".
            "It helps visualize audience growth from different sources. "
            "The specific metric (e.g., monthly gains, total followers) is determined by the 'type_value' parameter used when generating the plot."
        ),
        "calculation_steps": [
            "1. Takes the follower statistics dataset.",
            "2. Filters the data based on a 'type_filter_column' (default: 'follower_count_type') and a specific 'type_value' (e.g., 'follower_gains_monthly').",
            "3. Uses a date information column (default: 'category_name', expected to contain date strings) and converts it to datetime objects.",
            "4. Extracts organic follower counts (default: 'follower_count_organic') and paid follower counts (default: 'follower_count_paid').",
            "5. Ensures follower counts are numeric, replacing missing values with 0.",
            "6. Removes entries with invalid dates or follower counts.",
            "7. Sorts the data by date.",
            "8. Plots the organic and paid follower counts as separate lines on a graph over time.",
        ],
    },
    "followers_growth_rate": {
        "title": "Follower Growth Rate",
        "description": (
            "This chart displays the percentage change in organic and paid followers over time for a specific follower metric. "
            "It highlights the speed of audience growth or decline. "
            "The specific metric is determined by the 'type_value' parameter."
        ),
        "calculation_steps": [
            "1. Similar to 'Followers Count Over Time', data is filtered and prepared based on 'type_value'.",
            "2. The date column is set as the index.",
            "3. Calculates the period-over-period percentage change for both organic and paid follower counts (e.g., ((current_period_followers - previous_period_followers) / previous_period_followers) * 100).",
            "4. Handles any infinite values resulting from division by zero (e.g., if previous count was 0).",
            "5. Plots the calculated organic and paid growth rates as separate lines on a graph over time. The Y-axis is formatted as a percentage.",
        ],
    },
    "followers_by_demographics": {
        "title": "Followers by Demographics",
        "description": (
            "This bar chart breaks down the number of organic and paid followers by a chosen demographic category (e.g., country, industry, seniority). "
            "It shows the top N categories, helping to understand audience composition. The specific demographic breakdown is determined by the 'type_value' (e.g., 'followers_by_country') and 'category_col' parameters."
        ),
        "calculation_steps": [
            "1. Takes the follower statistics dataset.",
            "2. Filters data based on 'type_filter_column' and a specific 'type_value' that defines the demographic type (e.g., 'followers_by_country').",
            "3. Extracts organic and paid follower counts, ensuring they are numeric.",
            "4. Groups the data by the specified 'category_col' (e.g., 'country_name') and sums the organic and paid followers for each category.",
            "5. Sorts the demographic categories by the total number of followers (organic + paid) in descending order.",
            "6. Displays the top N (default: 10) demographic categories.",
            "7. For each top category, plots the number of organic and paid followers as a grouped bar chart.",
        ],
    },
    "engagement_rate_over_time": {
        "title": "Engagement Rate Over Time",
        "description": (
            "This chart visualizes the average engagement rate of your posts on a daily basis. "
            "Engagement rate is typically calculated as (Total Engagements / Total Impressions or Reach or Followers) * 100. "
            "The specific formula for 'engagement' column should be predefined in your data."
        ),
        "calculation_steps": [
            "1. Takes the posts dataset with a date column (default: 'published_at') and an engagement rate column (default: 'engagement').",
            "2. Converts the date column to datetime and the engagement rate column to numeric.",
            "3. Removes entries with invalid dates or engagement rates.",
            "4. Groups the data by day.",
            "5. Calculates the average engagement rate for each day.",
            "6. Plots this daily average engagement rate as a line graph. The Y-axis is formatted as a percentage.",
        ],
    },
    "reach_over_time": {
        "title": "Reach Over Time (Clicks)",
        "description": (
            "This chart shows the total number of clicks (often used as a proxy for reach for certain post types or if 'reach' data isn't directly available) on your posts, aggregated daily. "
            "It helps track how many unique users potentially saw your content, based on click interactions."
        ),
        "calculation_steps": [
            "1. Takes the posts dataset with a date column (default: 'published_at') and a clicks column (default: 'clickCount').",
            "2. Converts the date column to datetime and the clicks column to numeric.",
            "3. Removes entries with invalid dates or click counts.",
            "4. Groups the data by day.",
            "5. Calculates the sum of clicks for each day.",
            "6. Plots the daily total clicks as a line graph over time.",
        ],
    },
    "impressions_over_time": {
        "title": "Impressions Over Time",
        "description": (
            "This chart displays the total number of impressions your posts received, aggregated daily. "
            "Impressions represent the total number of times your content was displayed, regardless of whether it was clicked."
        ),
        "calculation_steps": [
            "1. Takes the posts dataset with a date column (default: 'published_at') and an impressions column (default: 'impressionCount').",
            "2. Converts the date column to datetime and the impressions column to numeric.",
            "3. Removes entries with invalid dates or impression counts.",
            "4. Groups the data by day.",
            "5. Calculates the sum of impressions for each day.",
            "6. Plots the daily total impressions as a line graph over time.",
        ],
    },
    "likes_over_time": {
        "title": "Reactions (Likes) Over Time",
        "description": (
            "This chart tracks the total number of likes (or reactions) your posts received, aggregated daily. "
            "It's a key indicator of content resonance and audience appreciation."
        ),
        "calculation_steps": [
            "1. Takes the posts dataset with a date column (default: 'published_at') and a likes column (default: 'likeCount').",
            "2. Converts the date column to datetime and the likes column to numeric.",
            "3. Removes entries with invalid dates or like counts.",
            "4. Groups the data by day.",
            "5. Calculates the sum of likes for each day.",
            "6. Plots the daily total likes as a line graph over time.",
        ],
    },
    "clicks_over_time": {  # Note: This reuses the reach_over_time logic
        "title": "Clicks Over Time",
        "description": (
            "This chart shows the total number of clicks on your posts, aggregated daily. "
            "It directly measures user interaction involving clicks on your content or links within it."
        ),
        "calculation_steps": [
            "1. Takes the posts dataset with a date column (default: 'published_at') and a clicks column (default: 'clickCount').",
            "2. Converts the date column to datetime and the clicks column to numeric.",
            "3. Removes entries with invalid dates or click counts.",
            "4. Groups the data by day.",
            "5. Calculates the sum of clicks for each day.",
            "6. Plots the daily total clicks as a line graph over time. (This plot uses the same calculation logic as 'Reach Over Time (Clicks)').",
        ],
    },
    "shares_over_time": {
        "title": "Shares Over Time",
        "description": (
            "This chart displays the total number of times your posts were shared, aggregated daily. "
            "Shares are a strong indicator of content value and audience advocacy."
        ),
        "calculation_steps": [
            "1. Takes the posts dataset with a date column (default: 'published_at') and a shares column (default: 'shareCount').",
            "2. Converts the date column to datetime and the shares column to numeric.",
            "3. Removes entries with invalid dates or share counts.",
            "4. Groups the data by day.",
            "5. Calculates the sum of shares for each day.",
            "6. Plots the daily total shares as a line graph over time.",
        ],
    },
    "comments_over_time": {
        "title": "Comments Over Time",
        "description": (
            "This chart tracks the total number of comments on your posts, aggregated daily. "
            "Comments reflect audience engagement and conversation around your content."
        ),
        "calculation_steps": [
            "1. Takes the posts dataset with a date column (default: 'published_at') and a comments column (default: 'commentCount').",
            "2. Converts the date column to datetime and the comments column to numeric.",
            "3. Removes entries with invalid dates or comment counts.",
            "4. Groups the data by day.",
            "5. Calculates the sum of comments for each day.",
            "6. Plots the daily total comments as a line graph over time.",
        ],
    },
    "comments_sentiment_breakdown": {
        "title": "Breakdown of Comments by Sentiment",
        "description": (
            "This pie chart shows the distribution of sentiments (e.g., positive, negative, neutral) expressed in the comments on your posts. "
            "It helps in understanding audience reactions at a deeper level."
        ),
        "calculation_steps": [
            "1. Takes the comments dataset (or posts dataset if comments have sentiment labels) and a sentiment column (default: 'comment_sentiment', with 'sentiment' as a fallback).",
            "2. Ensures the sentiment column is treated as string data.",
            "3. Counts the occurrences of each unique sentiment value.",
            "4. Displays these counts as a pie chart, with each slice representing the percentage of a sentiment category in comments.",
        ],
    },
    "post_frequency": {
        "title": "Post Frequency Over Time",
        "description": (
            "This chart visualizes how frequently posts are made, aggregated by a chosen period (Daily 'D', Weekly 'W', or Monthly 'M'). "
            "It helps assess content scheduling and output rhythm. The aggregation period is determined by the 'resample_period' parameter."
        ),
        "calculation_steps": [
            "1. Takes the posts dataset with a date column (default: 'published_at').",
            "2. Converts the date column to datetime format.",
            "3. Removes entries with invalid dates.",
            "4. Groups posts by the specified 'resample_period' (e.g., 'D' for day, 'W' for week, 'M' for month).",
            "5. Counts the number of posts within each period.",
            "6. Displays the result as a line chart for daily frequency or a bar chart for weekly/monthly frequency.",
        ],
    },
    "content_format_breakdown": {
        "title": "Breakdown of Content by Format",
        "description": (
            "This bar chart shows the distribution of your posts based on their media type or format (e.g., article, image, video). "
            "It helps identify which content formats are most commonly used."
        ),
        "calculation_steps": [
            "1. Takes the posts dataset and a format column (default: 'media_type').",
            "2. Counts the occurrences of each unique media type.",
            "3. Displays these counts as a bar chart, with each bar representing a media type and its height representing the number of posts.",
        ],
    },
    "content_topic_breakdown": {
        "title": "Breakdown of Content by Topics",
        "description": (
            "This horizontal bar chart displays the most frequent topics associated with your posts, based on assigned labels (e.g., from 'li_eb_labels' column). "
            "It shows the top N topics (default: 15) to highlight key themes in your content strategy."
        ),
        "calculation_steps": [
            "1. Takes the posts dataset and a topics column (default: 'li_eb_labels') which may contain single labels or lists of labels.",
            "2. Parses the topics column: if an entry is a string representation of a list, it's converted to a list; otherwise, it's treated as a single-item list. Empty or invalid entries are handled.",
            "3. 'Explodes' the data so that each individual topic label from a post gets its own row.",
            "4. Filters out any empty string labels that might have resulted from parsing.",
            "5. Counts the occurrences of each unique topic.",
            "6. Selects the top N most frequent topics (default N=15).",
            "7. Displays these top topics and their counts as a horizontal bar chart.",
        ],
    },
}