Spaces:
Sleeping
Sleeping
Update index.py
Browse files
index.py
CHANGED
@@ -40,9 +40,13 @@ df["Veículos de notícias"] = df["domain_folder_name"]
|
|
40 |
|
41 |
df['FinBERT_label'] = df['FinBERT_label'].astype(str)
|
42 |
df['FinBERT_label'].replace({
|
43 |
-
'3.0': 'positive',
|
44 |
-
'2.0': 'neutral',
|
45 |
-
'1.0': 'negative'
|
|
|
|
|
|
|
|
|
46 |
}, inplace=True)
|
47 |
|
48 |
|
@@ -51,9 +55,9 @@ counts = df.groupby(['date', 'Topic', 'domain_folder_name', 'FinBERT_label']).si
|
|
51 |
counts['count'] = counts['count'].astype('float64')
|
52 |
counts['rolling_mean_counts'] = counts['count'].rolling(window=30, min_periods=2).mean()
|
53 |
|
54 |
-
df_pos = counts[[x in ['
|
55 |
-
df_neu = counts[[x in ['
|
56 |
-
df_neg = counts[[x in ['
|
57 |
|
58 |
|
59 |
# app.layout
|
@@ -261,7 +265,7 @@ def update_output(selected_topic, selected_domain, start_date, end_date):
|
|
261 |
date_range = pd.date_range(start=start_date, end=end_date)
|
262 |
|
263 |
# Create a DataFrame with all possible combinations of classes, topics, and dates
|
264 |
-
all_combinations = pd.MultiIndex.from_product([['
|
265 |
[selected_topic],
|
266 |
[selected_domain],
|
267 |
date_range],
|
@@ -270,7 +274,12 @@ def update_output(selected_topic, selected_domain, start_date, end_date):
|
|
270 |
|
271 |
# Merge filtered DataFrame with DataFrame of all combinations
|
272 |
merged_df = pd.merge(df_all_combinations, df_filtered_2, on=['FinBERT_label', 'Topic', 'domain_folder_name', 'date'], how='left')
|
273 |
-
|
|
|
|
|
|
|
|
|
|
|
274 |
# Fill missing values with zeros
|
275 |
merged_df['count'].fillna(0, inplace=True)
|
276 |
merged_df['rolling_mean_counts'].fillna(0, inplace=True)
|
@@ -286,12 +295,10 @@ def update_output(selected_topic, selected_domain, start_date, end_date):
|
|
286 |
xaxis=dict(tickformat="%b %d<br>%Y"))
|
287 |
# line-fig 2 ends
|
288 |
|
289 |
-
#
|
290 |
-
label_translation = {'positive': 'positivo', 'neutral': 'neutro', 'negative': 'negativo'}
|
291 |
-
df_filtered['FinBERT_label_transformed'] = df_filtered['FinBERT_label'].map(label_translation)
|
292 |
|
293 |
# Group by FinBERT_label and count occurrences
|
294 |
-
label_counts_all = df_filtered['
|
295 |
|
296 |
# Calculate percentage of each label
|
297 |
label_percentages_all = (label_counts_all / label_counts_all.sum()) * 100
|
@@ -315,7 +322,7 @@ def update_output(selected_topic, selected_domain, start_date, end_date):
|
|
315 |
media_df = df_filtered[df_filtered['Veículos de notícias'] == selected_domain]
|
316 |
|
317 |
# Group by FinBERT_label and count occurrences
|
318 |
-
label_counts = media_df['
|
319 |
|
320 |
# Calculate percentage of each label
|
321 |
label_percentages = (label_counts / label_counts.sum()) * 100
|
@@ -331,12 +338,12 @@ def update_output(selected_topic, selected_domain, start_date, end_date):
|
|
331 |
# pie_chart_2 = html.Div(fig,className='four columns')
|
332 |
|
333 |
# Convert FinBERT_label to categorical for better sorting
|
334 |
-
media_df['
|
335 |
categories=['positivo', 'neutro', 'negativo'],
|
336 |
ordered=True)
|
337 |
|
338 |
# Sort DataFrame by sentiment label and date
|
339 |
-
data_table_1 = media_df.sort_values(by=['date', "
|
340 |
|
341 |
return line_fig_1, bar_fig_1, pie_chart_1, line_fig_2, pie_chart_2, data_table_1.to_dict('records')
|
342 |
else:
|
|
|
40 |
|
41 |
df['FinBERT_label'] = df['FinBERT_label'].astype(str)
|
42 |
df['FinBERT_label'].replace({
|
43 |
+
# '3.0': 'positive',
|
44 |
+
# '2.0': 'neutral',
|
45 |
+
# '1.0': 'negative'
|
46 |
+
'3.0': 'positivo',
|
47 |
+
'2.0': 'neutro',
|
48 |
+
'1.0': 'negativo'
|
49 |
+
|
50 |
}, inplace=True)
|
51 |
|
52 |
|
|
|
55 |
counts['count'] = counts['count'].astype('float64')
|
56 |
counts['rolling_mean_counts'] = counts['count'].rolling(window=30, min_periods=2).mean()
|
57 |
|
58 |
+
df_pos = counts[[x in ['positivo'] for x in counts.FinBERT_label]]
|
59 |
+
df_neu = counts[[x in ['neutro'] for x in counts.FinBERT_label]]
|
60 |
+
df_neg = counts[[x in ['negativo'] for x in counts.FinBERT_label]]
|
61 |
|
62 |
|
63 |
# app.layout
|
|
|
265 |
date_range = pd.date_range(start=start_date, end=end_date)
|
266 |
|
267 |
# Create a DataFrame with all possible combinations of classes, topics, and dates
|
268 |
+
all_combinations = pd.MultiIndex.from_product([['positivo', 'neutro', 'negativo'],
|
269 |
[selected_topic],
|
270 |
[selected_domain],
|
271 |
date_range],
|
|
|
274 |
|
275 |
# Merge filtered DataFrame with DataFrame of all combinations
|
276 |
merged_df = pd.merge(df_all_combinations, df_filtered_2, on=['FinBERT_label', 'Topic', 'domain_folder_name', 'date'], how='left')
|
277 |
+
|
278 |
+
# Map original labels to their translated versions
|
279 |
+
label_translation = {'positive': 'positivo', 'neutral': 'neutro', 'negative': 'negativo'}
|
280 |
+
|
281 |
+
# merged_df['FinBERT_label_transformed'] = merged_df['FinBERT_label'].map(label_translation)
|
282 |
+
|
283 |
# Fill missing values with zeros
|
284 |
merged_df['count'].fillna(0, inplace=True)
|
285 |
merged_df['rolling_mean_counts'].fillna(0, inplace=True)
|
|
|
295 |
xaxis=dict(tickformat="%b %d<br>%Y"))
|
296 |
# line-fig 2 ends
|
297 |
|
298 |
+
# df_filtered['FinBERT_label_transformed'] = df_filtered['FinBERT_label'].map(label_translation)
|
|
|
|
|
299 |
|
300 |
# Group by FinBERT_label and count occurrences
|
301 |
+
label_counts_all = df_filtered['FinBERT_label'].value_counts()
|
302 |
|
303 |
# Calculate percentage of each label
|
304 |
label_percentages_all = (label_counts_all / label_counts_all.sum()) * 100
|
|
|
322 |
media_df = df_filtered[df_filtered['Veículos de notícias'] == selected_domain]
|
323 |
|
324 |
# Group by FinBERT_label and count occurrences
|
325 |
+
label_counts = media_df['FinBERT_label'].value_counts()
|
326 |
|
327 |
# Calculate percentage of each label
|
328 |
label_percentages = (label_counts / label_counts.sum()) * 100
|
|
|
338 |
# pie_chart_2 = html.Div(fig,className='four columns')
|
339 |
|
340 |
# Convert FinBERT_label to categorical for better sorting
|
341 |
+
media_df['FinBERT_label'] = pd.Categorical(media_df['FinBERT_label'],
|
342 |
categories=['positivo', 'neutro', 'negativo'],
|
343 |
ordered=True)
|
344 |
|
345 |
# Sort DataFrame by sentiment label and date
|
346 |
+
data_table_1 = media_df.sort_values(by=['date', "FinBERT_label"])
|
347 |
|
348 |
return line_fig_1, bar_fig_1, pie_chart_1, line_fig_2, pie_chart_2, data_table_1.to_dict('records')
|
349 |
else:
|