thak123 committed on
Commit
9b83c18
·
verified ·
1 Parent(s): 441df99

Update index.py

Browse files
Files changed (1) hide show
  1. index.py +22 -15
index.py CHANGED
@@ -40,9 +40,13 @@ df["Veículos de notícias"] = df["domain_folder_name"]
40
 
41
  df['FinBERT_label'] = df['FinBERT_label'].astype(str)
42
  df['FinBERT_label'].replace({
43
- '3.0': 'positive',
44
- '2.0': 'neutral',
45
- '1.0': 'negative'
 
 
 
 
46
  }, inplace=True)
47
 
48
 
@@ -51,9 +55,9 @@ counts = df.groupby(['date', 'Topic', 'domain_folder_name', 'FinBERT_label']).si
51
  counts['count'] = counts['count'].astype('float64')
52
  counts['rolling_mean_counts'] = counts['count'].rolling(window=30, min_periods=2).mean()
53
 
54
- df_pos = counts[[x in ['positive'] for x in counts.FinBERT_label]]
55
- df_neu = counts[[x in ['neutral'] for x in counts.FinBERT_label]]
56
- df_neg = counts[[x in ['negative'] for x in counts.FinBERT_label]]
57
 
58
 
59
  # app.layout
@@ -261,7 +265,7 @@ def update_output(selected_topic, selected_domain, start_date, end_date):
261
  date_range = pd.date_range(start=start_date, end=end_date)
262
 
263
  # Create a DataFrame with all possible combinations of classes, topics, and dates
264
- all_combinations = pd.MultiIndex.from_product([['positive', 'neutral', 'negative'],
265
  [selected_topic],
266
  [selected_domain],
267
  date_range],
@@ -270,7 +274,12 @@ def update_output(selected_topic, selected_domain, start_date, end_date):
270
 
271
  # Merge filtered DataFrame with DataFrame of all combinations
272
  merged_df = pd.merge(df_all_combinations, df_filtered_2, on=['FinBERT_label', 'Topic', 'domain_folder_name', 'date'], how='left')
273
-
 
 
 
 
 
274
  # Fill missing values with zeros
275
  merged_df['count'].fillna(0, inplace=True)
276
  merged_df['rolling_mean_counts'].fillna(0, inplace=True)
@@ -286,12 +295,10 @@ def update_output(selected_topic, selected_domain, start_date, end_date):
286
  xaxis=dict(tickformat="%b %d<br>%Y"))
287
  # line-fig 2 ends
288
 
289
- # Map original labels to their translated versions
290
- label_translation = {'positive': 'positivo', 'neutral': 'neutro', 'negative': 'negativo'}
291
- df_filtered['FinBERT_label_transformed'] = df_filtered['FinBERT_label'].map(label_translation)
292
 
293
  # Group by FinBERT_label and count occurrences
294
- label_counts_all = df_filtered['FinBERT_label_transformed'].value_counts()
295
 
296
  # Calculate percentage of each label
297
  label_percentages_all = (label_counts_all / label_counts_all.sum()) * 100
@@ -315,7 +322,7 @@ def update_output(selected_topic, selected_domain, start_date, end_date):
315
  media_df = df_filtered[df_filtered['Veículos de notícias'] == selected_domain]
316
 
317
  # Group by FinBERT_label and count occurrences
318
- label_counts = media_df['FinBERT_label_transformed'].value_counts()
319
 
320
  # Calculate percentage of each label
321
  label_percentages = (label_counts / label_counts.sum()) * 100
@@ -331,12 +338,12 @@ def update_output(selected_topic, selected_domain, start_date, end_date):
331
  # pie_chart_2 = html.Div(fig,className='four columns')
332
 
333
  # Convert FinBERT_label to categorical for better sorting
334
- media_df['FinBERT_label_transformed'] = pd.Categorical(media_df['FinBERT_label_transformed'],
335
  categories=['positivo', 'neutro', 'negativo'],
336
  ordered=True)
337
 
338
  # Sort DataFrame by sentiment label and date
339
- data_table_1 = media_df.sort_values(by=['date', "FinBERT_label_transformed"])
340
 
341
  return line_fig_1, bar_fig_1, pie_chart_1, line_fig_2, pie_chart_2, data_table_1.to_dict('records')
342
  else:
 
40
 
41
  df['FinBERT_label'] = df['FinBERT_label'].astype(str)
42
  df['FinBERT_label'].replace({
43
+ # '3.0': 'positive',
44
+ # '2.0': 'neutral',
45
+ # '1.0': 'negative'
46
+ '3.0': 'positivo',
47
+ '2.0': 'neutro',
48
+ '1.0': 'negativo'
49
+
50
  }, inplace=True)
51
 
52
 
 
55
  counts['count'] = counts['count'].astype('float64')
56
  counts['rolling_mean_counts'] = counts['count'].rolling(window=30, min_periods=2).mean()
57
 
58
+ df_pos = counts[[x in ['positivo'] for x in counts.FinBERT_label]]
59
+ df_neu = counts[[x in ['neutro'] for x in counts.FinBERT_label]]
60
+ df_neg = counts[[x in ['negativo'] for x in counts.FinBERT_label]]
61
 
62
 
63
  # app.layout
 
265
  date_range = pd.date_range(start=start_date, end=end_date)
266
 
267
  # Create a DataFrame with all possible combinations of classes, topics, and dates
268
+ all_combinations = pd.MultiIndex.from_product([['positivo', 'neutro', 'negativo'],
269
  [selected_topic],
270
  [selected_domain],
271
  date_range],
 
274
 
275
  # Merge filtered DataFrame with DataFrame of all combinations
276
  merged_df = pd.merge(df_all_combinations, df_filtered_2, on=['FinBERT_label', 'Topic', 'domain_folder_name', 'date'], how='left')
277
+
278
+ # Map original labels to their translated versions
279
+ label_translation = {'positive': 'positivo', 'neutral': 'neutro', 'negative': 'negativo'}
280
+
281
+ # merged_df['FinBERT_label_transformed'] = merged_df['FinBERT_label'].map(label_translation)
282
+
283
  # Fill missing values with zeros
284
  merged_df['count'].fillna(0, inplace=True)
285
  merged_df['rolling_mean_counts'].fillna(0, inplace=True)
 
295
  xaxis=dict(tickformat="%b %d<br>%Y"))
296
  # line-fig 2 ends
297
 
298
+ # df_filtered['FinBERT_label_transformed'] = df_filtered['FinBERT_label'].map(label_translation)
 
 
299
 
300
  # Group by FinBERT_label and count occurrences
301
+ label_counts_all = df_filtered['FinBERT_label'].value_counts()
302
 
303
  # Calculate percentage of each label
304
  label_percentages_all = (label_counts_all / label_counts_all.sum()) * 100
 
322
  media_df = df_filtered[df_filtered['Veículos de notícias'] == selected_domain]
323
 
324
  # Group by FinBERT_label and count occurrences
325
+ label_counts = media_df['FinBERT_label'].value_counts()
326
 
327
  # Calculate percentage of each label
328
  label_percentages = (label_counts / label_counts.sum()) * 100
 
338
  # pie_chart_2 = html.Div(fig,className='four columns')
339
 
340
  # Convert FinBERT_label to categorical for better sorting
341
+ media_df['FinBERT_label'] = pd.Categorical(media_df['FinBERT_label'],
342
  categories=['positivo', 'neutro', 'negativo'],
343
  ordered=True)
344
 
345
  # Sort DataFrame by sentiment label and date
346
+ data_table_1 = media_df.sort_values(by=['date', "FinBERT_label"])
347
 
348
  return line_fig_1, bar_fig_1, pie_chart_1, line_fig_2, pie_chart_2, data_table_1.to_dict('records')
349
  else: