bardd committed on
Commit
8e6f74a
·
verified ·
1 Parent(s): 966a6a5

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +27 -12
main.py CHANGED
@@ -78,19 +78,34 @@ def generate_recommendations_for_session(session_id):
78
  # Convert session data to a DataFrame
79
  raw_df = pd.DataFrame(session_data)
80
 
81
- # Aggregate data by id and action
82
- aggregated_data = raw_df.groupby(['id', 'action']).agg(
83
- presence=('action', 'size'),
84
- total_duration=('duration', 'sum')
85
- ).reset_index()
 
 
 
 
 
 
 
86
 
87
  # Create a pivot table from the aggregated data
88
- pivot_df = aggregated_data.pivot_table(
89
- index=['id'],
90
- columns='action',
91
- values=['presence', 'total_duration'],
92
- fill_value=0
93
- )
 
 
 
 
 
 
 
 
94
 
95
  # Flatten column names
96
  pivot_df.columns = ['_'.join(col).strip() for col in pivot_df.columns.values]
@@ -99,7 +114,7 @@ def generate_recommendations_for_session(session_id):
99
  for col in ALL_COLUMNS:
100
  if f'presence_{col}' not in pivot_df.columns and col != 'time_spent':
101
  pivot_df[f'presence_{col}'] = 0
102
- elif col == 'time_spent' and 'total_duration_time_spent' not in pivot_df.columns:
103
  pivot_df['total_duration_time_spent'] = 0
104
 
105
  # Calculate interaction score for each row
 
78
  # Convert session data to a DataFrame
79
  raw_df = pd.DataFrame(session_data)
80
 
81
+ # Check if 'duration' column exists
82
+ if 'duration' in raw_df.columns:
83
+ # Aggregate data by id and action, including duration
84
+ aggregated_data = raw_df.groupby(['id', 'action']).agg(
85
+ presence=('action', 'size'),
86
+ total_duration=('duration', 'sum')
87
+ ).reset_index()
88
+ else:
89
+ # Aggregate data by id and action, without duration
90
+ aggregated_data = raw_df.groupby(['id', 'action']).agg(
91
+ presence=('action', 'size')
92
+ ).reset_index()
93
 
94
  # Create a pivot table from the aggregated data
95
+ if 'duration' in raw_df.columns:
96
+ pivot_df = aggregated_data.pivot_table(
97
+ index=['id'],
98
+ columns='action',
99
+ values=['presence', 'total_duration'],
100
+ fill_value=0
101
+ )
102
+ else:
103
+ pivot_df = aggregated_data.pivot_table(
104
+ index=['id'],
105
+ columns='action',
106
+ values=['presence'],
107
+ fill_value=0
108
+ )
109
 
110
  # Flatten column names
111
  pivot_df.columns = ['_'.join(col).strip() for col in pivot_df.columns.values]
 
114
  for col in ALL_COLUMNS:
115
  if f'presence_{col}' not in pivot_df.columns and col != 'time_spent':
116
  pivot_df[f'presence_{col}'] = 0
117
+ elif col == 'time_spent' and 'duration' in raw_df.columns and 'total_duration_time_spent' not in pivot_df.columns:
118
  pivot_df['total_duration_time_spent'] = 0
119
 
120
  # Calculate interaction score for each row