Ashoka74 committed on
Commit
334abc2
·
verified ·
1 Parent(s): d6373e5

Update analyzing.py

Browse files
Files changed (1) hide show
  1. analyzing.py +45 -1
analyzing.py CHANGED
@@ -191,7 +191,8 @@ def plot_line(df, x_column, y_columns, figsize=(12, 10), color='orange', title=N
191
 
192
  return fig
193
 
194
- def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=None):
 
195
  fig, ax = plt.subplots(figsize=figsize)
196
 
197
  sns.barplot(data=df, x=x_column, y=y_column, color=color, ax=ax)
@@ -203,6 +204,8 @@ def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=Non
203
  ax.tick_params(axis='x', colors=color)
204
  ax.tick_params(axis='y', colors=color)
205
 
 
 
206
  # Remove background
207
  fig.patch.set_alpha(0)
208
  ax.patch.set_alpha(0)
@@ -219,6 +222,7 @@ def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=Non
219
 
220
  return fig
221
 
 
222
  def plot_grouped_bar(df, x_columns, y_column, figsize=(12, 10), colors=None, title=None):
223
  fig, ax = plt.subplots(figsize=figsize)
224
 
@@ -370,6 +374,46 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
370
  if len(user_date_input) == 2:
371
  user_date_input = tuple(map(pd.to_datetime, user_date_input))
372
  start_date, end_date = user_date_input
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373
  df_ = df_.loc[df_[column].between(start_date, end_date)]
374
 
375
  date_column = column
 
191
 
192
  return fig
193
 
194
+
195
+ def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=None, rotation=45):
196
  fig, ax = plt.subplots(figsize=figsize)
197
 
198
  sns.barplot(data=df, x=x_column, y=y_column, color=color, ax=ax)
 
204
  ax.tick_params(axis='x', colors=color)
205
  ax.tick_params(axis='y', colors=color)
206
 
207
+ plt.xticks(rotation=rotation)
208
+
209
  # Remove background
210
  fig.patch.set_alpha(0)
211
  ax.patch.set_alpha(0)
 
222
 
223
  return fig
224
 
225
+
226
  def plot_grouped_bar(df, x_columns, y_column, figsize=(12, 10), colors=None, title=None):
227
  fig, ax = plt.subplots(figsize=figsize)
228
 
 
374
  if len(user_date_input) == 2:
375
  user_date_input = tuple(map(pd.to_datetime, user_date_input))
376
  start_date, end_date = user_date_input
377
+
378
+ # Determine the most appropriate time unit for plot
379
+ time_units = {
380
+ 'year': df_[column].dt.year,
381
+ 'month': df_[column].dt.to_period('M'),
382
+ 'day': df_[column].dt.date
383
+ }
384
+ unique_counts = {unit: col.nunique() for unit, col in time_units.items()}
385
+ closest_to_36 = min(unique_counts, key=lambda k: abs(unique_counts[k] - 36))
386
+
387
+ # Group by the most appropriate time unit and count occurrences
388
+ grouped = df_.groupby(time_units[closest_to_36]).size().reset_index(name='count')
389
+ grouped.columns = [column, 'count']
390
+
391
+ # Create a complete date range
392
+ if closest_to_36 == 'year':
393
+ date_range = pd.date_range(start=f"{start_date.year}-01-01", end=f"{end_date.year}-12-31", freq='YS')
394
+ elif closest_to_36 == 'month':
395
+ date_range = pd.date_range(start=start_date.replace(day=1), end=end_date + pd.offsets.MonthEnd(0), freq='MS')
396
+ else: # day
397
+ date_range = pd.date_range(start=start_date, end=end_date, freq='D')
398
+
399
+ # Create a DataFrame with the complete date range
400
+ complete_range = pd.DataFrame({column: date_range})
401
+
402
+ # Convert the date column to the appropriate format based on closest_to_36
403
+ if closest_to_36 == 'year':
404
+ complete_range[column] = complete_range[column].dt.year
405
+ elif closest_to_36 == 'month':
406
+ complete_range[column] = complete_range[column].dt.to_period('M')
407
+
408
+ # Merge the complete range with the grouped data
409
+ final_data = pd.merge(complete_range, grouped, on=column, how='left').fillna(0)
410
+
411
+ with st.status(f"Date Distributions: {column}", expanded=False) as stat:
412
+ try:
413
+ st.pyplot(plot_bar(final_data, column, 'count'))
414
+ except Exception as e:
415
+ st.error(f"Error plotting bar chart: {e}")
416
+
417
  df_ = df_.loc[df_[column].between(start_date, end_date)]
418
 
419
  date_column = column