Ashoka74 committed on
Commit
bb019ba
·
verified ·
1 Parent(s): 3de8dac

Update parsing.py

Browse files
Files changed (1) hide show
  1. parsing.py +42 -1
parsing.py CHANGED
@@ -311,11 +311,15 @@ def plot_line(df, x_column, y_columns, figsize=(12, 10), color='orange', title=N
311
 
312
  return fig
313
 
 
314
  def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=None):
315
  fig, ax = plt.subplots(figsize=figsize)
316
 
317
  sns.barplot(data=df, x=x_column, y=y_column, color=color, ax=ax)
318
 
 
 
 
319
  ax.set_title(title if title else f'{y_column} by {x_column}', color=color, fontweight='bold')
320
  ax.set_xlabel(x_column, color=color)
321
  ax.set_ylabel(y_column, color=color)
@@ -336,7 +340,6 @@ def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=Non
336
  ax.tick_params(axis='y', colors='orange')
337
  ax.title.set_color('orange')
338
  ax.legend(loc='upper right', bbox_to_anchor=(1, 1), facecolor='black', framealpha=.4, labelcolor='orange', edgecolor='orange')
339
-
340
  return fig
341
 
342
  def plot_grouped_bar(df, x_columns, y_column, figsize=(12, 10), colors=None, title=None):
@@ -490,6 +493,44 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
490
  if len(user_date_input) == 2:
491
  user_date_input = tuple(map(pd.to_datetime, user_date_input))
492
  start_date, end_date = user_date_input
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
493
  df_ = df_.loc[df_[column].between(start_date, end_date)]
494
 
495
  date_column = column
 
311
 
312
  return fig
313
 
314
+
315
  def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=None):
316
  fig, ax = plt.subplots(figsize=figsize)
317
 
318
  sns.barplot(data=df, x=x_column, y=y_column, color=color, ax=ax)
319
 
320
+ # Rotate x-axis labels
321
+ ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
322
+
323
  ax.set_title(title if title else f'{y_column} by {x_column}', color=color, fontweight='bold')
324
  ax.set_xlabel(x_column, color=color)
325
  ax.set_ylabel(y_column, color=color)
 
340
  ax.tick_params(axis='y', colors='orange')
341
  ax.title.set_color('orange')
342
  ax.legend(loc='upper right', bbox_to_anchor=(1, 1), facecolor='black', framealpha=.4, labelcolor='orange', edgecolor='orange')
 
343
  return fig
344
 
345
  def plot_grouped_bar(df, x_columns, y_column, figsize=(12, 10), colors=None, title=None):
 
493
  if len(user_date_input) == 2:
494
  user_date_input = tuple(map(pd.to_datetime, user_date_input))
495
  start_date, end_date = user_date_input
496
+ # Determine the most appropriate time unit for plot
497
+ time_units = {
498
+ 'year': df_[column].dt.year,
499
+ 'month': df_[column].dt.to_period('M'),
500
+ 'day': df_[column].dt.date
501
+ }
502
+ unique_counts = {unit: col.nunique() for unit, col in time_units.items()}
503
+ closest_to_36 = min(unique_counts, key=lambda k: abs(unique_counts[k] - 36))
504
+
505
+ # Group by the most appropriate time unit and count occurrences
506
+ grouped = df_.groupby(time_units[closest_to_36]).size().reset_index(name='count')
507
+ grouped.columns = [column, 'count']
508
+
509
+ # Create a complete date range
510
+ if closest_to_36 == 'year':
511
+ date_range = pd.date_range(start=f"{start_date.year}-01-01", end=f"{end_date.year}-12-31", freq='YS')
512
+ elif closest_to_36 == 'month':
513
+ date_range = pd.date_range(start=start_date.replace(day=1), end=end_date + pd.offsets.MonthEnd(0), freq='MS')
514
+ else: # day
515
+ date_range = pd.date_range(start=start_date, end=end_date, freq='D')
516
+
517
+ # Create a DataFrame with the complete date range
518
+ complete_range = pd.DataFrame({column: date_range})
519
+
520
+ # Convert the date column to the appropriate format based on closest_to_36
521
+ if closest_to_36 == 'year':
522
+ complete_range[column] = complete_range[column].dt.year
523
+ elif closest_to_36 == 'month':
524
+ complete_range[column] = complete_range[column].dt.to_period('M')
525
+
526
+ # Merge the complete range with the grouped data
527
+ final_data = pd.merge(complete_range, grouped, on=column, how='left').fillna(0)
528
+
529
+ with st.status(f"Date Distributions: {column}", expanded=False) as stat:
530
+ try:
531
+ st.pyplot(plot_bar(final_data, column, 'count'))
532
+ except Exception as e:
533
+ st.error(f"Error plotting bar chart: {e}")
534
  df_ = df_.loc[df_[column].between(start_date, end_date)]
535
 
536
  date_column = column