Spaces:
Sleeping
Sleeping
Update parsing.py
Browse files- parsing.py +42 -1
parsing.py
CHANGED
@@ -311,11 +311,15 @@ def plot_line(df, x_column, y_columns, figsize=(12, 10), color='orange', title=N
|
|
311 |
|
312 |
return fig
|
313 |
|
|
|
314 |
def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=None):
|
315 |
fig, ax = plt.subplots(figsize=figsize)
|
316 |
|
317 |
sns.barplot(data=df, x=x_column, y=y_column, color=color, ax=ax)
|
318 |
|
|
|
|
|
|
|
319 |
ax.set_title(title if title else f'{y_column} by {x_column}', color=color, fontweight='bold')
|
320 |
ax.set_xlabel(x_column, color=color)
|
321 |
ax.set_ylabel(y_column, color=color)
|
@@ -336,7 +340,6 @@ def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=Non
|
|
336 |
ax.tick_params(axis='y', colors='orange')
|
337 |
ax.title.set_color('orange')
|
338 |
ax.legend(loc='upper right', bbox_to_anchor=(1, 1), facecolor='black', framealpha=.4, labelcolor='orange', edgecolor='orange')
|
339 |
-
|
340 |
return fig
|
341 |
|
342 |
def plot_grouped_bar(df, x_columns, y_column, figsize=(12, 10), colors=None, title=None):
|
@@ -490,6 +493,44 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
490 |
if len(user_date_input) == 2:
|
491 |
user_date_input = tuple(map(pd.to_datetime, user_date_input))
|
492 |
start_date, end_date = user_date_input
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
493 |
df_ = df_.loc[df_[column].between(start_date, end_date)]
|
494 |
|
495 |
date_column = column
|
|
|
311 |
|
312 |
return fig
|
313 |
|
314 |
+
|
315 |
def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=None):
|
316 |
fig, ax = plt.subplots(figsize=figsize)
|
317 |
|
318 |
sns.barplot(data=df, x=x_column, y=y_column, color=color, ax=ax)
|
319 |
|
320 |
+
# Rotate x-axis labels
|
321 |
+
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
|
322 |
+
|
323 |
ax.set_title(title if title else f'{y_column} by {x_column}', color=color, fontweight='bold')
|
324 |
ax.set_xlabel(x_column, color=color)
|
325 |
ax.set_ylabel(y_column, color=color)
|
|
|
340 |
ax.tick_params(axis='y', colors='orange')
|
341 |
ax.title.set_color('orange')
|
342 |
ax.legend(loc='upper right', bbox_to_anchor=(1, 1), facecolor='black', framealpha=.4, labelcolor='orange', edgecolor='orange')
|
|
|
343 |
return fig
|
344 |
|
345 |
def plot_grouped_bar(df, x_columns, y_column, figsize=(12, 10), colors=None, title=None):
|
|
|
493 |
if len(user_date_input) == 2:
|
494 |
user_date_input = tuple(map(pd.to_datetime, user_date_input))
|
495 |
start_date, end_date = user_date_input
|
496 |
+
# Determine the most appropriate time unit for plot
|
497 |
+
time_units = {
|
498 |
+
'year': df_[column].dt.year,
|
499 |
+
'month': df_[column].dt.to_period('M'),
|
500 |
+
'day': df_[column].dt.date
|
501 |
+
}
|
502 |
+
unique_counts = {unit: col.nunique() for unit, col in time_units.items()}
|
503 |
+
closest_to_36 = min(unique_counts, key=lambda k: abs(unique_counts[k] - 36))
|
504 |
+
|
505 |
+
# Group by the most appropriate time unit and count occurrences
|
506 |
+
grouped = df_.groupby(time_units[closest_to_36]).size().reset_index(name='count')
|
507 |
+
grouped.columns = [column, 'count']
|
508 |
+
|
509 |
+
# Create a complete date range
|
510 |
+
if closest_to_36 == 'year':
|
511 |
+
date_range = pd.date_range(start=f"{start_date.year}-01-01", end=f"{end_date.year}-12-31", freq='YS')
|
512 |
+
elif closest_to_36 == 'month':
|
513 |
+
date_range = pd.date_range(start=start_date.replace(day=1), end=end_date + pd.offsets.MonthEnd(0), freq='MS')
|
514 |
+
else: # day
|
515 |
+
date_range = pd.date_range(start=start_date, end=end_date, freq='D')
|
516 |
+
|
517 |
+
# Create a DataFrame with the complete date range
|
518 |
+
complete_range = pd.DataFrame({column: date_range})
|
519 |
+
|
520 |
+
# Convert the date column to the appropriate format based on closest_to_36
|
521 |
+
if closest_to_36 == 'year':
|
522 |
+
complete_range[column] = complete_range[column].dt.year
|
523 |
+
elif closest_to_36 == 'month':
|
524 |
+
complete_range[column] = complete_range[column].dt.to_period('M')
|
525 |
+
|
526 |
+
# Merge the complete range with the grouped data
|
527 |
+
final_data = pd.merge(complete_range, grouped, on=column, how='left').fillna(0)
|
528 |
+
|
529 |
+
with st.status(f"Date Distributions: {column}", expanded=False) as stat:
|
530 |
+
try:
|
531 |
+
st.pyplot(plot_bar(final_data, column, 'count'))
|
532 |
+
except Exception as e:
|
533 |
+
st.error(f"Error plotting bar chart: {e}")
|
534 |
df_ = df_.loc[df_[column].between(start_date, end_date)]
|
535 |
|
536 |
date_column = column
|