Spaces:
Sleeping
Sleeping
Update analyzing.py
Browse files- analyzing.py +45 -1
analyzing.py
CHANGED
@@ -191,7 +191,8 @@ def plot_line(df, x_column, y_columns, figsize=(12, 10), color='orange', title=N
|
|
191 |
|
192 |
return fig
|
193 |
|
194 |
-
|
|
|
195 |
fig, ax = plt.subplots(figsize=figsize)
|
196 |
|
197 |
sns.barplot(data=df, x=x_column, y=y_column, color=color, ax=ax)
|
@@ -203,6 +204,8 @@ def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=Non
|
|
203 |
ax.tick_params(axis='x', colors=color)
|
204 |
ax.tick_params(axis='y', colors=color)
|
205 |
|
|
|
|
|
206 |
# Remove background
|
207 |
fig.patch.set_alpha(0)
|
208 |
ax.patch.set_alpha(0)
|
@@ -219,6 +222,7 @@ def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=Non
|
|
219 |
|
220 |
return fig
|
221 |
|
|
|
222 |
def plot_grouped_bar(df, x_columns, y_column, figsize=(12, 10), colors=None, title=None):
|
223 |
fig, ax = plt.subplots(figsize=figsize)
|
224 |
|
@@ -370,6 +374,46 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
370 |
if len(user_date_input) == 2:
|
371 |
user_date_input = tuple(map(pd.to_datetime, user_date_input))
|
372 |
start_date, end_date = user_date_input
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
373 |
df_ = df_.loc[df_[column].between(start_date, end_date)]
|
374 |
|
375 |
date_column = column
|
|
|
191 |
|
192 |
return fig
|
193 |
|
194 |
+
|
195 |
+
def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=None, rotation=45):
|
196 |
fig, ax = plt.subplots(figsize=figsize)
|
197 |
|
198 |
sns.barplot(data=df, x=x_column, y=y_column, color=color, ax=ax)
|
|
|
204 |
ax.tick_params(axis='x', colors=color)
|
205 |
ax.tick_params(axis='y', colors=color)
|
206 |
|
207 |
+
plt.xticks(rotation=rotation)
|
208 |
+
|
209 |
# Remove background
|
210 |
fig.patch.set_alpha(0)
|
211 |
ax.patch.set_alpha(0)
|
|
|
222 |
|
223 |
return fig
|
224 |
|
225 |
+
|
226 |
def plot_grouped_bar(df, x_columns, y_column, figsize=(12, 10), colors=None, title=None):
|
227 |
fig, ax = plt.subplots(figsize=figsize)
|
228 |
|
|
|
374 |
if len(user_date_input) == 2:
|
375 |
user_date_input = tuple(map(pd.to_datetime, user_date_input))
|
376 |
start_date, end_date = user_date_input
|
377 |
+
|
378 |
+
# Determine the most appropriate time unit for plot
|
379 |
+
time_units = {
|
380 |
+
'year': df_[column].dt.year,
|
381 |
+
'month': df_[column].dt.to_period('M'),
|
382 |
+
'day': df_[column].dt.date
|
383 |
+
}
|
384 |
+
unique_counts = {unit: col.nunique() for unit, col in time_units.items()}
|
385 |
+
closest_to_36 = min(unique_counts, key=lambda k: abs(unique_counts[k] - 36))
|
386 |
+
|
387 |
+
# Group by the most appropriate time unit and count occurrences
|
388 |
+
grouped = df_.groupby(time_units[closest_to_36]).size().reset_index(name='count')
|
389 |
+
grouped.columns = [column, 'count']
|
390 |
+
|
391 |
+
# Create a complete date range
|
392 |
+
if closest_to_36 == 'year':
|
393 |
+
date_range = pd.date_range(start=f"{start_date.year}-01-01", end=f"{end_date.year}-12-31", freq='YS')
|
394 |
+
elif closest_to_36 == 'month':
|
395 |
+
date_range = pd.date_range(start=start_date.replace(day=1), end=end_date + pd.offsets.MonthEnd(0), freq='MS')
|
396 |
+
else: # day
|
397 |
+
date_range = pd.date_range(start=start_date, end=end_date, freq='D')
|
398 |
+
|
399 |
+
# Create a DataFrame with the complete date range
|
400 |
+
complete_range = pd.DataFrame({column: date_range})
|
401 |
+
|
402 |
+
# Convert the date column to the appropriate format based on closest_to_36
|
403 |
+
if closest_to_36 == 'year':
|
404 |
+
complete_range[column] = complete_range[column].dt.year
|
405 |
+
elif closest_to_36 == 'month':
|
406 |
+
complete_range[column] = complete_range[column].dt.to_period('M')
|
407 |
+
|
408 |
+
# Merge the complete range with the grouped data
|
409 |
+
final_data = pd.merge(complete_range, grouped, on=column, how='left').fillna(0)
|
410 |
+
|
411 |
+
with st.status(f"Date Distributions: {column}", expanded=False) as stat:
|
412 |
+
try:
|
413 |
+
st.pyplot(plot_bar(final_data, column, 'count'))
|
414 |
+
except Exception as e:
|
415 |
+
st.error(f"Error plotting bar chart: {e}")
|
416 |
+
|
417 |
df_ = df_.loc[df_[column].between(start_date, end_date)]
|
418 |
|
419 |
date_column = column
|