Ashoka74 committed on
Commit
93f2f6d
·
verified ·
1 Parent(s): a37aab8

Cache classes to avoid reruns / change button tags to avoid redundancy

Browse files
Files changed (1) hide show
  1. app.py +20 -7
app.py CHANGED
@@ -71,6 +71,7 @@ import streamlit as st
71
  # Configure logging
72
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
73
 
 
74
  class UAPAnalyzer:
75
  """
76
  A class for analyzing and clustering textual data within a pandas DataFrame using
@@ -785,7 +786,7 @@ def plot_cramers_v_heatmap(data, significance_level=0.05):
785
  plt.title(f"Heatmap of Cramér's V (p < {significance_level})")
786
  return plt
787
 
788
-
789
  class UAPVisualizer:
790
  def __init__(self, data=None):
791
  pass # Initialization can be added if needed
@@ -925,7 +926,7 @@ class UAPVisualizer:
925
  plt.show()
926
 
927
 
928
-
929
  class UAPParser:
930
  def __init__(self, api_key, model="gpt-3.5-turbo-0125", col=None, format_long=None):
931
  os.environ['OPENAI_API_KEY'] = api_key
@@ -1140,9 +1141,19 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
1140
  start_date, end_date = user_date_input
1141
  df_ = df_.loc[df_[column].between(start_date, end_date)]
1142
  else:
1143
- user_text_input = right.text_input(
1144
- f"Substring or regex in {column}",
1145
- )
 
 
 
 
 
 
 
 
 
 
1146
  if user_text_input:
1147
  df_ = df_[df_[column].astype(str).str.contains(user_text_input)]
1148
  # write len of df after filtering with % of original
@@ -1273,7 +1284,7 @@ def main():
1273
  st.session_state['stage'] = 1
1274
 
1275
  # Analyze data
1276
- if st.session_state.stage > 0:
1277
  columns_to_analyze = st.multiselect(
1278
  label='Select columns to analyze',
1279
  options=parsed_responses.columns
@@ -1340,13 +1351,15 @@ def main():
1340
 
1341
  if st.session_state['data_processed']:
1342
  parsed2 = st.session_state.get('dataset', pd.DataFrame())
 
 
1343
  parsed2 = filter_dataframe(parsed2)
1344
  col1, col2 = st.columns(2)
1345
  st.dataframe(parsed2)
1346
  with col1:
1347
  col_parsed2 = st.selectbox("Which column do you want to query?", parsed2.columns)
1348
  with col2:
1349
- GEMINI_KEY = st.text_input('Gemini API Key', GEMINI_KEY, type='password', help="Enter your Gemini API key")
1350
  if col_parsed and GEMINI_KEY:
1351
  selected_column_data2 = parsed2[col_parsed2].tolist()
1352
  question2 = st.text_input("Ask a question / leave empty for summarization")
 
71
  # Configure logging
72
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
73
 
74
+ @st.cache_data(ttl=1800, max_entries=30, show_spinner='Retrieving data...')
75
  class UAPAnalyzer:
76
  """
77
  A class for analyzing and clustering textual data within a pandas DataFrame using
 
786
  plt.title(f"Heatmap of Cramér's V (p < {significance_level})")
787
  return plt
788
 
789
+ @st.cache_data(ttl=1800, max_entries=30, show_spinner='Retrieving data...')
790
  class UAPVisualizer:
791
  def __init__(self, data=None):
792
  pass # Initialization can be added if needed
 
926
  plt.show()
927
 
928
 
929
+ @st.cache_data(ttl=1800, max_entries=30, show_spinner='Retrieving data...')
930
  class UAPParser:
931
  def __init__(self, api_key, model="gpt-3.5-turbo-0125", col=None, format_long=None):
932
  os.environ['OPENAI_API_KEY'] = api_key
 
1141
  start_date, end_date = user_date_input
1142
  df_ = df_.loc[df_[column].between(start_date, end_date)]
1143
  else:
1144
+ try: # To avoid multiple buttons with same ID
1145
+ user_text_input = right.text_input(
1146
+ f"Substring or regex in {column}",
1147
+ )
1148
+ except:
1149
+ try:
1150
+ user_text_input = right.text_input(
1151
+ f"Substring or regex {column}",
1152
+ )
1153
+ except Exception as e:
1154
+ print(f'Error : {e}')
1155
+ pass
1156
+
1157
  if user_text_input:
1158
  df_ = df_[df_[column].astype(str).str.contains(user_text_input)]
1159
  # write len of df after filtering with % of original
 
1284
  st.session_state['stage'] = 1
1285
 
1286
  # Analyze data
1287
+ if st.session_state.stage > 0 and st.session_state.stage < 10 and parsed_responses is not None:
1288
  columns_to_analyze = st.multiselect(
1289
  label='Select columns to analyze',
1290
  options=parsed_responses.columns
 
1351
 
1352
  if st.session_state['data_processed']:
1353
  parsed2 = st.session_state.get('dataset', pd.DataFrame())
1354
+ if parsed2 is not None:
1355
+ st.session_state['stage'] = 10
1356
  parsed2 = filter_dataframe(parsed2)
1357
  col1, col2 = st.columns(2)
1358
  st.dataframe(parsed2)
1359
  with col1:
1360
  col_parsed2 = st.selectbox("Which column do you want to query?", parsed2.columns)
1361
  with col2:
1362
+ GEMINI_KEY = st.text_input('Gemini API Key', GEMINI_KEY, type='password', help="Enter Gemini API key")
1363
  if col_parsed and GEMINI_KEY:
1364
  selected_column_data2 = parsed2[col_parsed2].tolist()
1365
  question2 = st.text_input("Ask a question / leave empty for summarization")