Spaces:
Sleeping
Sleeping
Cache classes to avoid reruns / change button tags to avoid redundancy
Browse files
app.py
CHANGED
@@ -71,6 +71,7 @@ import streamlit as st
|
|
71 |
# Configure logging
|
72 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
73 |
|
|
|
74 |
class UAPAnalyzer:
|
75 |
"""
|
76 |
A class for analyzing and clustering textual data within a pandas DataFrame using
|
@@ -785,7 +786,7 @@ def plot_cramers_v_heatmap(data, significance_level=0.05):
|
|
785 |
plt.title(f"Heatmap of Cramér's V (p < {significance_level})")
|
786 |
return plt
|
787 |
|
788 |
-
|
789 |
class UAPVisualizer:
|
790 |
def __init__(self, data=None):
|
791 |
pass # Initialization can be added if needed
|
@@ -925,7 +926,7 @@ class UAPVisualizer:
|
|
925 |
plt.show()
|
926 |
|
927 |
|
928 |
-
|
929 |
class UAPParser:
|
930 |
def __init__(self, api_key, model="gpt-3.5-turbo-0125", col=None, format_long=None):
|
931 |
os.environ['OPENAI_API_KEY'] = api_key
|
@@ -1140,9 +1141,19 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
1140 |
start_date, end_date = user_date_input
|
1141 |
df_ = df_.loc[df_[column].between(start_date, end_date)]
|
1142 |
else:
|
1143 |
-
|
1144 |
-
|
1145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1146 |
if user_text_input:
|
1147 |
df_ = df_[df_[column].astype(str).str.contains(user_text_input)]
|
1148 |
# write len of df after filtering with % of original
|
@@ -1273,7 +1284,7 @@ def main():
|
|
1273 |
st.session_state['stage'] = 1
|
1274 |
|
1275 |
# Analyze data
|
1276 |
-
if st.session_state.stage > 0:
|
1277 |
columns_to_analyze = st.multiselect(
|
1278 |
label='Select columns to analyze',
|
1279 |
options=parsed_responses.columns
|
@@ -1340,13 +1351,15 @@ def main():
|
|
1340 |
|
1341 |
if st.session_state['data_processed']:
|
1342 |
parsed2 = st.session_state.get('dataset', pd.DataFrame())
|
|
|
|
|
1343 |
parsed2 = filter_dataframe(parsed2)
|
1344 |
col1, col2 = st.columns(2)
|
1345 |
st.dataframe(parsed2)
|
1346 |
with col1:
|
1347 |
col_parsed2 = st.selectbox("Which column do you want to query?", parsed2.columns)
|
1348 |
with col2:
|
1349 |
-
GEMINI_KEY = st.text_input('Gemini API Key', GEMINI_KEY, type='password', help="Enter
|
1350 |
if col_parsed and GEMINI_KEY:
|
1351 |
selected_column_data2 = parsed2[col_parsed2].tolist()
|
1352 |
question2 = st.text_input("Ask a question / leave empty for summarization")
|
|
|
71 |
# Configure logging
|
72 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
73 |
|
74 |
+
@st.cache_data(ttl=1800, max_entries=30, show_spinner='Retrieving data...')
|
75 |
class UAPAnalyzer:
|
76 |
"""
|
77 |
A class for analyzing and clustering textual data within a pandas DataFrame using
|
|
|
786 |
plt.title(f"Heatmap of Cramér's V (p < {significance_level})")
|
787 |
return plt
|
788 |
|
789 |
+
@st.cache_data(ttl=1800, max_entries=30, show_spinner='Retrieving data...')
|
790 |
class UAPVisualizer:
|
791 |
def __init__(self, data=None):
|
792 |
pass # Initialization can be added if needed
|
|
|
926 |
plt.show()
|
927 |
|
928 |
|
929 |
+
@st.cache_data(ttl=1800, max_entries=30, show_spinner='Retrieving data...')
|
930 |
class UAPParser:
|
931 |
def __init__(self, api_key, model="gpt-3.5-turbo-0125", col=None, format_long=None):
|
932 |
os.environ['OPENAI_API_KEY'] = api_key
|
|
|
1141 |
start_date, end_date = user_date_input
|
1142 |
df_ = df_.loc[df_[column].between(start_date, end_date)]
|
1143 |
else:
|
1144 |
+
try: # To avoid multiple buttons with same ID
|
1145 |
+
user_text_input = right.text_input(
|
1146 |
+
f"Substring or regex in {column}",
|
1147 |
+
)
|
1148 |
+
except:
|
1149 |
+
try:
|
1150 |
+
user_text_input = right.text_input(
|
1151 |
+
f"Substring or regex {column}",
|
1152 |
+
)
|
1153 |
+
except Exception as e:
|
1154 |
+
print(f'Error : {e}')
|
1155 |
+
pass
|
1156 |
+
|
1157 |
if user_text_input:
|
1158 |
df_ = df_[df_[column].astype(str).str.contains(user_text_input)]
|
1159 |
# write len of df after filtering with % of original
|
|
|
1284 |
st.session_state['stage'] = 1
|
1285 |
|
1286 |
# Analyze data
|
1287 |
+
if st.session_state.stage > 0 and st.session_state.stage < 10 and parsed_responses is not None:
|
1288 |
columns_to_analyze = st.multiselect(
|
1289 |
label='Select columns to analyze',
|
1290 |
options=parsed_responses.columns
|
|
|
1351 |
|
1352 |
if st.session_state['data_processed']:
|
1353 |
parsed2 = st.session_state.get('dataset', pd.DataFrame())
|
1354 |
+
if parsed2 is not None:
|
1355 |
+
st.session_state['stage'] = 10
|
1356 |
parsed2 = filter_dataframe(parsed2)
|
1357 |
col1, col2 = st.columns(2)
|
1358 |
st.dataframe(parsed2)
|
1359 |
with col1:
|
1360 |
col_parsed2 = st.selectbox("Which column do you want to query?", parsed2.columns)
|
1361 |
with col2:
|
1362 |
+
GEMINI_KEY = st.text_input('Gemini API Key', GEMINI_KEY, type='password', help="Enter Gemini API key")
|
1363 |
if col_parsed and GEMINI_KEY:
|
1364 |
selected_column_data2 = parsed2[col_parsed2].tolist()
|
1365 |
question2 = st.text_input("Ask a question / leave empty for summarization")
|