acecalisto3 committed on
Commit
3a6d326
·
verified ·
1 Parent(s): 90ff6aa

Update app2.py

Browse files
Files changed (1) hide show
  1. app2.py +278 -143
app2.py CHANGED
@@ -972,16 +972,27 @@ def generate_qr_codes(data: Union[str, Dict, List], combined: bool = True) -> Li
972
  return []
973
 
974
  # --- Chatbot Logic ---
975
- def respond_to_chat(message: str, chat_history: List[Tuple[str, str]], chatbot_data: Optional[List[Dict]]) -> Tuple[List[Tuple[str, str]], List[Dict]]:
976
- """Responds to user chat messages based on the loaded JSON data."""
 
 
 
 
 
 
 
 
 
977
  if chatbot_data is None or not chatbot_data:
978
  chat_history.append((message, "Please process some data first using the other tabs before chatting."))
979
- return chat_history, chatbot_data
980
 
981
  chat_history.append((message, ""))
982
 
983
  response = ""
984
  lower_message = message.lower().strip()
 
 
985
 
986
  try:
987
  # Attempt to flatten the data structure for easier querying
@@ -1098,195 +1109,236 @@ def respond_to_chat(message: str, chat_history: List[Tuple[str, str]], chatbot_d
1098
  else:
1099
  response = f"I couldn't find a column named '{column_name}'. Available columns are: {', '.join(df.columns)}"
1100
 
1101
- # Filter data based on a simple condition
1102
- match = re.search(r'show me items where (\w+)\s*([<>=!]+)\s*([\w"\']*)', lower_message) # Added quotes to value regex
1103
- if match:
1104
- column_name, operator, value_str = match.groups()
1105
- try:
1106
- # Attempt to infer value type (numeric, string, boolean)
1107
- value: Any
1108
- is_numeric_comparison = False
1109
- is_boolean_comparison = False
1110
-
1111
- if value_str.lower() in ['true', 'false']:
1112
- value = value_str.lower() == 'true'
1113
- is_boolean_comparison = True
1114
- else:
1115
- try:
1116
- value = float(value_str.strip("'\""))
1117
- is_numeric_comparison = True
1118
- except ValueError:
1119
- value = value_str.strip("'\"")
1120
-
1121
-
1122
- if column_name in df.columns:
1123
- if is_numeric_comparison:
1124
- numeric_col = pd.to_numeric(df[column_name], errors='coerce')
1125
- filtered_df = df.loc[pd.notna(numeric_col)]
1126
-
1127
- if operator == '>': filtered_results = filtered_df[numeric_col > value]
1128
- elif operator == '<': filtered_results = filtered_df[numeric_col < value]
1129
- elif operator == '>=': filtered_results = filtered_df[numeric_col >= value]
1130
- elif operator == '<=': filtered_results = filtered_df[numeric_col <= value]
1131
- elif operator == '==': filtered_results = filtered_df[numeric_col == value]
1132
- elif operator == '!=': filtered_results = filtered_df[numeric_col != value]
1133
- else:
1134
- filtered_results = pd.DataFrame()
1135
- response = f"Unsupported numeric operator: {operator}. Try >, <, >=, <=, ==, !=."
1136
-
1137
- if not filtered_results.empty:
1138
- preview = filtered_results.to_json(orient='records', indent=2)[:500] + "..." if len(filtered_results.to_json()) > 500 else filtered_results.to_json(orient='records', indent=2)
1139
- response = f"Here are the items where '{column_name}' {operator} {value_str}:\n```json\n{preview}\n```"
1140
- elif 'response' not in locals():
1141
- response = f"No items found where '{column_name}' {operator} {value_str}."
1142
-
1143
- elif is_boolean_comparison:
1144
- # Ensure column is boolean or can be interpreted as boolean
1145
- boolean_col = df[column_name].astype(bool, errors='ignore') # Coerce errors, might need more robust check
1146
- if operator == '==': filtered_results = df[boolean_col == value]
1147
- elif operator == '!=': filtered_results = df[boolean_col != value]
1148
- else:
1149
- filtered_results = pd.DataFrame()
1150
- response = f"Unsupported boolean operator: {operator}. Try == or !=."
1151
 
1152
- if not filtered_results.empty:
1153
- preview = filtered_results.to_json(orient='records', indent=2)[:500] + "..." if len(filtered_results.to_json()) > 500 else filtered_results.to_json(orient='records', indent=2)
1154
- response = f"Here are the items where '{column_name}' is {value_str}:\n```json\n{preview}\n```"
1155
- elif 'response' not in locals():
1156
- response = f"No items found where '{column_name}' is {value_str}."
1157
 
 
1158
 
1159
- elif operator == '==':
1160
- filtered_results = df[df[column_name] == value]
1161
- if not filtered_results.empty:
1162
- preview = filtered_results.to_json(orient='records', indent=2)[:500] + "..." if len(filtered_results.to_json()) > 500 else filtered_results.to_json(orient='records', indent=2)
1163
- response = f"Here are the items where '{column_name}' is '{value}':\n```json\n{preview}\n```"
1164
- else:
1165
- response = f"No items found where '{column_name}' is '{value}'."
1166
- elif operator == '!=':
1167
- filtered_results = df[df[column_name] != value]
1168
- if not filtered_results.empty:
1169
- preview = filtered_results.to_json(orient='records', indent=2)[:500] + "..." if len(filtered_results.to_json()) > 500 else filtered_results.to_json(orient='records', indent=2)
1170
- response = f"Here are the items where '{column_name}' is not '{value}':\n```json\n{preview}\n```"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1171
  else:
1172
- response = f"All items have '{column_name}' as '{value}' or the column doesn't exist."
1173
- else:
1174
- response = f"Unsupported operator for string comparison: {operator}. Try == or !=."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1175
 
1176
- else:
1177
- response = f"I couldn't find a column named '{column_name}'. Available columns are: {', '.join(df.columns)}"
1178
 
1179
- except Exception as e:
1180
- response = f"An error occurred while filtering data: {e}"
1181
- logger.error(f"Error filtering data based on condition: {e}")
1182
 
1183
  # Request structured output (e.g., as CSV or simplified JSON)
1184
- if "output as csv" in lower_message or "export as csv" in lower_message:
 
 
 
 
 
 
 
1185
  if df is not None and not df.empty:
1186
  csv_output = df.to_csv(index=False)
1187
  response = f"Here is the data in CSV format:\n```csv\n{csv_output[:1000]}...\n```\n(Output truncated for chat display)"
1188
  else:
1189
  response = "There is no data available to output as CSV."
1190
-
1191
- elif "output as json" in lower_message or "export as json" in lower_message:
1192
  if df is not None and not df.empty:
1193
  json_output = df.to_json(orient='records', indent=2)
1194
  response = f"Here is the data in JSON format:\n```json\n{json_output[:1000]}...\n```\n(Output truncated for chat display)"
1195
  else:
1196
  response = "There is no data available to output as JSON."
1197
 
1198
-
1199
- # --- General Queries (if no DataFrame or specific query matched) ---
1200
-
1201
- if not response:
1202
  if "how many items" in lower_message or "number of items" in lower_message:
1203
- if isinstance(chatbot_data, list):
1204
- response = f"There are {len(chatbot_data)} top-level items in the processed data."
 
 
 
 
1205
  elif isinstance(chatbot_data, dict):
1206
  response = "The processed data is a single dictionary, not a list of items."
1207
  else:
1208
  response = "The processed data is not a standard list or dictionary structure."
1209
 
1210
  elif "what is the structure" in lower_message or "tell me about the data" in lower_message:
1211
- if isinstance(chatbot_data, list) and chatbot_data:
 
 
 
 
 
 
 
 
1212
  sample_item = chatbot_data[0]
1213
- response = f"The data is a list containing {len(chatbot_data)} items. The first item has the following top-level keys: {list(sample_item.keys())}. I can try to tell you more about specific keys if you like."
1214
  elif isinstance(chatbot_data, dict):
1215
  response = f"The data is a dictionary with the following top-level keys: {list(chatbot_data.keys())}."
1216
  else:
1217
  response = "The processed data is not a standard list or dictionary structure that I can easily describe."
1218
 
 
 
1219
  elif "show me" in lower_message or "get me" in lower_message or "extract" in lower_message:
1220
- parts = lower_message.split("show me")
1221
- if len(parts) > 1:
1222
- key_request = parts[1].strip().split(" ")[0]
1223
- extracted_values = []
1224
- if isinstance(chatbot_data, list):
1225
- for item in chatbot_data:
1226
- if isinstance(item, dict) and key_request in item:
1227
- extracted_values.append(item[key_request])
1228
- elif isinstance(chatbot_data, dict) and key_request in chatbot_data:
1229
- extracted_values.append(chatbot_data[key_request])
1230
-
1231
- if extracted_values:
1232
- preview = json.dumps(extracted_values, indent=2)[:500] + "..." if len(json.dumps(extracted_values)) > 500 else json.dumps(extracted_values, indent=2)
1233
- response = f"Here are the values for '{key_request}':\n```json\n{preview}\n```"
1234
- else:
1235
- response = f"I couldn't find a key named '{key_request}' in the top level of the data items."
1236
- else:
1237
- response = "What specifically would you like me to show or extract?"
1238
 
1239
  # --- Speculation about Modifications ---
1240
  elif "how can i modify" in lower_message or "how to change" in lower_message or "can i add" in lower_message or "can i remove" in lower_message:
1241
  response = "I cannot directly modify the data here, but I can tell you how you *could* modify it. What kind of change are you considering (e.g., adding an item, changing a value, removing a field)?"
1242
-
1243
  elif "add a field" in lower_message or "add a column" in lower_message:
1244
  response = "To add a field (or column if the data is tabular), you would typically iterate through each item (or row) in the data and add the new key-value pair. For example, adding a 'status' field with a default value."
1245
-
1246
  elif "change a value" in lower_message or "update a field" in lower_message:
1247
  response = "To change a value, you would need to identify the specific item(s) and the field you want to update. You could use a condition (like filtering) to find the right items and then assign a new value to the field."
1248
-
1249
  elif "remove a field" in lower_message or "delete a column" in lower_message:
1250
  response = "To remove a field, you would iterate through each item and delete the specified key. Be careful, as this is irreversible."
1251
-
1252
  elif "restructure" in lower_message or "change the format" in lower_message:
1253
  response = "Restructuring data involves transforming it into a different shape. This could mean flattening nested objects, grouping items, or pivoting data. This often requires writing custom code to map the old structure to the new one."
1254
-
1255
  elif "what if i" in lower_message or "if i changed" in lower_message:
1256
  response = "Tell me what specific change you're contemplating, and I can speculate on the potential impact or how you might approach it programmatically."
1257
 
1258
-
1259
  # --- General Conversation / Fallback ---
1260
  elif "hello" in lower_message or "hi" in lower_message:
1261
  response = random.choice(["Hello! How can I help you understand the processed data?", "Hi there! What's on your mind about this data?", "Hey! Ask me anything about the data you've loaded."])
1262
-
1263
  elif "thank you" in lower_message or "thanks" in lower_message:
1264
  response = random.choice(["You're welcome!", "Glad I could help.", "No problem! Let me know if you have more questions about the data."])
1265
-
1266
- elif "clear chat" in lower_message:
1267
- chat_history = []
1268
  response = "Chat history cleared."
1269
-
1270
- elif not response:
1271
  response = random.choice([
1272
- "I can analyze the data you've processed. What would you like to know?",
1273
- "Ask me about the number of items, the structure, or values of specific fields.",
1274
- "I can perform basic analysis like counting unique values or calculating sums/averages if the data is suitable.",
1275
- "Tell me what you want to extract or filter from the data.",
1276
- "I'm still learning, but I can try to answer questions about the data structure and content."
1277
  ])
1278
 
1279
  except Exception as e:
1280
  logger.error(f"Chatbot runtime error: {e}")
1281
  response = f"An internal error occurred while processing your request: {e}"
1282
  response += "\nPlease try rephrasing your question or clear the chat history."
 
 
1283
 
1284
- if chat_history and chat_history[-1][1] == "":
 
 
 
1285
  chat_history[-1] = (chat_history[-1][0], response)
1286
- else:
1287
- chat_history.append(("", response))
1288
 
1289
- return chat_history, chatbot_data
1290
 
1291
  # --- Gradio Interface Definition ---
1292
  def create_modern_interface():
@@ -1454,6 +1506,11 @@ def create_modern_interface():
1454
  value=True,
1455
  info="Generate sequential QR codes for combined data"
1456
  )
 
 
 
 
 
1457
  process_btn = gr.Button(
1458
  "πŸ”„ Process & Generate QR",
1459
  variant="primary"
@@ -1477,11 +1534,18 @@ def create_modern_interface():
1477
  with gr.Tab("πŸ€– Chat with Data") as chat_tab:
1478
  chat_history = gr.State([])
1479
  chatbot = gr.Chatbot(label="Data Chatbot")
 
 
1480
  with gr.Row():
1481
  chat_input = gr.Textbox(label="Your Message", placeholder="Ask me about the processed data...")
1482
  send_msg_btn = gr.Button("Send")
 
 
 
 
1483
  clear_chat_btn = gr.Button("Clear Chat History")
1484
 
 
1485
  def load_example():
1486
  example = {
1487
  "type": "product_catalog",
@@ -1539,8 +1603,8 @@ def create_modern_interface():
1539
 
1540
  return viewport_html
1541
 
1542
- def process_inputs(urls, files, text, combine, crawl_depth):
1543
- """Process all inputs and generate QR codes"""
1544
  results = []
1545
  processing_status_messages = []
1546
 
@@ -1602,18 +1666,23 @@ def create_modern_interface():
1602
 
1603
  qr_paths = []
1604
  final_json_output = None
 
1605
 
1606
  if results:
1607
- qr_paths = generate_qr_codes(results, combine)
1608
- final_json_output = results
1609
-
1610
- if qr_paths:
1611
- processing_status_messages.append(f"βœ… Successfully generated {len(qr_paths)} QR codes.")
 
 
 
1612
  else:
1613
- processing_status_messages.append("❌ Failed to generate QR codes.")
1614
-
1615
  else:
1616
  processing_status_messages.append("⚠️ No valid content collected from inputs.")
 
1617
 
1618
  except Exception as e:
1619
  logger.error(f"Overall processing error in process_inputs: {e}")
@@ -1640,7 +1709,7 @@ def create_modern_interface():
1640
 
1641
  process_btn.click(
1642
  process_inputs,
1643
- inputs=[url_input, file_input, text_input, combine_data, crawl_depth_slider],
1644
  outputs=[output_json, output_gallery, output_text, chatbot_data]
1645
  ).then(
1646
  on_qr_generation,
@@ -1652,8 +1721,10 @@ def create_modern_interface():
1652
 
1653
  send_msg_btn.click(
1654
  respond_to_chat,
1655
- inputs=[chat_input, chat_history, chatbot_data],
1656
- outputs=[chatbot, chatbot_data]
 
 
1657
  ).then(
1658
  lambda: "",
1659
  inputs=None,
@@ -1671,9 +1742,73 @@ def create_modern_interface():
1671
  )
1672
 
1673
  clear_chat_btn.click(
1674
- lambda: [],
1675
  inputs=None,
1676
- outputs=chatbot
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1677
  )
1678
 
1679
  gr.Markdown("""
@@ -1722,4 +1857,4 @@ def main():
1722
  raise
1723
 
1724
  if __name__ == "__main__":
1725
- main()
 
972
  return []
973
 
974
  # --- Chatbot Logic ---
975
+ def respond_to_chat(
976
+ message: str,
977
+ chat_history: List[Tuple[str, str]],
978
+ chatbot_data: Optional[List[Dict]],
979
+ # Add current_filtered_df_state as input, it will be updated and returned
980
+ current_filtered_df_state: Optional[pd.DataFrame]
981
+ ) -> Tuple[List[Tuple[str, str]], List[Dict], Optional[pd.DataFrame]]:
982
+ """
983
+ Responds to user chat messages based on the loaded JSON data.
984
+ Manages and returns the state of the filtered DataFrame.
985
+ """
986
  if chatbot_data is None or not chatbot_data:
987
  chat_history.append((message, "Please process some data first using the other tabs before chatting."))
988
+ return chat_history, chatbot_data, current_filtered_df_state # Return existing state
989
 
990
  chat_history.append((message, ""))
991
 
992
  response = ""
993
  lower_message = message.lower().strip()
994
+ # Initialize new_filtered_df_state with the current state to preserve it unless a filter changes it
995
+ new_filtered_df_state = current_filtered_df_state
996
 
997
  try:
998
  # Attempt to flatten the data structure for easier querying
 
1109
  else:
1110
  response = f"I couldn't find a column named '{column_name}'. Available columns are: {', '.join(df.columns)}"
1111
 
1112
+ # Enhanced Filter data based on more complex conditions
1113
+ # Regex to capture: "filter by status active", "show items where category is 'electronics'", "find entries where price > 100"
1114
+ # It tries to capture:
1115
+ # 1. column_name (e.g., category, status, price)
1116
+ # 2. operator (e.g., is, equals, =, >, <, contains, starts with, ends with) - flexible operators
1117
+ # 3. value (e.g., 'electronics', active, 100) - can be quoted or unquoted
1118
+ filter_match = re.search(
1119
+ r'(?:filter|show items|show me items|find entries|select items|get items)\s+' # Optional action phrases
1120
+ r'(?:where|by|for|with|if)\s+' # Keyword indicating condition
1121
+ r'(\w+)\s+' # Column name
1122
+ r'(is|equals?|==|!=|>=?|<=?|contains?|starts with|ends with)\s+' # Operator
1123
+ r'([\'"]?[\w\s.-]+[\'"]?)', # Value (allows spaces, dots, hyphens if quoted, or single words)
1124
+ lower_message
1125
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1126
 
1127
+ if filter_match:
1128
+ column_name, operator, value_str = filter_match.groups()
1129
+ column_name = column_name.strip()
1130
+ operator = operator.strip().lower()
1131
+ value_str = value_str.strip().strip("'\"")
1132
 
1133
+ logger.info(f"Filter request: Column='{column_name}', Operator='{operator}', Value='{value_str}'")
1134
 
1135
+ if column_name not in df.columns:
1136
+ response = f"I couldn't find a column named '{column_name}'. Available columns are: {', '.join(df.columns)}"
1137
+ new_filtered_df_state = None # Clear previous filter if column not found
1138
+ else:
1139
+ # IMPORTANT: Always filter from the original full dataframe 'df'
1140
+ active_df_to_filter = df.copy()
1141
+ try:
1142
+ # Attempt to infer value type for comparison
1143
+ target_value: Any
1144
+ col_dtype = df[column_name].dtype
1145
+
1146
+ if pd.api.types.is_numeric_dtype(col_dtype) and operator in ['>', '>=', '<', '<=', '==', '!=']:
1147
+ try:
1148
+ target_value = float(value_str)
1149
+ col_series = pd.to_numeric(filtered_df[column_name], errors='coerce')
1150
+ except ValueError:
1151
+ response = f"For numeric column '{column_name}', '{value_str}' is not a valid number."
1152
+ target_value = None # Error case
1153
+ elif pd.api.types.is_bool_dtype(col_dtype) or value_str.lower() in ['true', 'false']:
1154
+ target_value = value_str.lower() == 'true'
1155
+ col_series = filtered_df[column_name].astype(bool, errors='ignore')
1156
+ else: # Assume string comparison otherwise
1157
+ target_value = str(value_str)
1158
+ col_series = filtered_df[column_name].astype(str).str.lower() # Case-insensitive for strings
1159
+ value_str_lower = target_value.lower()
1160
+
1161
+
1162
+ if 'response' not in locals(): # If no type conversion error occurred
1163
+ if operator in ['is', 'equals', '==']:
1164
+ if pd.api.types.is_numeric_dtype(col_dtype) or pd.api.types.is_bool_dtype(col_dtype):
1165
+ condition = col_series == target_value
1166
+ else: # String comparison
1167
+ condition = col_series == value_str_lower
1168
+ elif operator == '!=':
1169
+ if pd.api.types.is_numeric_dtype(col_dtype) or pd.api.types.is_bool_dtype(col_dtype):
1170
+ condition = col_series != target_value
1171
+ else: # String comparison
1172
+ condition = col_series != value_str_lower
1173
+ elif operator == '>' and pd.api.types.is_numeric_dtype(col_dtype):
1174
+ condition = col_series > target_value
1175
+ elif operator == '>=' and pd.api.types.is_numeric_dtype(col_dtype):
1176
+ condition = col_series >= target_value
1177
+ elif operator == '<' and pd.api.types.is_numeric_dtype(col_dtype):
1178
+ condition = col_series < target_value
1179
+ elif operator == '<=' and pd.api.types.is_numeric_dtype(col_dtype):
1180
+ condition = col_series <= target_value
1181
+ elif operator in ['contains', 'contain'] and pd.api.types.is_string_dtype(col_series):
1182
+ condition = col_series.str.contains(value_str_lower, case=False, na=False)
1183
+ elif operator == 'starts with' and pd.api.types.is_string_dtype(col_series):
1184
+ condition = col_series.str.startswith(value_str_lower, na=False)
1185
+ elif operator == 'ends with' and pd.api.types.is_string_dtype(col_series):
1186
+ condition = col_series.str.endswith(value_str_lower, na=False)
1187
  else:
1188
+ response = f"Unsupported operator '{operator}' for column '{column_name}' (type: {col_dtype})."
1189
+ condition = None
1190
+ # If operator was bad, response is set, clear filter state
1191
+ if response: new_filtered_df_state = None
1192
+
1193
+
1194
+ if condition is not None:
1195
+ # Apply condition to the active_df_to_filter (which is a copy of the full df)
1196
+ filtered_results_df = active_df_to_filter[condition]
1197
+ if not filtered_results_df.empty:
1198
+ new_filtered_df_state = filtered_results_df # Update state with new filter result
1199
+ num_results = len(filtered_results_df)
1200
+ preview_rows = min(num_results, 5)
1201
+ preview_cols = min(len(filtered_results_df.columns), 5)
1202
+
1203
+ preview_df = filtered_results_df.head(preview_rows).iloc[:, :preview_cols]
1204
+ preview_str = preview_df.to_string(index=False)
1205
+
1206
+ response = (f"Found {num_results} items where '{column_name}' {operator} '{value_str}'.\n"
1207
+ f"Here's a preview:\n```\n{preview_str}\n```\n"
1208
+ f"The full filtered dataset is now available for download using the 'Download Filtered JSON' button.")
1209
+ else:
1210
+ new_filtered_df_state = pd.DataFrame() # Store empty DF for "no results"
1211
+ response = f"No items found where '{column_name}' {operator} '{value_str}'."
1212
+ # If condition is None (e.g. bad operator) and response not already set by type check, set generic invalid op message.
1213
+ elif not response: # Avoid overwriting specific error from type check
1214
+ response = f"Unsupported operator '{operator}' for column '{column_name}' (type: {col_dtype})."
1215
+ new_filtered_df_state = None
1216
+
1217
+
1218
+ except ValueError as ve: # Specifically catch ValueError for target_value conversion
1219
+ response = f"Invalid value '{value_str}' for numeric column '{column_name}'. {ve}"
1220
+ new_filtered_df_state = None # Clear on value error
1221
+ logger.warning(f"ValueError during filter: {ve}")
1222
+ except Exception as e:
1223
+ new_filtered_df_state = None # Clear on other errors
1224
+ response = f"An error occurred while applying the filter: {e}"
1225
+ logger.error(f"Error applying filter (column='{column_name}', op='{operator}', val='{value_str}'): {e}")
1226
+ # If the message was a filter, new_filtered_df_state is now set (or None/empty if error/no results)
1227
+
1228
+ # --- End of Enhanced Filter Logic ---
1229
 
1230
+ # If `response` is still empty, it means no filter query was matched by the filter_match regex.
1231
+ # In this case, new_filtered_df_state (initialized from current_filtered_df_state) remains unchanged.
1232
 
 
 
 
1233
 
1234
  # Request structured output (e.g., as CSV or simplified JSON)
1235
+ # This section should act on the *original* df unless specifically asked for filtered data export.
1236
+ # The new download buttons handle filtered data export separately.
1237
+ # Let's assume for now it acts on the original df, and a separate command would be needed for "export filtered data"
1238
+ # If no filter query matched, and no other specific df query matched,
1239
+ # then `response` might still be empty. `new_filtered_df_state` will be the same as `current_filtered_df_state`.
1240
+ # The general queries below should not reset `new_filtered_df_state` unless it's a "clear" command.
1241
+
1242
+ elif "output as csv" in lower_message or "export as csv" in lower_message:
1243
  if df is not None and not df.empty:
1244
  csv_output = df.to_csv(index=False)
1245
  response = f"Here is the data in CSV format:\n```csv\n{csv_output[:1000]}...\n```\n(Output truncated for chat display)"
1246
  else:
1247
  response = "There is no data available to output as CSV."
1248
+ elif "output as json" in lower_message or "export as json" in lower_message: # Note: "export as json" is different from download buttons
 
1249
  if df is not None and not df.empty:
1250
  json_output = df.to_json(orient='records', indent=2)
1251
  response = f"Here is the data in JSON format:\n```json\n{json_output[:1000]}...\n```\n(Output truncated for chat display)"
1252
  else:
1253
  response = "There is no data available to output as JSON."
1254
 
1255
+ # --- General Queries (if no DataFrame or specific query matched AND no filter was applied in this turn) ---
1256
+ # These should not clear new_filtered_df_state unless it's a "clear chat"
1257
+ if not response: # Only enter if no response has been generated by DataFrame/filter logic
 
1258
  if "how many items" in lower_message or "number of items" in lower_message:
1259
+ if new_filtered_df_state is not None and not new_filtered_df_state.empty:
1260
+ response = f"The currently filtered dataset has {len(new_filtered_df_state)} items. The original dataset has {len(df if df is not None else chatbot_data)} items."
1261
+ elif df is not None: # Check df from original chatbot_data
1262
+ response = f"There are {len(df)} top-level items in the processed data."
1263
+ elif isinstance(chatbot_data, list): # Fallback if df creation failed but chatbot_data is list
1264
+ response = f"There are {len(chatbot_data)} top-level items in the processed data (not in DataFrame)."
1265
  elif isinstance(chatbot_data, dict):
1266
  response = "The processed data is a single dictionary, not a list of items."
1267
  else:
1268
  response = "The processed data is not a standard list or dictionary structure."
1269
 
1270
  elif "what is the structure" in lower_message or "tell me about the data" in lower_message:
1271
+ if new_filtered_df_state is not None and not new_filtered_df_state.empty:
1272
+ response = f"The filtered data has columns: {', '.join(new_filtered_df_state.columns)}. "
1273
+ if df is not None:
1274
+ response += f"The original data has columns: {', '.join(df.columns)}."
1275
+ else:
1276
+ response += "Original data structure is not tabular."
1277
+ elif df is not None:
1278
+ response = f"The data is a table with {len(df)} rows and columns: {', '.join(df.columns)}."
1279
+ elif isinstance(chatbot_data, list) and chatbot_data:
1280
  sample_item = chatbot_data[0]
1281
+ response = f"The data is a list containing {len(chatbot_data)} items. The first item has the following top-level keys: {list(sample_item.keys())}."
1282
  elif isinstance(chatbot_data, dict):
1283
  response = f"The data is a dictionary with the following top-level keys: {list(chatbot_data.keys())}."
1284
  else:
1285
  response = "The processed data is not a standard list or dictionary structure that I can easily describe."
1286
 
1287
+ # "show me" without a filter condition might be ambiguous.
1288
+ # Let's assume it refers to the original data or provide guidance.
1289
  elif "show me" in lower_message or "get me" in lower_message or "extract" in lower_message:
1290
+ # This specific 'show me' without 'where' should not trigger a filter or clear existing filter state.
1291
+ # It's a general request for data, which is too broad. Guide the user.
1292
+ response = "If you want to filter the data, please use a phrase like 'show me items where column_name is value'. If you want to see the raw data, consider using the download buttons."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1293
 
1294
  # --- Speculation about Modifications ---
1295
  elif "how can i modify" in lower_message or "how to change" in lower_message or "can i add" in lower_message or "can i remove" in lower_message:
1296
  response = "I cannot directly modify the data here, but I can tell you how you *could* modify it. What kind of change are you considering (e.g., adding an item, changing a value, removing a field)?"
 
1297
  elif "add a field" in lower_message or "add a column" in lower_message:
1298
  response = "To add a field (or column if the data is tabular), you would typically iterate through each item (or row) in the data and add the new key-value pair. For example, adding a 'status' field with a default value."
 
1299
  elif "change a value" in lower_message or "update a field" in lower_message:
1300
  response = "To change a value, you would need to identify the specific item(s) and the field you want to update. You could use a condition (like filtering) to find the right items and then assign a new value to the field."
 
1301
  elif "remove a field" in lower_message or "delete a column" in lower_message:
1302
  response = "To remove a field, you would iterate through each item and delete the specified key. Be careful, as this is irreversible."
 
1303
  elif "restructure" in lower_message or "change the format" in lower_message:
1304
  response = "Restructuring data involves transforming it into a different shape. This could mean flattening nested objects, grouping items, or pivoting data. This often requires writing custom code to map the old structure to the new one."
 
1305
  elif "what if i" in lower_message or "if i changed" in lower_message:
1306
  response = "Tell me what specific change you're contemplating, and I can speculate on the potential impact or how you might approach it programmatically."
1307
 
 
1308
  # --- General Conversation / Fallback ---
1309
  elif "hello" in lower_message or "hi" in lower_message:
1310
  response = random.choice(["Hello! How can I help you understand the processed data?", "Hi there! What's on your mind about this data?", "Hey! Ask me anything about the data you've loaded."])
 
1311
  elif "thank you" in lower_message or "thanks" in lower_message:
1312
  response = random.choice(["You're welcome!", "Glad I could help.", "No problem! Let me know if you have more questions about the data."])
1313
+ elif "clear chat" in lower_message: # This should be caught by button, but as text too
1314
+ chat_history = [] # Gradio handles this for the Chatbot component via button
 
1315
  response = "Chat history cleared."
1316
+ new_filtered_df_state = None # Also clear filtered data on "clear chat" command by text
1317
+ elif not response: # Fallback if nothing else matched
1318
  response = random.choice([
1319
+ "I can analyze the data you've processed. What would you like to know? Try asking to filter data, e.g., 'show items where status is active'.",
1320
+ "Ask me about the number of items, the structure, or values of specific fields. You can also filter data.",
1321
+ "I can perform basic analysis or filter the data. For example: 'filter by price > 100'.",
1322
+ "Tell me what you want to extract or filter from the data. Use phrases like 'show items where ...'.",
1323
+ "I'm equipped to filter your data. Try 'find entries where name contains widget'."
1324
  ])
1325
 
1326
  except Exception as e:
1327
  logger.error(f"Chatbot runtime error: {e}")
1328
  response = f"An internal error occurred while processing your request: {e}"
1329
  response += "\nPlease try rephrasing your question or clear the chat history."
1330
+ # On unexpected error, preserve the current_filtered_df_state rather than clearing or modifying it.
1331
+ # new_filtered_df_state = current_filtered_df_state # This line is effectively already done by initialization
1332
 
1333
+ if not response: # Final safety net for response, if it's somehow still empty
1334
+ response = "I'm not sure how to respond to that. Please try rephrasing or ask for help on available commands."
1335
+
1336
+ if chat_history and chat_history[-1][1] == "": # If last history entry is (user_msg, "")
1337
  chat_history[-1] = (chat_history[-1][0], response)
1338
+ # else: # This case should ideally not be reached if chat_history.append((message, "")) is always called first.
1339
+ # chat_history.append((message, response)) # Avoids duplicate user message if something went wrong
1340
 
1341
+ return chat_history, chatbot_data, new_filtered_df_state
1342
 
1343
  # --- Gradio Interface Definition ---
1344
  def create_modern_interface():
 
1506
  value=True,
1507
  info="Generate sequential QR codes for combined data"
1508
  )
1509
+ generate_qr_toggle = gr.Checkbox(
1510
+ label="Generate QR Codes",
1511
+ value=False, # Default to False as per task
1512
+ info="Enable to generate QR codes for the processed data."
1513
+ )
1514
  process_btn = gr.Button(
1515
  "πŸ”„ Process & Generate QR",
1516
  variant="primary"
 
1534
  with gr.Tab("πŸ€– Chat with Data") as chat_tab:
1535
  chat_history = gr.State([])
1536
  chatbot = gr.Chatbot(label="Data Chatbot")
1537
+ filtered_chatbot_df_state = gr.State(None) # To store the filtered DataFrame
1538
+
1539
  with gr.Row():
1540
  chat_input = gr.Textbox(label="Your Message", placeholder="Ask me about the processed data...")
1541
  send_msg_btn = gr.Button("Send")
1542
+ with gr.Row():
1543
+ download_full_json_btn = gr.Button("Download Full JSON")
1544
+ download_filtered_json_btn = gr.Button("Download Filtered JSON")
1545
+ download_file_output = gr.File(label="Download Data", interactive=False) # For triggering download
1546
  clear_chat_btn = gr.Button("Clear Chat History")
1547
 
1548
+
1549
  def load_example():
1550
  example = {
1551
  "type": "product_catalog",
 
1603
 
1604
  return viewport_html
1605
 
1606
+ def process_inputs(urls, files, text, combine, crawl_depth, generate_qr_enabled):
1607
+ """Process all inputs and generate QR codes based on toggle"""
1608
  results = []
1609
  processing_status_messages = []
1610
 
 
1666
 
1667
  qr_paths = []
1668
  final_json_output = None
1669
+ qr_paths = []
1670
 
1671
  if results:
1672
+ final_json_output = results # Assign processed data regardless of QR generation
1673
+ if generate_qr_enabled:
1674
+ processing_status_messages.append("βš™οΈ Generating QR codes as requested...")
1675
+ qr_paths = generate_qr_codes(results, combine)
1676
+ if qr_paths:
1677
+ processing_status_messages.append(f"βœ… Successfully generated {len(qr_paths)} QR codes.")
1678
+ else:
1679
+ processing_status_messages.append("❌ Failed to generate QR codes (empty result or error).")
1680
  else:
1681
+ processing_status_messages.append("β˜‘οΈ QR code generation was disabled. Processed data is available.")
1682
+ qr_paths = [] # Ensure it's empty
1683
  else:
1684
  processing_status_messages.append("⚠️ No valid content collected from inputs.")
1685
+ final_json_output = {} # Ensure output_json is cleared if no results
1686
 
1687
  except Exception as e:
1688
  logger.error(f"Overall processing error in process_inputs: {e}")
 
1709
 
1710
  process_btn.click(
1711
  process_inputs,
1712
+ inputs=[url_input, file_input, text_input, combine_data, crawl_depth_slider, generate_qr_toggle],
1713
  outputs=[output_json, output_gallery, output_text, chatbot_data]
1714
  ).then(
1715
  on_qr_generation,
 
1721
 
1722
  send_msg_btn.click(
1723
  respond_to_chat,
1724
+ inputs=[chat_input, chat_history, chatbot_data, filtered_chatbot_df_state],
1725
+ outputs=[chatbot, chatbot_data, filtered_chatbot_df_state]
1726
+ inputs=[chat_input, chat_history, chatbot_data, filtered_chatbot_df_state],
1727
+ outputs=[chatbot, chatbot_data, filtered_chatbot_df_state]
1728
  ).then(
1729
  lambda: "",
1730
  inputs=None,
 
1742
  )
1743
 
1744
  clear_chat_btn.click(
1745
+ lambda: ([], None), # Clear chat history and filtered data state
1746
  inputs=None,
1747
+ outputs=[chatbot, filtered_chatbot_df_state]
1748
+ )
1749
+
1750
+ # --- Download Logic ---
1751
def download_json_data(data_df: Optional[pd.DataFrame], filename_prefix: str) -> Optional[str]:
    """Write a DataFrame to a timestamped JSON file and return the file path.

    Args:
        data_df: Rows to export. ``None`` or an empty frame means there is
            nothing to download.
        filename_prefix: Leading part of the generated file name; the final
            name is ``<prefix>_<unix-seconds>.json`` inside ``TEMP_DIR``.

    Returns:
        The path of the written file as a string, or ``None`` when there is
        no data or the file could not be created (the failure is logged).
    """
    if data_df is None or data_df.empty:
        logger.info(f"No data provided for download with prefix '{filename_prefix}'.")
        return None
    try:
        # Serialize through pandas rather than json.dumps(to_dict(...)):
        # missing cells become JSON null instead of the non-standard NaN
        # token, and datetime-like cells are written as ISO-8601 strings
        # instead of raising TypeError and aborting the whole download.
        json_str = data_df.to_json(
            orient='records',
            indent=2,
            force_ascii=False,
            date_format='iso',
        )

        timestamp = int(time.time())
        filename = f"{filename_prefix}_{timestamp}.json"
        file_path = TEMP_DIR / filename

        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(json_str)

        logger.info(f"Successfully created JSON file for download: {file_path}")
        return str(file_path)
    except Exception as e:
        # Best-effort export: report the problem and signal "no file" to the UI.
        logger.error(f"Error creating JSON file for {filename_prefix}: {e}")
        return None
1771
+
1772
def handle_download_full_json(current_chatbot_data_state: Optional[List[Dict]]) -> Optional[str]:
    """Export the complete processed dataset as a downloadable JSON file.

    The state is turned into a DataFrame with a direct ``pd.DataFrame(...)``
    call (no flattening of nested records is attempted here) and then handed
    to ``download_json_data`` with the ``"full_data"`` prefix.

    Args:
        current_chatbot_data_state: The full dataset held in the chatbot
            data state; falsy values mean nothing has been processed yet.

    Returns:
        Path of the generated file, or ``None`` if there is no data, the
        conversion fails, or the resulting frame is empty.
    """
    if not current_chatbot_data_state:
        logger.info("No full data available to download.")
        return None

    try:
        full_df = pd.DataFrame(current_chatbot_data_state)
    except Exception as exc:
        logger.error(f"Error converting full chatbot_data to DataFrame for download: {exc}")
        return None

    if full_df.empty:
        logger.info("Full data resulted in an empty DataFrame. Nothing to download.")
        return None

    return download_json_data(full_df, "full_data")
1795
+
1796
def handle_download_filtered_json(current_filtered_df_state: Optional[pd.DataFrame]) -> Optional[str]:
    """Export the currently filtered DataFrame as a downloadable JSON file.

    Args:
        current_filtered_df_state: The filtered frame kept in state, or
            ``None`` when no filter has been applied.

    Returns:
        Path of the generated file (prefix ``"filtered_data"``), or ``None``
        when there is no filtered data to write.
    """
    has_rows = current_filtered_df_state is not None and not current_filtered_df_state.empty
    if has_rows:
        return download_json_data(current_filtered_df_state, "filtered_data")

    logger.info("No filtered data available to download.")
    return None
1802
+
1803
+ download_full_json_btn.click(
1804
+ fn=handle_download_full_json,
1805
+ inputs=[chatbot_data], # chatbot_data is the gr.State holding the full dataset (List[Dict])
1806
+ outputs=[download_file_output]
1807
+ )
1808
+ download_filtered_json_btn.click(
1809
+ fn=handle_download_filtered_json,
1810
+ inputs=[filtered_chatbot_df_state], # This state holds the filtered DataFrame
1811
+ outputs=[download_file_output]
1812
  )
1813
 
1814
  gr.Markdown("""
 
1857
  raise
1858
 
1859
  if __name__ == "__main__":
1860
+ main()