Spaces:
Running
Running
Update app2.py
Browse files
app2.py
CHANGED
@@ -972,16 +972,27 @@ def generate_qr_codes(data: Union[str, Dict, List], combined: bool = True) -> Li
|
|
972 |
return []
|
973 |
|
974 |
# --- Chatbot Logic ---
|
975 |
-
def respond_to_chat(
|
976 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
977 |
if chatbot_data is None or not chatbot_data:
|
978 |
chat_history.append((message, "Please process some data first using the other tabs before chatting."))
|
979 |
-
return chat_history, chatbot_data
|
980 |
|
981 |
chat_history.append((message, ""))
|
982 |
|
983 |
response = ""
|
984 |
lower_message = message.lower().strip()
|
|
|
|
|
985 |
|
986 |
try:
|
987 |
# Attempt to flatten the data structure for easier querying
|
@@ -1098,195 +1109,236 @@ def respond_to_chat(message: str, chat_history: List[Tuple[str, str]], chatbot_d
|
|
1098 |
else:
|
1099 |
response = f"I couldn't find a column named '{column_name}'. Available columns are: {', '.join(df.columns)}"
|
1100 |
|
1101 |
-
# Filter data based on
|
1102 |
-
|
1103 |
-
|
1104 |
-
|
1105 |
-
|
1106 |
-
|
1107 |
-
|
1108 |
-
|
1109 |
-
|
1110 |
-
|
1111 |
-
|
1112 |
-
|
1113 |
-
|
1114 |
-
|
1115 |
-
try:
|
1116 |
-
value = float(value_str.strip("'\""))
|
1117 |
-
is_numeric_comparison = True
|
1118 |
-
except ValueError:
|
1119 |
-
value = value_str.strip("'\"")
|
1120 |
-
|
1121 |
-
|
1122 |
-
if column_name in df.columns:
|
1123 |
-
if is_numeric_comparison:
|
1124 |
-
numeric_col = pd.to_numeric(df[column_name], errors='coerce')
|
1125 |
-
filtered_df = df.loc[pd.notna(numeric_col)]
|
1126 |
-
|
1127 |
-
if operator == '>': filtered_results = filtered_df[numeric_col > value]
|
1128 |
-
elif operator == '<': filtered_results = filtered_df[numeric_col < value]
|
1129 |
-
elif operator == '>=': filtered_results = filtered_df[numeric_col >= value]
|
1130 |
-
elif operator == '<=': filtered_results = filtered_df[numeric_col <= value]
|
1131 |
-
elif operator == '==': filtered_results = filtered_df[numeric_col == value]
|
1132 |
-
elif operator == '!=': filtered_results = filtered_df[numeric_col != value]
|
1133 |
-
else:
|
1134 |
-
filtered_results = pd.DataFrame()
|
1135 |
-
response = f"Unsupported numeric operator: {operator}. Try >, <, >=, <=, ==, !=."
|
1136 |
-
|
1137 |
-
if not filtered_results.empty:
|
1138 |
-
preview = filtered_results.to_json(orient='records', indent=2)[:500] + "..." if len(filtered_results.to_json()) > 500 else filtered_results.to_json(orient='records', indent=2)
|
1139 |
-
response = f"Here are the items where '{column_name}' {operator} {value_str}:\n```json\n{preview}\n```"
|
1140 |
-
elif 'response' not in locals():
|
1141 |
-
response = f"No items found where '{column_name}' {operator} {value_str}."
|
1142 |
-
|
1143 |
-
elif is_boolean_comparison:
|
1144 |
-
# Ensure column is boolean or can be interpreted as boolean
|
1145 |
-
boolean_col = df[column_name].astype(bool, errors='ignore') # Coerce errors, might need more robust check
|
1146 |
-
if operator == '==': filtered_results = df[boolean_col == value]
|
1147 |
-
elif operator == '!=': filtered_results = df[boolean_col != value]
|
1148 |
-
else:
|
1149 |
-
filtered_results = pd.DataFrame()
|
1150 |
-
response = f"Unsupported boolean operator: {operator}. Try == or !=."
|
1151 |
|
1152 |
-
|
1153 |
-
|
1154 |
-
|
1155 |
-
|
1156 |
-
|
1157 |
|
|
|
1158 |
|
1159 |
-
|
1160 |
-
|
1161 |
-
|
1162 |
-
|
1163 |
-
|
1164 |
-
|
1165 |
-
|
1166 |
-
|
1167 |
-
|
1168 |
-
|
1169 |
-
|
1170 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1171 |
else:
|
1172 |
-
response = f"
|
1173 |
-
|
1174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1175 |
|
1176 |
-
|
1177 |
-
|
1178 |
|
1179 |
-
except Exception as e:
|
1180 |
-
response = f"An error occurred while filtering data: {e}"
|
1181 |
-
logger.error(f"Error filtering data based on condition: {e}")
|
1182 |
|
1183 |
# Request structured output (e.g., as CSV or simplified JSON)
|
1184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1185 |
if df is not None and not df.empty:
|
1186 |
csv_output = df.to_csv(index=False)
|
1187 |
response = f"Here is the data in CSV format:\n```csv\n{csv_output[:1000]}...\n```\n(Output truncated for chat display)"
|
1188 |
else:
|
1189 |
response = "There is no data available to output as CSV."
|
1190 |
-
|
1191 |
-
elif "output as json" in lower_message or "export as json" in lower_message:
|
1192 |
if df is not None and not df.empty:
|
1193 |
json_output = df.to_json(orient='records', indent=2)
|
1194 |
response = f"Here is the data in JSON format:\n```json\n{json_output[:1000]}...\n```\n(Output truncated for chat display)"
|
1195 |
else:
|
1196 |
response = "There is no data available to output as JSON."
|
1197 |
|
1198 |
-
|
1199 |
-
#
|
1200 |
-
|
1201 |
-
if not response:
|
1202 |
if "how many items" in lower_message or "number of items" in lower_message:
|
1203 |
-
if
|
1204 |
-
response = f"
|
|
|
|
|
|
|
|
|
1205 |
elif isinstance(chatbot_data, dict):
|
1206 |
response = "The processed data is a single dictionary, not a list of items."
|
1207 |
else:
|
1208 |
response = "The processed data is not a standard list or dictionary structure."
|
1209 |
|
1210 |
elif "what is the structure" in lower_message or "tell me about the data" in lower_message:
|
1211 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1212 |
sample_item = chatbot_data[0]
|
1213 |
-
response = f"The data is a list containing {len(chatbot_data)} items. The first item has the following top-level keys: {list(sample_item.keys())}.
|
1214 |
elif isinstance(chatbot_data, dict):
|
1215 |
response = f"The data is a dictionary with the following top-level keys: {list(chatbot_data.keys())}."
|
1216 |
else:
|
1217 |
response = "The processed data is not a standard list or dictionary structure that I can easily describe."
|
1218 |
|
|
|
|
|
1219 |
elif "show me" in lower_message or "get me" in lower_message or "extract" in lower_message:
|
1220 |
-
|
1221 |
-
|
1222 |
-
|
1223 |
-
extracted_values = []
|
1224 |
-
if isinstance(chatbot_data, list):
|
1225 |
-
for item in chatbot_data:
|
1226 |
-
if isinstance(item, dict) and key_request in item:
|
1227 |
-
extracted_values.append(item[key_request])
|
1228 |
-
elif isinstance(chatbot_data, dict) and key_request in chatbot_data:
|
1229 |
-
extracted_values.append(chatbot_data[key_request])
|
1230 |
-
|
1231 |
-
if extracted_values:
|
1232 |
-
preview = json.dumps(extracted_values, indent=2)[:500] + "..." if len(json.dumps(extracted_values)) > 500 else json.dumps(extracted_values, indent=2)
|
1233 |
-
response = f"Here are the values for '{key_request}':\n```json\n{preview}\n```"
|
1234 |
-
else:
|
1235 |
-
response = f"I couldn't find a key named '{key_request}' in the top level of the data items."
|
1236 |
-
else:
|
1237 |
-
response = "What specifically would you like me to show or extract?"
|
1238 |
|
1239 |
# --- Speculation about Modifications ---
|
1240 |
elif "how can i modify" in lower_message or "how to change" in lower_message or "can i add" in lower_message or "can i remove" in lower_message:
|
1241 |
response = "I cannot directly modify the data here, but I can tell you how you *could* modify it. What kind of change are you considering (e.g., adding an item, changing a value, removing a field)?"
|
1242 |
-
|
1243 |
elif "add a field" in lower_message or "add a column" in lower_message:
|
1244 |
response = "To add a field (or column if the data is tabular), you would typically iterate through each item (or row) in the data and add the new key-value pair. For example, adding a 'status' field with a default value."
|
1245 |
-
|
1246 |
elif "change a value" in lower_message or "update a field" in lower_message:
|
1247 |
response = "To change a value, you would need to identify the specific item(s) and the field you want to update. You could use a condition (like filtering) to find the right items and then assign a new value to the field."
|
1248 |
-
|
1249 |
elif "remove a field" in lower_message or "delete a column" in lower_message:
|
1250 |
response = "To remove a field, you would iterate through each item and delete the specified key. Be careful, as this is irreversible."
|
1251 |
-
|
1252 |
elif "restructure" in lower_message or "change the format" in lower_message:
|
1253 |
response = "Restructuring data involves transforming it into a different shape. This could mean flattening nested objects, grouping items, or pivoting data. This often requires writing custom code to map the old structure to the new one."
|
1254 |
-
|
1255 |
elif "what if i" in lower_message or "if i changed" in lower_message:
|
1256 |
response = "Tell me what specific change you're contemplating, and I can speculate on the potential impact or how you might approach it programmatically."
|
1257 |
|
1258 |
-
|
1259 |
# --- General Conversation / Fallback ---
|
1260 |
elif "hello" in lower_message or "hi" in lower_message:
|
1261 |
response = random.choice(["Hello! How can I help you understand the processed data?", "Hi there! What's on your mind about this data?", "Hey! Ask me anything about the data you've loaded."])
|
1262 |
-
|
1263 |
elif "thank you" in lower_message or "thanks" in lower_message:
|
1264 |
response = random.choice(["You're welcome!", "Glad I could help.", "No problem! Let me know if you have more questions about the data."])
|
1265 |
-
|
1266 |
-
|
1267 |
-
chat_history = []
|
1268 |
response = "Chat history cleared."
|
1269 |
-
|
1270 |
-
elif not response:
|
1271 |
response = random.choice([
|
1272 |
-
"I can analyze the data you've processed. What would you like to know?",
|
1273 |
-
"Ask me about the number of items, the structure, or values of specific fields.",
|
1274 |
-
"I can perform basic analysis
|
1275 |
-
"Tell me what you want to extract or filter from the data.",
|
1276 |
-
"I'm
|
1277 |
])
|
1278 |
|
1279 |
except Exception as e:
|
1280 |
logger.error(f"Chatbot runtime error: {e}")
|
1281 |
response = f"An internal error occurred while processing your request: {e}"
|
1282 |
response += "\nPlease try rephrasing your question or clear the chat history."
|
|
|
|
|
1283 |
|
1284 |
-
if
|
|
|
|
|
|
|
1285 |
chat_history[-1] = (chat_history[-1][0], response)
|
1286 |
-
else:
|
1287 |
-
|
1288 |
|
1289 |
-
return chat_history, chatbot_data
|
1290 |
|
1291 |
# --- Gradio Interface Definition ---
|
1292 |
def create_modern_interface():
|
@@ -1454,6 +1506,11 @@ def create_modern_interface():
|
|
1454 |
value=True,
|
1455 |
info="Generate sequential QR codes for combined data"
|
1456 |
)
|
|
|
|
|
|
|
|
|
|
|
1457 |
process_btn = gr.Button(
|
1458 |
"π Process & Generate QR",
|
1459 |
variant="primary"
|
@@ -1477,11 +1534,18 @@ def create_modern_interface():
|
|
1477 |
with gr.Tab("π€ Chat with Data") as chat_tab:
|
1478 |
chat_history = gr.State([])
|
1479 |
chatbot = gr.Chatbot(label="Data Chatbot")
|
|
|
|
|
1480 |
with gr.Row():
|
1481 |
chat_input = gr.Textbox(label="Your Message", placeholder="Ask me about the processed data...")
|
1482 |
send_msg_btn = gr.Button("Send")
|
|
|
|
|
|
|
|
|
1483 |
clear_chat_btn = gr.Button("Clear Chat History")
|
1484 |
|
|
|
1485 |
def load_example():
|
1486 |
example = {
|
1487 |
"type": "product_catalog",
|
@@ -1539,8 +1603,8 @@ def create_modern_interface():
|
|
1539 |
|
1540 |
return viewport_html
|
1541 |
|
1542 |
-
def process_inputs(urls, files, text, combine, crawl_depth):
|
1543 |
-
"""Process all inputs and generate QR codes"""
|
1544 |
results = []
|
1545 |
processing_status_messages = []
|
1546 |
|
@@ -1602,18 +1666,23 @@ def create_modern_interface():
|
|
1602 |
|
1603 |
qr_paths = []
|
1604 |
final_json_output = None
|
|
|
1605 |
|
1606 |
if results:
|
1607 |
-
|
1608 |
-
|
1609 |
-
|
1610 |
-
|
1611 |
-
|
|
|
|
|
|
|
1612 |
else:
|
1613 |
-
processing_status_messages.append("
|
1614 |
-
|
1615 |
else:
|
1616 |
processing_status_messages.append("β οΈ No valid content collected from inputs.")
|
|
|
1617 |
|
1618 |
except Exception as e:
|
1619 |
logger.error(f"Overall processing error in process_inputs: {e}")
|
@@ -1640,7 +1709,7 @@ def create_modern_interface():
|
|
1640 |
|
1641 |
process_btn.click(
|
1642 |
process_inputs,
|
1643 |
-
inputs=[url_input, file_input, text_input, combine_data, crawl_depth_slider],
|
1644 |
outputs=[output_json, output_gallery, output_text, chatbot_data]
|
1645 |
).then(
|
1646 |
on_qr_generation,
|
@@ -1652,8 +1721,10 @@ def create_modern_interface():
|
|
1652 |
|
1653 |
send_msg_btn.click(
|
1654 |
respond_to_chat,
|
1655 |
-
inputs=[chat_input, chat_history, chatbot_data],
|
1656 |
-
outputs=[chatbot, chatbot_data]
|
|
|
|
|
1657 |
).then(
|
1658 |
lambda: "",
|
1659 |
inputs=None,
|
@@ -1671,9 +1742,73 @@ def create_modern_interface():
|
|
1671 |
)
|
1672 |
|
1673 |
clear_chat_btn.click(
|
1674 |
-
lambda: [],
|
1675 |
inputs=None,
|
1676 |
-
outputs=chatbot
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1677 |
)
|
1678 |
|
1679 |
gr.Markdown("""
|
@@ -1722,4 +1857,4 @@ def main():
|
|
1722 |
raise
|
1723 |
|
1724 |
if __name__ == "__main__":
|
1725 |
-
main()
|
|
|
972 |
return []
|
973 |
|
974 |
# --- Chatbot Logic ---
|
975 |
+
def respond_to_chat(
|
976 |
+
message: str,
|
977 |
+
chat_history: List[Tuple[str, str]],
|
978 |
+
chatbot_data: Optional[List[Dict]],
|
979 |
+
# Add current_filtered_df_state as input, it will be updated and returned
|
980 |
+
current_filtered_df_state: Optional[pd.DataFrame]
|
981 |
+
) -> Tuple[List[Tuple[str, str]], List[Dict], Optional[pd.DataFrame]]:
|
982 |
+
"""
|
983 |
+
Responds to user chat messages based on the loaded JSON data.
|
984 |
+
Manages and returns the state of the filtered DataFrame.
|
985 |
+
"""
|
986 |
if chatbot_data is None or not chatbot_data:
|
987 |
chat_history.append((message, "Please process some data first using the other tabs before chatting."))
|
988 |
+
return chat_history, chatbot_data, current_filtered_df_state # Return existing state
|
989 |
|
990 |
chat_history.append((message, ""))
|
991 |
|
992 |
response = ""
|
993 |
lower_message = message.lower().strip()
|
994 |
+
# Initialize new_filtered_df_state with the current state to preserve it unless a filter changes it
|
995 |
+
new_filtered_df_state = current_filtered_df_state
|
996 |
|
997 |
try:
|
998 |
# Attempt to flatten the data structure for easier querying
|
|
|
1109 |
else:
|
1110 |
response = f"I couldn't find a column named '{column_name}'. Available columns are: {', '.join(df.columns)}"
|
1111 |
|
1112 |
+
# Enhanced Filter data based on more complex conditions
|
1113 |
+
# Regex to capture: "filter by status active", "show items where category is 'electronics'", "find entries where price > 100"
|
1114 |
+
# It tries to capture:
|
1115 |
+
# 1. column_name (e.g., category, status, price)
|
1116 |
+
# 2. operator (e.g., is, equals, =, >, <, contains, starts with, ends with) - flexible operators
|
1117 |
+
# 3. value (e.g., 'electronics', active, 100) - can be quoted or unquoted
|
1118 |
+
filter_match = re.search(
|
1119 |
+
r'(?:filter|show items|show me items|find entries|select items|get items)\s+' # Optional action phrases
|
1120 |
+
r'(?:where|by|for|with|if)\s+' # Keyword indicating condition
|
1121 |
+
r'(\w+)\s+' # Column name
|
1122 |
+
r'(is|equals?|==|!=|>=?|<=?|contains?|starts with|ends with)\s+' # Operator
|
1123 |
+
r'([\'"]?[\w\s.-]+[\'"]?)', # Value (allows spaces, dots, hyphens if quoted, or single words)
|
1124 |
+
lower_message
|
1125 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1126 |
|
1127 |
+
if filter_match:
|
1128 |
+
column_name, operator, value_str = filter_match.groups()
|
1129 |
+
column_name = column_name.strip()
|
1130 |
+
operator = operator.strip().lower()
|
1131 |
+
value_str = value_str.strip().strip("'\"")
|
1132 |
|
1133 |
+
logger.info(f"Filter request: Column='{column_name}', Operator='{operator}', Value='{value_str}'")
|
1134 |
|
1135 |
+
if column_name not in df.columns:
|
1136 |
+
response = f"I couldn't find a column named '{column_name}'. Available columns are: {', '.join(df.columns)}"
|
1137 |
+
new_filtered_df_state = None # Clear previous filter if column not found
|
1138 |
+
else:
|
1139 |
+
# IMPORTANT: Always filter from the original full dataframe 'df'
|
1140 |
+
active_df_to_filter = df.copy()
|
1141 |
+
try:
|
1142 |
+
# Attempt to infer value type for comparison
|
1143 |
+
target_value: Any
|
1144 |
+
col_dtype = df[column_name].dtype
|
1145 |
+
|
1146 |
+
if pd.api.types.is_numeric_dtype(col_dtype) and operator in ['>', '>=', '<', '<=', '==', '!=']:
|
1147 |
+
try:
|
1148 |
+
target_value = float(value_str)
|
1149 |
+
col_series = pd.to_numeric(filtered_df[column_name], errors='coerce')
|
1150 |
+
except ValueError:
|
1151 |
+
response = f"For numeric column '{column_name}', '{value_str}' is not a valid number."
|
1152 |
+
target_value = None # Error case
|
1153 |
+
elif pd.api.types.is_bool_dtype(col_dtype) or value_str.lower() in ['true', 'false']:
|
1154 |
+
target_value = value_str.lower() == 'true'
|
1155 |
+
col_series = filtered_df[column_name].astype(bool, errors='ignore')
|
1156 |
+
else: # Assume string comparison otherwise
|
1157 |
+
target_value = str(value_str)
|
1158 |
+
col_series = filtered_df[column_name].astype(str).str.lower() # Case-insensitive for strings
|
1159 |
+
value_str_lower = target_value.lower()
|
1160 |
+
|
1161 |
+
|
1162 |
+
if 'response' not in locals(): # If no type conversion error occurred
|
1163 |
+
if operator in ['is', 'equals', '==']:
|
1164 |
+
if pd.api.types.is_numeric_dtype(col_dtype) or pd.api.types.is_bool_dtype(col_dtype):
|
1165 |
+
condition = col_series == target_value
|
1166 |
+
else: # String comparison
|
1167 |
+
condition = col_series == value_str_lower
|
1168 |
+
elif operator == '!=':
|
1169 |
+
if pd.api.types.is_numeric_dtype(col_dtype) or pd.api.types.is_bool_dtype(col_dtype):
|
1170 |
+
condition = col_series != target_value
|
1171 |
+
else: # String comparison
|
1172 |
+
condition = col_series != value_str_lower
|
1173 |
+
elif operator == '>' and pd.api.types.is_numeric_dtype(col_dtype):
|
1174 |
+
condition = col_series > target_value
|
1175 |
+
elif operator == '>=' and pd.api.types.is_numeric_dtype(col_dtype):
|
1176 |
+
condition = col_series >= target_value
|
1177 |
+
elif operator == '<' and pd.api.types.is_numeric_dtype(col_dtype):
|
1178 |
+
condition = col_series < target_value
|
1179 |
+
elif operator == '<=' and pd.api.types.is_numeric_dtype(col_dtype):
|
1180 |
+
condition = col_series <= target_value
|
1181 |
+
elif operator in ['contains', 'contain'] and pd.api.types.is_string_dtype(col_series):
|
1182 |
+
condition = col_series.str.contains(value_str_lower, case=False, na=False)
|
1183 |
+
elif operator == 'starts with' and pd.api.types.is_string_dtype(col_series):
|
1184 |
+
condition = col_series.str.startswith(value_str_lower, na=False)
|
1185 |
+
elif operator == 'ends with' and pd.api.types.is_string_dtype(col_series):
|
1186 |
+
condition = col_series.str.endswith(value_str_lower, na=False)
|
1187 |
else:
|
1188 |
+
response = f"Unsupported operator '{operator}' for column '{column_name}' (type: {col_dtype})."
|
1189 |
+
condition = None
|
1190 |
+
# If operator was bad, response is set, clear filter state
|
1191 |
+
if response: new_filtered_df_state = None
|
1192 |
+
|
1193 |
+
|
1194 |
+
if condition is not None:
|
1195 |
+
# Apply condition to the active_df_to_filter (which is a copy of the full df)
|
1196 |
+
filtered_results_df = active_df_to_filter[condition]
|
1197 |
+
if not filtered_results_df.empty:
|
1198 |
+
new_filtered_df_state = filtered_results_df # Update state with new filter result
|
1199 |
+
num_results = len(filtered_results_df)
|
1200 |
+
preview_rows = min(num_results, 5)
|
1201 |
+
preview_cols = min(len(filtered_results_df.columns), 5)
|
1202 |
+
|
1203 |
+
preview_df = filtered_results_df.head(preview_rows).iloc[:, :preview_cols]
|
1204 |
+
preview_str = preview_df.to_string(index=False)
|
1205 |
+
|
1206 |
+
response = (f"Found {num_results} items where '{column_name}' {operator} '{value_str}'.\n"
|
1207 |
+
f"Here's a preview:\n```\n{preview_str}\n```\n"
|
1208 |
+
f"The full filtered dataset is now available for download using the 'Download Filtered JSON' button.")
|
1209 |
+
else:
|
1210 |
+
new_filtered_df_state = pd.DataFrame() # Store empty DF for "no results"
|
1211 |
+
response = f"No items found where '{column_name}' {operator} '{value_str}'."
|
1212 |
+
# If condition is None (e.g. bad operator) and response not already set by type check, set generic invalid op message.
|
1213 |
+
elif not response: # Avoid overwriting specific error from type check
|
1214 |
+
response = f"Unsupported operator '{operator}' for column '{column_name}' (type: {col_dtype})."
|
1215 |
+
new_filtered_df_state = None
|
1216 |
+
|
1217 |
+
|
1218 |
+
except ValueError as ve: # Specifically catch ValueError for target_value conversion
|
1219 |
+
response = f"Invalid value '{value_str}' for numeric column '{column_name}'. {ve}"
|
1220 |
+
new_filtered_df_state = None # Clear on value error
|
1221 |
+
logger.warning(f"ValueError during filter: {ve}")
|
1222 |
+
except Exception as e:
|
1223 |
+
new_filtered_df_state = None # Clear on other errors
|
1224 |
+
response = f"An error occurred while applying the filter: {e}"
|
1225 |
+
logger.error(f"Error applying filter (column='{column_name}', op='{operator}', val='{value_str}'): {e}")
|
1226 |
+
# If the message was a filter, new_filtered_df_state is now set (or None/empty if error/no results)
|
1227 |
+
|
1228 |
+
# --- End of Enhanced Filter Logic ---
|
1229 |
|
1230 |
+
# If `response` is still empty, it means no filter query was matched by the filter_match regex.
|
1231 |
+
# In this case, new_filtered_df_state (initialized from current_filtered_df_state) remains unchanged.
|
1232 |
|
|
|
|
|
|
|
1233 |
|
1234 |
# Request structured output (e.g., as CSV or simplified JSON)
|
1235 |
+
# This section should act on the *original* df unless specifically asked for filtered data export.
|
1236 |
+
# The new download buttons handle filtered data export separately.
|
1237 |
+
# Let's assume for now it acts on the original df, and a separate command would be needed for "export filtered data"
|
1238 |
+
# If no filter query matched, and no other specific df query matched,
|
1239 |
+
# then `response` might still be empty. `new_filtered_df_state` will be the same as `current_filtered_df_state`.
|
1240 |
+
# The general queries below should not reset `new_filtered_df_state` unless it's a "clear" command.
|
1241 |
+
|
1242 |
+
elif "output as csv" in lower_message or "export as csv" in lower_message:
|
1243 |
if df is not None and not df.empty:
|
1244 |
csv_output = df.to_csv(index=False)
|
1245 |
response = f"Here is the data in CSV format:\n```csv\n{csv_output[:1000]}...\n```\n(Output truncated for chat display)"
|
1246 |
else:
|
1247 |
response = "There is no data available to output as CSV."
|
1248 |
+
elif "output as json" in lower_message or "export as json" in lower_message: # Note: "export as json" is different from download buttons
|
|
|
1249 |
if df is not None and not df.empty:
|
1250 |
json_output = df.to_json(orient='records', indent=2)
|
1251 |
response = f"Here is the data in JSON format:\n```json\n{json_output[:1000]}...\n```\n(Output truncated for chat display)"
|
1252 |
else:
|
1253 |
response = "There is no data available to output as JSON."
|
1254 |
|
1255 |
+
# --- General Queries (if no DataFrame or specific query matched AND no filter was applied in this turn) ---
|
1256 |
+
# These should not clear new_filtered_df_state unless it's a "clear chat"
|
1257 |
+
if not response: # Only enter if no response has been generated by DataFrame/filter logic
|
|
|
1258 |
if "how many items" in lower_message or "number of items" in lower_message:
|
1259 |
+
if new_filtered_df_state is not None and not new_filtered_df_state.empty:
|
1260 |
+
response = f"The currently filtered dataset has {len(new_filtered_df_state)} items. The original dataset has {len(df if df is not None else chatbot_data)} items."
|
1261 |
+
elif df is not None: # Check df from original chatbot_data
|
1262 |
+
response = f"There are {len(df)} top-level items in the processed data."
|
1263 |
+
elif isinstance(chatbot_data, list): # Fallback if df creation failed but chatbot_data is list
|
1264 |
+
response = f"There are {len(chatbot_data)} top-level items in the processed data (not in DataFrame)."
|
1265 |
elif isinstance(chatbot_data, dict):
|
1266 |
response = "The processed data is a single dictionary, not a list of items."
|
1267 |
else:
|
1268 |
response = "The processed data is not a standard list or dictionary structure."
|
1269 |
|
1270 |
elif "what is the structure" in lower_message or "tell me about the data" in lower_message:
|
1271 |
+
if new_filtered_df_state is not None and not new_filtered_df_state.empty:
|
1272 |
+
response = f"The filtered data has columns: {', '.join(new_filtered_df_state.columns)}. "
|
1273 |
+
if df is not None:
|
1274 |
+
response += f"The original data has columns: {', '.join(df.columns)}."
|
1275 |
+
else:
|
1276 |
+
response += "Original data structure is not tabular."
|
1277 |
+
elif df is not None:
|
1278 |
+
response = f"The data is a table with {len(df)} rows and columns: {', '.join(df.columns)}."
|
1279 |
+
elif isinstance(chatbot_data, list) and chatbot_data:
|
1280 |
sample_item = chatbot_data[0]
|
1281 |
+
response = f"The data is a list containing {len(chatbot_data)} items. The first item has the following top-level keys: {list(sample_item.keys())}."
|
1282 |
elif isinstance(chatbot_data, dict):
|
1283 |
response = f"The data is a dictionary with the following top-level keys: {list(chatbot_data.keys())}."
|
1284 |
else:
|
1285 |
response = "The processed data is not a standard list or dictionary structure that I can easily describe."
|
1286 |
|
1287 |
+
# "show me" without a filter condition might be ambiguous.
|
1288 |
+
# Let's assume it refers to the original data or provide guidance.
|
1289 |
elif "show me" in lower_message or "get me" in lower_message or "extract" in lower_message:
|
1290 |
+
# This specific 'show me' without 'where' should not trigger a filter or clear existing filter state.
|
1291 |
+
# It's a general request for data, which is too broad. Guide the user.
|
1292 |
+
response = "If you want to filter the data, please use a phrase like 'show me items where column_name is value'. If you want to see the raw data, consider using the download buttons."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1293 |
|
1294 |
# --- Speculation about Modifications ---
|
1295 |
elif "how can i modify" in lower_message or "how to change" in lower_message or "can i add" in lower_message or "can i remove" in lower_message:
|
1296 |
response = "I cannot directly modify the data here, but I can tell you how you *could* modify it. What kind of change are you considering (e.g., adding an item, changing a value, removing a field)?"
|
|
|
1297 |
elif "add a field" in lower_message or "add a column" in lower_message:
|
1298 |
response = "To add a field (or column if the data is tabular), you would typically iterate through each item (or row) in the data and add the new key-value pair. For example, adding a 'status' field with a default value."
|
|
|
1299 |
elif "change a value" in lower_message or "update a field" in lower_message:
|
1300 |
response = "To change a value, you would need to identify the specific item(s) and the field you want to update. You could use a condition (like filtering) to find the right items and then assign a new value to the field."
|
|
|
1301 |
elif "remove a field" in lower_message or "delete a column" in lower_message:
|
1302 |
response = "To remove a field, you would iterate through each item and delete the specified key. Be careful, as this is irreversible."
|
|
|
1303 |
elif "restructure" in lower_message or "change the format" in lower_message:
|
1304 |
response = "Restructuring data involves transforming it into a different shape. This could mean flattening nested objects, grouping items, or pivoting data. This often requires writing custom code to map the old structure to the new one."
|
|
|
1305 |
elif "what if i" in lower_message or "if i changed" in lower_message:
|
1306 |
response = "Tell me what specific change you're contemplating, and I can speculate on the potential impact or how you might approach it programmatically."
|
1307 |
|
|
|
1308 |
# --- General Conversation / Fallback ---
|
1309 |
elif "hello" in lower_message or "hi" in lower_message:
|
1310 |
response = random.choice(["Hello! How can I help you understand the processed data?", "Hi there! What's on your mind about this data?", "Hey! Ask me anything about the data you've loaded."])
|
|
|
1311 |
elif "thank you" in lower_message or "thanks" in lower_message:
|
1312 |
response = random.choice(["You're welcome!", "Glad I could help.", "No problem! Let me know if you have more questions about the data."])
|
1313 |
+
elif "clear chat" in lower_message: # This should be caught by button, but as text too
|
1314 |
+
chat_history = [] # Gradio handles this for the Chatbot component via button
|
|
|
1315 |
response = "Chat history cleared."
|
1316 |
+
new_filtered_df_state = None # Also clear filtered data on "clear chat" command by text
|
1317 |
+
elif not response: # Fallback if nothing else matched
|
1318 |
response = random.choice([
|
1319 |
+
"I can analyze the data you've processed. What would you like to know? Try asking to filter data, e.g., 'show items where status is active'.",
|
1320 |
+
"Ask me about the number of items, the structure, or values of specific fields. You can also filter data.",
|
1321 |
+
"I can perform basic analysis or filter the data. For example: 'filter by price > 100'.",
|
1322 |
+
"Tell me what you want to extract or filter from the data. Use phrases like 'show items where ...'.",
|
1323 |
+
"I'm equipped to filter your data. Try 'find entries where name contains widget'."
|
1324 |
])
|
1325 |
|
1326 |
except Exception as e:
|
1327 |
logger.error(f"Chatbot runtime error: {e}")
|
1328 |
response = f"An internal error occurred while processing your request: {e}"
|
1329 |
response += "\nPlease try rephrasing your question or clear the chat history."
|
1330 |
+
# On unexpected error, preserve the current_filtered_df_state rather than clearing or modifying it.
|
1331 |
+
# new_filtered_df_state = current_filtered_df_state # This line is effectively already done by initialization
|
1332 |
|
1333 |
+
if not response: # Final safety net for response, if it's somehow still empty
|
1334 |
+
response = "I'm not sure how to respond to that. Please try rephrasing or ask for help on available commands."
|
1335 |
+
|
1336 |
+
if chat_history and chat_history[-1][1] == "": # If last history entry is (user_msg, "")
|
1337 |
chat_history[-1] = (chat_history[-1][0], response)
|
1338 |
+
# else: # This case should ideally not be reached if chat_history.append((message, "")) is always called first.
|
1339 |
+
# chat_history.append((message, response)) # Avoids duplicate user message if something went wrong
|
1340 |
|
1341 |
+
return chat_history, chatbot_data, new_filtered_df_state
|
1342 |
|
1343 |
# --- Gradio Interface Definition ---
|
1344 |
def create_modern_interface():
|
|
|
1506 |
value=True,
|
1507 |
info="Generate sequential QR codes for combined data"
|
1508 |
)
|
1509 |
+
generate_qr_toggle = gr.Checkbox(
|
1510 |
+
label="Generate QR Codes",
|
1511 |
+
value=False, # Default to False as per task
|
1512 |
+
info="Enable to generate QR codes for the processed data."
|
1513 |
+
)
|
1514 |
process_btn = gr.Button(
|
1515 |
"π Process & Generate QR",
|
1516 |
variant="primary"
|
|
|
1534 |
with gr.Tab("π€ Chat with Data") as chat_tab:
|
1535 |
chat_history = gr.State([])
|
1536 |
chatbot = gr.Chatbot(label="Data Chatbot")
|
1537 |
+
filtered_chatbot_df_state = gr.State(None) # To store the filtered DataFrame
|
1538 |
+
|
1539 |
with gr.Row():
|
1540 |
chat_input = gr.Textbox(label="Your Message", placeholder="Ask me about the processed data...")
|
1541 |
send_msg_btn = gr.Button("Send")
|
1542 |
+
with gr.Row():
|
1543 |
+
download_full_json_btn = gr.Button("Download Full JSON")
|
1544 |
+
download_filtered_json_btn = gr.Button("Download Filtered JSON")
|
1545 |
+
download_file_output = gr.File(label="Download Data", interactive=False) # For triggering download
|
1546 |
clear_chat_btn = gr.Button("Clear Chat History")
|
1547 |
|
1548 |
+
|
1549 |
def load_example():
|
1550 |
example = {
|
1551 |
"type": "product_catalog",
|
|
|
1603 |
|
1604 |
return viewport_html
|
1605 |
|
1606 |
+
def process_inputs(urls, files, text, combine, crawl_depth, generate_qr_enabled):
|
1607 |
+
"""Process all inputs and generate QR codes based on toggle"""
|
1608 |
results = []
|
1609 |
processing_status_messages = []
|
1610 |
|
|
|
1666 |
|
1667 |
qr_paths = []
|
1668 |
final_json_output = None
|
1669 |
+
qr_paths = []
|
1670 |
|
1671 |
if results:
|
1672 |
+
final_json_output = results # Assign processed data regardless of QR generation
|
1673 |
+
if generate_qr_enabled:
|
1674 |
+
processing_status_messages.append("βοΈ Generating QR codes as requested...")
|
1675 |
+
qr_paths = generate_qr_codes(results, combine)
|
1676 |
+
if qr_paths:
|
1677 |
+
processing_status_messages.append(f"β
Successfully generated {len(qr_paths)} QR codes.")
|
1678 |
+
else:
|
1679 |
+
processing_status_messages.append("β Failed to generate QR codes (empty result or error).")
|
1680 |
else:
|
1681 |
+
processing_status_messages.append("βοΈ QR code generation was disabled. Processed data is available.")
|
1682 |
+
qr_paths = [] # Ensure it's empty
|
1683 |
else:
|
1684 |
processing_status_messages.append("β οΈ No valid content collected from inputs.")
|
1685 |
+
final_json_output = {} # Ensure output_json is cleared if no results
|
1686 |
|
1687 |
except Exception as e:
|
1688 |
logger.error(f"Overall processing error in process_inputs: {e}")
|
|
|
1709 |
|
1710 |
process_btn.click(
|
1711 |
process_inputs,
|
1712 |
+
inputs=[url_input, file_input, text_input, combine_data, crawl_depth_slider, generate_qr_toggle],
|
1713 |
outputs=[output_json, output_gallery, output_text, chatbot_data]
|
1714 |
).then(
|
1715 |
on_qr_generation,
|
|
|
1721 |
|
1722 |
send_msg_btn.click(
|
1723 |
respond_to_chat,
|
1724 |
+
# respond_to_chat takes (message, chat_history, chatbot_data, current_filtered_df_state)
# and returns (chat_history, chatbot_data, new_filtered_df_state).
# Each keyword may be passed only once per call: the duplicated inputs=/outputs=
# pair (with no comma between them) was a SyntaxError that prevented the module
# from importing.
inputs=[chat_input, chat_history, chatbot_data, filtered_chatbot_df_state],
outputs=[chatbot, chatbot_data, filtered_chatbot_df_state],
|
1728 |
).then(
|
1729 |
lambda: "",
|
1730 |
inputs=None,
|
|
|
1742 |
)
|
1743 |
|
1744 |
# Reset the visible conversation and drop any filtered DataFrame kept from
# earlier chat queries; the callback takes no inputs.
clear_chat_btn.click(
    fn=lambda: ([], None),
    inputs=None,
    outputs=[chatbot, filtered_chatbot_df_state],
)
|
1749 |
+
|
1750 |
+
# --- Download Logic ---
|
1751 |
+
def download_json_data(data_df: Optional[pd.DataFrame], filename_prefix: str) -> Optional[str]:
    """Write a DataFrame to a JSON file under TEMP_DIR and return the file path.

    The frame is serialized as a list of row objects (one dict per row) with
    2-space indentation and non-ASCII characters left unescaped.

    Args:
        data_df: Data to export. ``None`` or an empty frame means there is
            nothing to download.
        filename_prefix: Leading part of the generated file name; the final
            name is ``<prefix>_<unix-seconds>.json``.

    Returns:
        The path of the written file as a string, or ``None`` when there is no
        data or the file could not be produced (the failure is logged).
    """
    if data_df is None or data_df.empty:
        logger.info(f"No data provided for download with prefix '{filename_prefix}'.")
        return None
    try:
        # Missing values (NaN/NaT) would otherwise be emitted by json.dumps as
        # the bare token `NaN`, which is not valid JSON. Casting to object first
        # lets them be replaced by None, which serializes as `null`.
        cleaned_df = data_df.astype(object).where(data_df.notna(), None)
        data_list = cleaned_df.to_dict(orient='records')

        # default=str is a deliberate export fallback: values json cannot encode
        # natively (e.g. timestamps) are written as their string form instead of
        # aborting the whole download.
        json_str = json.dumps(data_list, indent=2, ensure_ascii=False, default=str)

        timestamp = int(time.time())
        filename = f"{filename_prefix}_{timestamp}.json"
        file_path = TEMP_DIR / filename

        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(json_str)

        logger.info(f"Successfully created JSON file for download: {file_path}")
        return str(file_path)
    except Exception as e:
        # Best-effort UI callback: report the failure (with traceback) and let
        # the caller show "no file" rather than crash the event handler.
        logger.error(f"Error creating JSON file for {filename_prefix}: {e}", exc_info=True)
        return None
|
1771 |
+
|
1772 |
+
def handle_download_full_json(current_chatbot_data_state: Optional[List[Dict]]) -> Optional[str]:
    """Export the complete chatbot dataset as a downloadable JSON file.

    The state value is converted to a DataFrame directly (no flattening is
    applied here) and handed to ``download_json_data`` with the ``full_data``
    file-name prefix.

    Args:
        current_chatbot_data_state: Value of the ``chatbot_data`` state.

    Returns:
        Whatever ``download_json_data`` returns for the converted frame, or
        ``None`` when the state is empty, converts to an empty frame, or cannot
        be converted at all (the reason is logged).
    """
    if not current_chatbot_data_state:
        logger.info("No full data available to download.")
        return None

    try:
        # Direct conversion; nested structures are not normalized here.
        full_frame = pd.DataFrame(current_chatbot_data_state)
        has_rows = not full_frame.empty
    except Exception as exc:
        logger.error(f"Error converting full chatbot_data to DataFrame for download: {exc}")
        return None

    if has_rows:
        return download_json_data(full_frame, "full_data")

    logger.info("Full data resulted in an empty DataFrame. Nothing to download.")
    return None
|
1795 |
+
|
1796 |
+
def handle_download_filtered_json(current_filtered_df_state: Optional[pd.DataFrame]) -> Optional[str]:
    """Export the currently filtered DataFrame as a downloadable JSON file.

    Args:
        current_filtered_df_state: Value of the filtered-DataFrame state, or
            ``None`` when no filter result is stored.

    Returns:
        Whatever ``download_json_data`` returns for the frame (using the
        ``filtered_data`` file-name prefix), or ``None`` when there is no
        filtered data.
    """
    has_filtered_rows = (
        current_filtered_df_state is not None
        and not current_filtered_df_state.empty
    )
    if has_filtered_rows:
        return download_json_data(current_filtered_df_state, "filtered_data")

    logger.info("No filtered data available to download.")
    return None
|
1802 |
+
|
1803 |
+
# Wire both download buttons to the shared file output. Each entry pairs a
# button with its handler and the state component that handler reads:
# chatbot_data for the full export, filtered_chatbot_df_state for the
# filtered export.
for download_btn, download_handler, source_state in (
    (download_full_json_btn, handle_download_full_json, chatbot_data),
    (download_filtered_json_btn, handle_download_filtered_json, filtered_chatbot_df_state),
):
    download_btn.click(
        fn=download_handler,
        inputs=[source_state],
        outputs=[download_file_output],
    )
|
1813 |
|
1814 |
gr.Markdown("""
|
|
|
1857 |
raise
|
1858 |
|
1859 |
# Run the application only when this file is executed as a script, so that
# importing the module has no launch side effect.
if __name__ == "__main__":
    main()
|