Spaces:

acecalisto3
/

urld

Running

App Files Files Community

acecalisto3 committed 8 days ago

Commit

3a6d326

verified ·

1 Parent(s): 90ff6aa

Update app2.py

Browse files

Files changed (1) hide show

app2.py +278 -143

app2.py CHANGED Viewed

@@ -972,16 +972,27 @@ def generate_qr_codes(data: Union[str, Dict, List], combined: bool = True) -> Li
         return []
 # --- Chatbot Logic ---
-def respond_to_chat(message: str, chat_history: List[Tuple[str, str]], chatbot_data: Optional[List[Dict]]) -> Tuple[List[Tuple[str, str]], List[Dict]]:
-    """Responds to user chat messages based on the loaded JSON data."""
     if chatbot_data is None or not chatbot_data:
         chat_history.append((message, "Please process some data first using the other tabs before chatting."))
-        return chat_history, chatbot_data
     chat_history.append((message, ""))
     response = ""
     lower_message = message.lower().strip()
     try:
         # Attempt to flatten the data structure for easier querying
@@ -1098,195 +1109,236 @@ def respond_to_chat(message: str, chat_history: List[Tuple[str, str]], chatbot_d
                 else:
                     response = f"I couldn't find a column named '{column_name}'. Available columns are: {', '.join(df.columns)}"
-            # Filter data based on a simple condition
-            match = re.search(r'show me items where (\w+)\s*([<>=!]+)\s*([\w"\']*)', lower_message) # Added quotes to value regex
-            if match:
-                column_name, operator, value_str = match.groups()
-                try:
-                    # Attempt to infer value type (numeric, string, boolean)
-                    value: Any
-                    is_numeric_comparison = False
-                    is_boolean_comparison = False
-                    if value_str.lower() in ['true', 'false']:
-                         value = value_str.lower() == 'true'
-                         is_boolean_comparison = True
-                    else:
-                        try:
-                            value = float(value_str.strip("'\""))
-                            is_numeric_comparison = True
-                        except ValueError:
-                            value = value_str.strip("'\"")
-                    if column_name in df.columns:
-                        if is_numeric_comparison:
-                            numeric_col = pd.to_numeric(df[column_name], errors='coerce')
-                            filtered_df = df.loc[pd.notna(numeric_col)]
-                            if operator == '>': filtered_results = filtered_df[numeric_col > value]
-                            elif operator == '<': filtered_results = filtered_df[numeric_col < value]
-                            elif operator == '>=': filtered_results = filtered_df[numeric_col >= value]
-                            elif operator == '<=': filtered_results = filtered_df[numeric_col <= value]
-                            elif operator == '==': filtered_results = filtered_df[numeric_col == value]
-                            elif operator == '!=': filtered_results = filtered_df[numeric_col != value]
-                            else:
-                                filtered_results = pd.DataFrame()
-                                response = f"Unsupported numeric operator: {operator}. Try >, <, >=, <=, ==, !=."
-                            if not filtered_results.empty:
-                                preview = filtered_results.to_json(orient='records', indent=2)[:500] + "..." if len(filtered_results.to_json()) > 500 else filtered_results.to_json(orient='records', indent=2)
-                                response = f"Here are the items where '{column_name}' {operator} {value_str}:\n```json\n{preview}\n```"
-                            elif 'response' not in locals():
-                                response = f"No items found where '{column_name}' {operator} {value_str}."
-                        elif is_boolean_comparison:
-                             # Ensure column is boolean or can be interpreted as boolean
-                             boolean_col = df[column_name].astype(bool, errors='ignore') # Coerce errors, might need more robust check
-                             if operator == '==': filtered_results = df[boolean_col == value]
-                             elif operator == '!=': filtered_results = df[boolean_col != value]
-                             else:
-                                  filtered_results = pd.DataFrame()
-                                  response = f"Unsupported boolean operator: {operator}. Try == or !=."
-                             if not filtered_results.empty:
-                                 preview = filtered_results.to_json(orient='records', indent=2)[:500] + "..." if len(filtered_results.to_json()) > 500 else filtered_results.to_json(orient='records', indent=2)
-                                 response = f"Here are the items where '{column_name}' is {value_str}:\n```json\n{preview}\n```"
-                             elif 'response' not in locals():
-                                 response = f"No items found where '{column_name}' is {value_str}."
-                        elif operator == '==':
-                            filtered_results = df[df[column_name] == value]
-                            if not filtered_results.empty:
-                                preview = filtered_results.to_json(orient='records', indent=2)[:500] + "..." if len(filtered_results.to_json()) > 500 else filtered_results.to_json(orient='records', indent=2)
-                                response = f"Here are the items where '{column_name}' is '{value}':\n```json\n{preview}\n```"
-                            else:
-                                response = f"No items found where '{column_name}' is '{value}'."
-                        elif operator == '!=':
-                            filtered_results = df[df[column_name] != value]
-                            if not filtered_results.empty:
-                                preview = filtered_results.to_json(orient='records', indent=2)[:500] + "..." if len(filtered_results.to_json()) > 500 else filtered_results.to_json(orient='records', indent=2)
-                                response = f"Here are the items where '{column_name}' is not '{value}':\n```json\n{preview}\n```"
                             else:
-                                response = f"All items have '{column_name}' as '{value}' or the column doesn't exist."
-                        else:
-                             response = f"Unsupported operator for string comparison: {operator}. Try == or !=."
-                    else:
-                        response = f"I couldn't find a column named '{column_name}'. Available columns are: {', '.join(df.columns)}"
-                except Exception as e:
-                     response = f"An error occurred while filtering data: {e}"
-                     logger.error(f"Error filtering data based on condition: {e}")
             # Request structured output (e.g., as CSV or simplified JSON)
-            if "output as csv" in lower_message or "export as csv" in lower_message:
                  if df is not None and not df.empty:
                       csv_output = df.to_csv(index=False)
                       response = f"Here is the data in CSV format:\n```csv\n{csv_output[:1000]}...\n```\n(Output truncated for chat display)"
                  else:
                       response = "There is no data available to output as CSV."
-            elif "output as json" in lower_message or "export as json" in lower_message:
                  if df is not None and not df.empty:
                       json_output = df.to_json(orient='records', indent=2)
                       response = f"Here is the data in JSON format:\n```json\n{json_output[:1000]}...\n```\n(Output truncated for chat display)"
                  else:
                       response = "There is no data available to output as JSON."
-        # --- General Queries (if no DataFrame or specific query matched) ---
-        if not response:
             if "how many items" in lower_message or "number of items" in lower_message:
-                if isinstance(chatbot_data, list):
-                    response = f"There are {len(chatbot_data)} top-level items in the processed data."
                 elif isinstance(chatbot_data, dict):
                     response = "The processed data is a single dictionary, not a list of items."
                 else:
                     response = "The processed data is not a standard list or dictionary structure."
             elif "what is the structure" in lower_message or "tell me about the data" in lower_message:
-                if isinstance(chatbot_data, list) and chatbot_data:
                     sample_item = chatbot_data[0]
-                    response = f"The data is a list containing {len(chatbot_data)} items. The first item has the following top-level keys: {list(sample_item.keys())}. I can try to tell you more about specific keys if you like."
                 elif isinstance(chatbot_data, dict):
                      response = f"The data is a dictionary with the following top-level keys: {list(chatbot_data.keys())}."
                 else:
                     response = "The processed data is not a standard list or dictionary structure that I can easily describe."
             elif "show me" in lower_message or "get me" in lower_message or "extract" in lower_message:
-                 parts = lower_message.split("show me")
-                 if len(parts) > 1:
-                     key_request = parts[1].strip().split(" ")[0]
-                     extracted_values = []
-                     if isinstance(chatbot_data, list):
-                         for item in chatbot_data:
-                             if isinstance(item, dict) and key_request in item:
-                                 extracted_values.append(item[key_request])
-                     elif isinstance(chatbot_data, dict) and key_request in chatbot_data:
-                         extracted_values.append(chatbot_data[key_request])
-                     if extracted_values:
-                         preview = json.dumps(extracted_values, indent=2)[:500] + "..." if len(json.dumps(extracted_values)) > 500 else json.dumps(extracted_values, indent=2)
-                         response = f"Here are the values for '{key_request}':\n```json\n{preview}\n```"
-                     else:
-                         response = f"I couldn't find a key named '{key_request}' in the top level of the data items."
-                 else:
-                     response = "What specifically would you like me to show or extract?"
             # --- Speculation about Modifications ---
             elif "how can i modify" in lower_message or "how to change" in lower_message or "can i add" in lower_message or "can i remove" in lower_message:
                  response = "I cannot directly modify the data here, but I can tell you how you *could* modify it. What kind of change are you considering (e.g., adding an item, changing a value, removing a field)?"
             elif "add a field" in lower_message or "add a column" in lower_message:
                  response = "To add a field (or column if the data is tabular), you would typically iterate through each item (or row) in the data and add the new key-value pair. For example, adding a 'status' field with a default value."
             elif "change a value" in lower_message or "update a field" in lower_message:
                  response = "To change a value, you would need to identify the specific item(s) and the field you want to update. You could use a condition (like filtering) to find the right items and then assign a new value to the field."
             elif "remove a field" in lower_message or "delete a column" in lower_message:
                  response = "To remove a field, you would iterate through each item and delete the specified key. Be careful, as this is irreversible."
             elif "restructure" in lower_message or "change the format" in lower_message:
                  response = "Restructuring data involves transforming it into a different shape. This could mean flattening nested objects, grouping items, or pivoting data. This often requires writing custom code to map the old structure to the new one."
             elif "what if i" in lower_message or "if i changed" in lower_message:
                  response = "Tell me what specific change you're contemplating, and I can speculate on the potential impact or how you might approach it programmatically."
             # --- General Conversation / Fallback ---
             elif "hello" in lower_message or "hi" in lower_message:
                 response = random.choice(["Hello! How can I help you understand the processed data?", "Hi there! What's on your mind about this data?", "Hey! Ask me anything about the data you've loaded."])
             elif "thank you" in lower_message or "thanks" in lower_message:
                 response = random.choice(["You're welcome!", "Glad I could help.", "No problem! Let me know if you have more questions about the data."])
-            elif "clear chat" in lower_message:
-                 chat_history = []
                  response = "Chat history cleared."
-            elif not response:
                  response = random.choice([
-                    "I can analyze the data you've processed. What would you like to know?",
-                    "Ask me about the number of items, the structure, or values of specific fields.",
-                    "I can perform basic analysis like counting unique values or calculating sums/averages if the data is suitable.",
-                    "Tell me what you want to extract or filter from the data.",
-                    "I'm still learning, but I can try to answer questions about the data structure and content."
                  ])
     except Exception as e:
         logger.error(f"Chatbot runtime error: {e}")
         response = f"An internal error occurred while processing your request: {e}"
         response += "\nPlease try rephrasing your question or clear the chat history."
-    if chat_history and chat_history[-1][1] == "":
         chat_history[-1] = (chat_history[-1][0], response)
-    else:
-        chat_history.append(("", response))
-    return chat_history, chatbot_data
 # --- Gradio Interface Definition ---
 def create_modern_interface():
@@ -1454,6 +1506,11 @@ def create_modern_interface():
                 value=True,
                 info="Generate sequential QR codes for combined data"
             )
             process_btn = gr.Button(
                 "🔄 Process & Generate QR",
                 variant="primary"
@@ -1477,11 +1534,18 @@ def create_modern_interface():
         with gr.Tab("🤖 Chat with Data") as chat_tab:
              chat_history = gr.State([])
              chatbot = gr.Chatbot(label="Data Chatbot")
              with gr.Row():
                   chat_input = gr.Textbox(label="Your Message", placeholder="Ask me about the processed data...")
                   send_msg_btn = gr.Button("Send")
              clear_chat_btn = gr.Button("Clear Chat History")
         def load_example():
             example = {
                 "type": "product_catalog",
@@ -1539,8 +1603,8 @@ def create_modern_interface():
             return viewport_html
-        def process_inputs(urls, files, text, combine, crawl_depth):
-            """Process all inputs and generate QR codes"""
             results = []
             processing_status_messages = []
@@ -1602,18 +1666,23 @@ def create_modern_interface():
                 qr_paths = []
                 final_json_output = None
                 if results:
-                    qr_paths = generate_qr_codes(results, combine)
-                    final_json_output = results
-                    if qr_paths:
-                        processing_status_messages.append(f"✅ Successfully generated {len(qr_paths)} QR codes.")
                     else:
-                        processing_status_messages.append("❌ Failed to generate QR codes.")
                 else:
                     processing_status_messages.append("⚠️ No valid content collected from inputs.")
             except Exception as e:
                 logger.error(f"Overall processing error in process_inputs: {e}")
@@ -1640,7 +1709,7 @@ def create_modern_interface():
         process_btn.click(
             process_inputs,
-            inputs=[url_input, file_input, text_input, combine_data, crawl_depth_slider],
             outputs=[output_json, output_gallery, output_text, chatbot_data]
         ).then(
             on_qr_generation,
@@ -1652,8 +1721,10 @@ def create_modern_interface():
         send_msg_btn.click(
             respond_to_chat,
-            inputs=[chat_input, chat_history, chatbot_data],
-            outputs=[chatbot, chatbot_data]
         ).then(
             lambda: "",
             inputs=None,
@@ -1671,9 +1742,73 @@ def create_modern_interface():
         )
         clear_chat_btn.click(
-            lambda: [],
             inputs=None,
-            outputs=chatbot
         )
         gr.Markdown("""
@@ -1722,4 +1857,4 @@ def main():
         raise
 if __name__ == "__main__":
-    main()

         return []
 # --- Chatbot Logic ---
+def respond_to_chat(
+    message: str,
+    chat_history: List[Tuple[str, str]],
+    chatbot_data: Optional[List[Dict]],
+    # Add current_filtered_df_state as input, it will be updated and returned
+    current_filtered_df_state: Optional[pd.DataFrame]
+) -> Tuple[List[Tuple[str, str]], List[Dict], Optional[pd.DataFrame]]:
+    """
+    Responds to user chat messages based on the loaded JSON data.
+    Manages and returns the state of the filtered DataFrame.
+    """
     if chatbot_data is None or not chatbot_data:
         chat_history.append((message, "Please process some data first using the other tabs before chatting."))
+        return chat_history, chatbot_data, current_filtered_df_state # Return existing state
     chat_history.append((message, ""))
     response = ""
     lower_message = message.lower().strip()
+    # Initialize new_filtered_df_state with the current state to preserve it unless a filter changes it
+    new_filtered_df_state = current_filtered_df_state
     try:
         # Attempt to flatten the data structure for easier querying
                 else:
                     response = f"I couldn't find a column named '{column_name}'. Available columns are: {', '.join(df.columns)}"
+            # Enhanced Filter data based on more complex conditions
+            # Regex to capture: "filter by status active", "show items where category is 'electronics'", "find entries where price > 100"
+            # It tries to capture:
+            # 1. column_name (e.g., category, status, price)
+            # 2. operator (e.g., is, equals, =, >, <, contains, starts with, ends with) - flexible operators
+            # 3. value (e.g., 'electronics', active, 100) - can be quoted or unquoted
+            filter_match = re.search(
+                r'(?:filter|show items|show me items|find entries|select items|get items)\s+' # Optional action phrases
+                r'(?:where|by|for|with|if)\s+' # Keyword indicating condition
+                r'(\w+)\s+' # Column name
+                r'(is|equals?|==|!=|>=?|<=?|contains?|starts with|ends with)\s+' # Operator
+                r'([\'"]?[\w\s.-]+[\'"]?)', # Value (allows spaces, dots, hyphens if quoted, or single words)
+                lower_message
+            )
+            if filter_match:
+                column_name, operator, value_str = filter_match.groups()
+                column_name = column_name.strip()
+                operator = operator.strip().lower()
+                value_str = value_str.strip().strip("'\"")
+                logger.info(f"Filter request: Column='{column_name}', Operator='{operator}', Value='{value_str}'")
+                if column_name not in df.columns:
+                    response = f"I couldn't find a column named '{column_name}'. Available columns are: {', '.join(df.columns)}"
+                    new_filtered_df_state = None # Clear previous filter if column not found
+                else:
+                    # IMPORTANT: Always filter from the original full dataframe 'df'
+                    active_df_to_filter = df.copy()
+                    try:
+                        # Attempt to infer value type for comparison
+                        target_value: Any
+                        col_dtype = df[column_name].dtype
+                        if pd.api.types.is_numeric_dtype(col_dtype) and operator in ['>', '>=', '<', '<=', '==', '!=']:
+                            try:
+                                target_value = float(value_str)
+                                col_series = pd.to_numeric(filtered_df[column_name], errors='coerce')
+                            except ValueError:
+                                response = f"For numeric column '{column_name}', '{value_str}' is not a valid number."
+                                target_value = None # Error case
+                        elif pd.api.types.is_bool_dtype(col_dtype) or value_str.lower() in ['true', 'false']:
+                            target_value = value_str.lower() == 'true'
+                            col_series = filtered_df[column_name].astype(bool, errors='ignore')
+                        else: # Assume string comparison otherwise
+                            target_value = str(value_str)
+                            col_series = filtered_df[column_name].astype(str).str.lower() # Case-insensitive for strings
+                            value_str_lower = target_value.lower()
+                        if 'response' not in locals(): # If no type conversion error occurred
+                            if operator in ['is', 'equals', '==']:
+                                if pd.api.types.is_numeric_dtype(col_dtype) or pd.api.types.is_bool_dtype(col_dtype):
+                                    condition = col_series == target_value
+                                else: # String comparison
+                                    condition = col_series == value_str_lower
+                            elif operator == '!=':
+                                if pd.api.types.is_numeric_dtype(col_dtype) or pd.api.types.is_bool_dtype(col_dtype):
+                                    condition = col_series != target_value
+                                else: # String comparison
+                                    condition = col_series != value_str_lower
+                            elif operator == '>' and pd.api.types.is_numeric_dtype(col_dtype):
+                                condition = col_series > target_value
+                            elif operator == '>=' and pd.api.types.is_numeric_dtype(col_dtype):
+                                condition = col_series >= target_value
+                            elif operator == '<' and pd.api.types.is_numeric_dtype(col_dtype):
+                                condition = col_series < target_value
+                            elif operator == '<=' and pd.api.types.is_numeric_dtype(col_dtype):
+                                condition = col_series <= target_value
+                            elif operator in ['contains', 'contain'] and pd.api.types.is_string_dtype(col_series):
+                                condition = col_series.str.contains(value_str_lower, case=False, na=False)
+                            elif operator == 'starts with' and pd.api.types.is_string_dtype(col_series):
+                                condition = col_series.str.startswith(value_str_lower, na=False)
+                            elif operator == 'ends with' and pd.api.types.is_string_dtype(col_series):
+                                condition = col_series.str.endswith(value_str_lower, na=False)
                             else:
+                                response = f"Unsupported operator '{operator}' for column '{column_name}' (type: {col_dtype})."
+                                condition = None
+                                # If operator was bad, response is set, clear filter state
+                                if response: new_filtered_df_state = None
+                            if condition is not None:
+                                # Apply condition to the active_df_to_filter (which is a copy of the full df)
+                                filtered_results_df = active_df_to_filter[condition]
+                                if not filtered_results_df.empty:
+                                    new_filtered_df_state = filtered_results_df # Update state with new filter result
+                                    num_results = len(filtered_results_df)
+                                    preview_rows = min(num_results, 5)
+                                    preview_cols = min(len(filtered_results_df.columns), 5)
+                                    preview_df = filtered_results_df.head(preview_rows).iloc[:, :preview_cols]
+                                    preview_str = preview_df.to_string(index=False)
+                                    response = (f"Found {num_results} items where '{column_name}' {operator} '{value_str}'.\n"
+                                                f"Here's a preview:\n```\n{preview_str}\n```\n"
+                                                f"The full filtered dataset is now available for download using the 'Download Filtered JSON' button.")
+                                else:
+                                    new_filtered_df_state = pd.DataFrame() # Store empty DF for "no results"
+                                    response = f"No items found where '{column_name}' {operator} '{value_str}'."
+                            # If condition is None (e.g. bad operator) and response not already set by type check, set generic invalid op message.
+                            elif not response: # Avoid overwriting specific error from type check
+                                response = f"Unsupported operator '{operator}' for column '{column_name}' (type: {col_dtype})."
+                                new_filtered_df_state = None
+                    except ValueError as ve: # Specifically catch ValueError for target_value conversion
+                        response = f"Invalid value '{value_str}' for numeric column '{column_name}'. {ve}"
+                        new_filtered_df_state = None # Clear on value error
+                        logger.warning(f"ValueError during filter: {ve}")
+                    except Exception as e:
+                        new_filtered_df_state = None # Clear on other errors
+                        response = f"An error occurred while applying the filter: {e}"
+                        logger.error(f"Error applying filter (column='{column_name}', op='{operator}', val='{value_str}'): {e}")
+                # If the message was a filter, new_filtered_df_state is now set (or None/empty if error/no results)
+            # --- End of Enhanced Filter Logic ---
+            # If `response` is still empty, it means no filter query was matched by the filter_match regex.
+            # In this case, new_filtered_df_state (initialized from current_filtered_df_state) remains unchanged.
             # Request structured output (e.g., as CSV or simplified JSON)
+            # This section should act on the *original* df unless specifically asked for filtered data export.
+            # The new download buttons handle filtered data export separately.
+            # Let's assume for now it acts on the original df, and a separate command would be needed for "export filtered data"
+            # If no filter query matched, and no other specific df query matched,
+            # then `response` might still be empty. `new_filtered_df_state` will be the same as `current_filtered_df_state`.
+            # The general queries below should not reset `new_filtered_df_state` unless it's a "clear" command.
+            elif "output as csv" in lower_message or "export as csv" in lower_message:
                  if df is not None and not df.empty:
                       csv_output = df.to_csv(index=False)
                       response = f"Here is the data in CSV format:\n```csv\n{csv_output[:1000]}...\n```\n(Output truncated for chat display)"
                  else:
                       response = "There is no data available to output as CSV."
+            elif "output as json" in lower_message or "export as json" in lower_message: # Note: "export as json" is different from download buttons
                  if df is not None and not df.empty:
                       json_output = df.to_json(orient='records', indent=2)
                       response = f"Here is the data in JSON format:\n```json\n{json_output[:1000]}...\n```\n(Output truncated for chat display)"
                  else:
                       response = "There is no data available to output as JSON."
+        # --- General Queries (if no DataFrame or specific query matched AND no filter was applied in this turn) ---
+        # These should not clear new_filtered_df_state unless it's a "clear chat"
+        if not response: # Only enter if no response has been generated by DataFrame/filter logic
             if "how many items" in lower_message or "number of items" in lower_message:
+                if new_filtered_df_state is not None and not new_filtered_df_state.empty:
+                    response = f"The currently filtered dataset has {len(new_filtered_df_state)} items. The original dataset has {len(df if df is not None else chatbot_data)} items."
+                elif df is not None: # Check df from original chatbot_data
+                    response = f"There are {len(df)} top-level items in the processed data."
+                elif isinstance(chatbot_data, list): # Fallback if df creation failed but chatbot_data is list
+                    response = f"There are {len(chatbot_data)} top-level items in the processed data (not in DataFrame)."
                 elif isinstance(chatbot_data, dict):
                     response = "The processed data is a single dictionary, not a list of items."
                 else:
                     response = "The processed data is not a standard list or dictionary structure."
             elif "what is the structure" in lower_message or "tell me about the data" in lower_message:
+                if new_filtered_df_state is not None and not new_filtered_df_state.empty:
+                     response = f"The filtered data has columns: {', '.join(new_filtered_df_state.columns)}. "
+                     if df is not None:
+                         response += f"The original data has columns: {', '.join(df.columns)}."
+                     else:
+                         response += "Original data structure is not tabular."
+                elif df is not None:
+                     response = f"The data is a table with {len(df)} rows and columns: {', '.join(df.columns)}."
+                elif isinstance(chatbot_data, list) and chatbot_data:
                     sample_item = chatbot_data[0]
+                    response = f"The data is a list containing {len(chatbot_data)} items. The first item has the following top-level keys: {list(sample_item.keys())}."
                 elif isinstance(chatbot_data, dict):
                      response = f"The data is a dictionary with the following top-level keys: {list(chatbot_data.keys())}."
                 else:
                     response = "The processed data is not a standard list or dictionary structure that I can easily describe."
+            # "show me" without a filter condition might be ambiguous.
+            # Let's assume it refers to the original data or provide guidance.
             elif "show me" in lower_message or "get me" in lower_message or "extract" in lower_message:
+                 # This specific 'show me' without 'where' should not trigger a filter or clear existing filter state.
+                 # It's a general request for data, which is too broad. Guide the user.
+                 response = "If you want to filter the data, please use a phrase like 'show me items where column_name is value'. If you want to see the raw data, consider using the download buttons."
             # --- Speculation about Modifications ---
             elif "how can i modify" in lower_message or "how to change" in lower_message or "can i add" in lower_message or "can i remove" in lower_message:
                  response = "I cannot directly modify the data here, but I can tell you how you *could* modify it. What kind of change are you considering (e.g., adding an item, changing a value, removing a field)?"
             elif "add a field" in lower_message or "add a column" in lower_message:
                  response = "To add a field (or column if the data is tabular), you would typically iterate through each item (or row) in the data and add the new key-value pair. For example, adding a 'status' field with a default value."
             elif "change a value" in lower_message or "update a field" in lower_message:
                  response = "To change a value, you would need to identify the specific item(s) and the field you want to update. You could use a condition (like filtering) to find the right items and then assign a new value to the field."
             elif "remove a field" in lower_message or "delete a column" in lower_message:
                  response = "To remove a field, you would iterate through each item and delete the specified key. Be careful, as this is irreversible."
             elif "restructure" in lower_message or "change the format" in lower_message:
                  response = "Restructuring data involves transforming it into a different shape. This could mean flattening nested objects, grouping items, or pivoting data. This often requires writing custom code to map the old structure to the new one."
             elif "what if i" in lower_message or "if i changed" in lower_message:
                  response = "Tell me what specific change you're contemplating, and I can speculate on the potential impact or how you might approach it programmatically."
             # --- General Conversation / Fallback ---
             elif "hello" in lower_message or "hi" in lower_message:
                 response = random.choice(["Hello! How can I help you understand the processed data?", "Hi there! What's on your mind about this data?", "Hey! Ask me anything about the data you've loaded."])
             elif "thank you" in lower_message or "thanks" in lower_message:
                 response = random.choice(["You're welcome!", "Glad I could help.", "No problem! Let me know if you have more questions about the data."])
+            elif "clear chat" in lower_message: # This should be caught by button, but as text too
+                 chat_history = [] # Gradio handles this for the Chatbot component via button
                  response = "Chat history cleared."
+                 new_filtered_df_state = None # Also clear filtered data on "clear chat" command by text
+            elif not response: # Fallback if nothing else matched
                  response = random.choice([
+                    "I can analyze the data you've processed. What would you like to know? Try asking to filter data, e.g., 'show items where status is active'.",
+                    "Ask me about the number of items, the structure, or values of specific fields. You can also filter data.",
+                    "I can perform basic analysis or filter the data. For example: 'filter by price > 100'.",
+                    "Tell me what you want to extract or filter from the data. Use phrases like 'show items where ...'.",
+                    "I'm equipped to filter your data. Try 'find entries where name contains widget'."
                  ])
     except Exception as e:
         logger.error(f"Chatbot runtime error: {e}")
         response = f"An internal error occurred while processing your request: {e}"
         response += "\nPlease try rephrasing your question or clear the chat history."
+        # On unexpected error, preserve the current_filtered_df_state rather than clearing or modifying it.
+        # new_filtered_df_state = current_filtered_df_state # This line is effectively already done by initialization
+    if not response: # Final safety net for response, if it's somehow still empty
+        response = "I'm not sure how to respond to that. Please try rephrasing or ask for help on available commands."
+    if chat_history and chat_history[-1][1] == "": # If last history entry is (user_msg, "")
         chat_history[-1] = (chat_history[-1][0], response)
+    # else: # This case should ideally not be reached if chat_history.append((message, "")) is always called first.
+    #    chat_history.append((message, response)) # Avoids duplicate user message if something went wrong
+    return chat_history, chatbot_data, new_filtered_df_state
 # --- Gradio Interface Definition ---
 def create_modern_interface():
                 value=True,
                 info="Generate sequential QR codes for combined data"
             )
+            generate_qr_toggle = gr.Checkbox(
+                label="Generate QR Codes",
+                value=False, # Default to False as per task
+                info="Enable to generate QR codes for the processed data."
+            )
             process_btn = gr.Button(
                 "🔄 Process & Generate QR",
                 variant="primary"
         with gr.Tab("🤖 Chat with Data") as chat_tab:
              chat_history = gr.State([])
              chatbot = gr.Chatbot(label="Data Chatbot")
+             filtered_chatbot_df_state = gr.State(None) # To store the filtered DataFrame
              with gr.Row():
                   chat_input = gr.Textbox(label="Your Message", placeholder="Ask me about the processed data...")
                   send_msg_btn = gr.Button("Send")
+             with gr.Row():
+                download_full_json_btn = gr.Button("Download Full JSON")
+                download_filtered_json_btn = gr.Button("Download Filtered JSON")
+             download_file_output = gr.File(label="Download Data", interactive=False) # For triggering download
              clear_chat_btn = gr.Button("Clear Chat History")
         def load_example():
             example = {
                 "type": "product_catalog",
             return viewport_html
+        def process_inputs(urls, files, text, combine, crawl_depth, generate_qr_enabled):
+            """Process all inputs and generate QR codes based on toggle"""
             results = []
             processing_status_messages = []
                 qr_paths = []
                 final_json_output = None
+                qr_paths = []
                 if results:
+                    final_json_output = results # Assign processed data regardless of QR generation
+                    if generate_qr_enabled:
+                        processing_status_messages.append("⚙️ Generating QR codes as requested...")
+                        qr_paths = generate_qr_codes(results, combine)
+                        if qr_paths:
+                            processing_status_messages.append(f"✅ Successfully generated {len(qr_paths)} QR codes.")
+                        else:
+                            processing_status_messages.append("❌ Failed to generate QR codes (empty result or error).")
                     else:
+                        processing_status_messages.append("☑️ QR code generation was disabled. Processed data is available.")
+                        qr_paths = [] # Ensure it's empty
                 else:
                     processing_status_messages.append("⚠️ No valid content collected from inputs.")
+                    final_json_output = {} # Ensure output_json is cleared if no results
             except Exception as e:
                 logger.error(f"Overall processing error in process_inputs: {e}")
         process_btn.click(
             process_inputs,
+            inputs=[url_input, file_input, text_input, combine_data, crawl_depth_slider, generate_qr_toggle],
             outputs=[output_json, output_gallery, output_text, chatbot_data]
         ).then(
             on_qr_generation,
         send_msg_btn.click(
             respond_to_chat,
+            inputs=[chat_input, chat_history, chatbot_data, filtered_chatbot_df_state],
+            outputs=[chatbot, chatbot_data, filtered_chatbot_df_state]
+            inputs=[chat_input, chat_history, chatbot_data, filtered_chatbot_df_state],
+            outputs=[chatbot, chatbot_data, filtered_chatbot_df_state]
         ).then(
             lambda: "",
             inputs=None,
         )
         clear_chat_btn.click(
+            # Resets the chatbot display to an empty list and the filtered-DataFrame state to None.
+            # NOTE(review): the chat_history state is not listed in outputs here — confirm it is reset elsewhere.
+            lambda: ([], None), # Clear chat history and filtered data state
             inputs=None,
+            outputs=[chatbot, filtered_chatbot_df_state]
+        )
+        # --- Download Logic ---
+        def download_json_data(data_df: Optional[pd.DataFrame], filename_prefix: str) -> Optional[str]:
+            if data_df is None or data_df.empty:
+                logger.info(f"No data provided for download with prefix '{filename_prefix}'.")
+                return None
+            try:
+                data_list = data_df.to_dict(orient='records')
+                json_str = json.dumps(data_list, indent=2, ensure_ascii=False)
+                timestamp = int(time.time())
+                filename = f"{filename_prefix}_{timestamp}.json"
+                file_path = TEMP_DIR / filename
+                with open(file_path, 'w', encoding='utf-8') as f:
+                    f.write(json_str)
+                logger.info(f"Successfully created JSON file for download: {file_path}")
+                return str(file_path)
+            except Exception as e:
+                logger.error(f"Error creating JSON file for {filename_prefix}: {e}")
+                return None
+        def handle_download_full_json(current_chatbot_data_state: Optional[List[Dict]]) -> Optional[str]:
+            if not current_chatbot_data_state:
+                logger.info("No full data available to download.")
+                # Optionally, could return a gr.Warning or gr.Info to the UI if we had a dedicated status Textbox for downloads
+                return None
+            # The chatbot_data state is a list of dicts. Convert to DataFrame for download_json_data.
+            # The df created in respond_to_chat is not directly used here to ensure we get the *original* full data.
+            try:
+                # A similar flattening logic as in respond_to_chat might be needed if chatbot_data_state is complex
+                # For now, assume it's a list of flat dictionaries or can be handled by pd.DataFrame directly.
+                # If chatbot_data_state originates from `results` in `process_inputs`, it's a list of dicts.
+                # A more robust approach would be to re-use the exact flattening from respond_to_chat if structures are nested.
+                # Let's try a direct conversion first.
+                df_to_download = pd.DataFrame(current_chatbot_data_state)
+                if df_to_download.empty:
+                    logger.info("Full data resulted in an empty DataFrame. Nothing to download.")
+                    return None
+            except Exception as e:
+                logger.error(f"Error converting full chatbot_data to DataFrame for download: {e}")
+                return None
+            return download_json_data(df_to_download, "full_data")
+        def handle_download_filtered_json(current_filtered_df_state: Optional[pd.DataFrame]) -> Optional[str]:
+            if current_filtered_df_state is None or current_filtered_df_state.empty:
+                logger.info("No filtered data available to download.")
+                # Consider gr.Info("No filtered data to download.") if a text output for this is desired.
+                return None
+            return download_json_data(current_filtered_df_state, "filtered_data")
+        download_full_json_btn.click(
+            # The handler's return value (a file path string or None) is routed to download_file_output.
+            fn=handle_download_full_json,
+            inputs=[chatbot_data], # chatbot_data is the gr.State holding the full dataset (List[Dict])
+            outputs=[download_file_output]
+        )
+        download_filtered_json_btn.click(
+            # Same output component as the full download; only the source state differs.
+            fn=handle_download_filtered_json,
+            inputs=[filtered_chatbot_df_state], # This state holds the filtered DataFrame
+            outputs=[download_file_output]
         )
         gr.Markdown("""
         raise
 if __name__ == "__main__":
+    # Invoke main() only when this file is run as a script, not when it is imported.
+    main()