MilanM committed on
Commit
5a858f9
·
verified ·
1 Parent(s): 9469a0b

Upload 2 files

Browse files
helper_functions/helper_functions.py ADDED
@@ -0,0 +1,496 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ibm_watsonx_ai import APIClient, Credentials
2
+ from typing import Dict, Optional, List, Union, Any, Set
3
+ import pandas as pd
4
+ import marimo as mo
5
+ import json
6
+ import glob
7
+ import io
8
+ import os
9
+
10
def get_cred_value(key, creds_var_name="baked_in_creds", default=""):
    """
    Safely look up a value in a credentials dictionary.

    Searches for the credentials dictionary in:
    1. This module's global variables, under ``creds_var_name``
    2. Any already-imported module exposing an attribute named ``creds_var_name``

    Args:
        key: The key to look up in the credentials dictionary.
        creds_var_name: The variable name of the credentials dictionary.
        default: The value returned when the key cannot be found.

    Returns:
        The value from the credentials dictionary if it exists and contains
        the key, otherwise the default value.
    """
    import sys

    # 1) Check this module's globals first.
    creds_dict = globals().get(creds_var_name)
    if isinstance(creds_dict, dict) and key in creds_dict:
        return creds_dict[key]

    # 2) Scan imported modules. Iterate over a snapshot: sys.modules can
    # mutate while we iterate (lazy imports), which would raise RuntimeError
    # with a live .items() view.
    for module_obj in list(sys.modules.values()):
        # Entries in sys.modules may be None (import-machinery placeholders).
        if module_obj is None:
            continue
        creds_dict = getattr(module_obj, creds_var_name, None)
        if isinstance(creds_dict, dict) and key in creds_dict:
            return creds_dict[key]

    return default
41
+
42
def get_key_by_value(dictionary, value):
    """Return the first key in *dictionary* whose value equals *value*, or None."""
    return next((k for k, v in dictionary.items() if v == value), None)
47
+
48
def markdown_spacing(number):
    """Return *number* non-breaking space (U+00A0) characters.

    Regular spaces collapse when markdown is rendered, so the non-breaking
    space is written explicitly (its intent is invisible as a bare literal —
    see wrap_with_spaces, which documents the non-breaking behavior).
    """
    return '\u00a0' * number
51
+
52
def wrap_with_spaces(text_to_wrap, prefix_spaces=2, suffix_spaces=2):
    """Wrap text with non-breaking spaces on either side."""
    pad_before = "" if prefix_spaces <= 0 else markdown_spacing(prefix_spaces)
    pad_after = "" if suffix_spaces <= 0 else markdown_spacing(suffix_spaces)
    return f"{pad_before}{text_to_wrap}{pad_after}"
57
+
58
+
59
def load_file_dataframe(file, file_extension, sheet_selector=None, excel_data=None, header_row=0):
    """
    Load a dataframe from an uploaded file with customizable header row.

    Parameters:
    -----------
    file : marimo.ui.file object
        The file upload component containing the file data
    file_extension : str
        The extension of the uploaded file (.xlsx, .xls, .csv, .json)
    sheet_selector : marimo.ui.dropdown, optional
        Dropdown component for selecting Excel sheets
    excel_data : BytesIO, optional
        BytesIO object containing Excel data (required for Excel files)
    header_row : int, optional
        Row index to use as column headers (0-based). Default is 0.
        Use None to have pandas generate default column names.

    Returns:
    --------
    tuple
        (pandas.DataFrame, list) - The loaded dataframe and list of column
        names. Empty dataframe/list when nothing could be loaded.
    """
    dataframe = pd.DataFrame([])
    column_names = []

    if not file.contents():
        return dataframe, column_names

    if file_extension in ['.xlsx', '.xls'] and sheet_selector is not None and sheet_selector.value:
        # BUG FIX: guard excel_data — the original called .seek(0) on it
        # unconditionally and crashed with AttributeError when it was None.
        if excel_data is not None:
            excel_data.seek(0)  # Reset buffer position
            dataframe = pd.read_excel(
                excel_data,
                sheet_name=sheet_selector.value,
                header=header_row,
                engine="openpyxl" if file_extension == '.xlsx' else "xlrd"
            )
            column_names = list(dataframe.columns)
    elif file_extension == '.csv':
        # Decode and parse errors are reported, not raised (consistent with
        # the JSON branch below).
        try:
            csv_data = io.StringIO(file.contents().decode('utf-8'))
            dataframe = pd.read_csv(csv_data, header=header_row)
            column_names = list(dataframe.columns)
        except Exception as e:
            print(f"Error parsing CSV: {e}")
    elif file_extension == '.json':
        try:
            json_data = json.loads(file.contents().decode('utf-8'))
            # Handle different JSON structures
            if isinstance(json_data, list):
                dataframe = pd.DataFrame(json_data)
            elif isinstance(json_data, dict):
                if any(isinstance(v, (dict, list)) for v in json_data.values()):
                    # Nested JSON with consistent structure
                    dataframe = pd.json_normalize(json_data)
                else:
                    # Flat JSON becomes a single-row frame
                    dataframe = pd.DataFrame([json_data])
            column_names = list(dataframe.columns)
        except Exception as e:
            print(f"Error parsing JSON: {e}")

    return dataframe, column_names
123
+
124
+
125
def create_parameter_table(input_list, column_name="Active Options", label="Select the Parameters to set to Active",
                           selection_type="multi-cell", text_justify="center"):
    """
    Build a marimo table used to toggle parameters on/off.

    Args:
        input_list: Parameter names shown as table rows.
        column_name: Heading of the single column.
        label: Bold label rendered above the table.
        selection_type: Either "single-cell" or "multi-cell".
        text_justify: Column justification: "left", "center" or "right".

    Returns:
        A mo.ui.table configured for parameter selection.

    Raises:
        ValueError: If selection_type or text_justify is not a valid option.
    """
    import marimo as mo

    if selection_type not in ("single-cell", "multi-cell"):
        raise ValueError("selection_type must be either 'single-cell' or 'multi-cell'")

    if text_justify not in ("left", "center", "right"):
        raise ValueError("text_justify must be one of: 'left', 'center', 'right'")

    return mo.ui.table(
        label=f"**{label}**",
        data={column_name: input_list},
        selection=selection_type,
        text_justify_columns={column_name: text_justify},
    )
159
+
160
def get_cell_values(parameter_options):
    """
    Extract active parameter values from a mo.ui.table.

    Args:
        parameter_options: A mo.ui.table with cell selection enabled

    Returns:
        Dict mapping each parameter name found in the table's first column
        to True (selected) or False (not selected).
    """
    known_params = set()

    # Collect every candidate parameter from the table's backing data.
    if hasattr(parameter_options, 'data'):
        table_data = parameter_options.data

        if hasattr(table_data, 'shape') and hasattr(table_data, 'iloc'):
            # DataFrame-like: read each row's first-column value.
            row_count, col_count = table_data.shape[0], table_data.shape[1]
            if col_count > 0:
                for row_idx in range(row_count):
                    candidate = table_data.iloc[row_idx, 0]
                    if candidate and isinstance(candidate, str):
                        known_params.add(candidate)
        elif isinstance(table_data, dict) and table_data:
            # Dict structure (common in marimo tables): use the first column.
            first_col = next(iter(table_data))
            known_params.update(
                entry for entry in table_data[first_col]
                if entry and isinstance(entry, str)
            )

    # Everything starts inactive; selections flip entries to True below.
    result = dict.fromkeys(known_params, False)

    selected = getattr(parameter_options, 'value', None)
    if selected is not None:
        for cell in selected:
            # Selections may arrive as objects with .value, dicts, or strings.
            if hasattr(cell, 'value') and cell.value in result:
                result[cell.value] = True
            elif isinstance(cell, dict) and 'value' in cell and cell['value'] in result:
                result[cell['value']] = True
            elif isinstance(cell, str) and cell in result:
                result[cell] = True

    return result
212
+
213
def convert_table_to_json_docs(df, selected_columns=None):
    """
    Convert a pandas DataFrame or dictionary to a list of JSON documents.

    Column names are standardized to lowercase, spaces become underscores,
    and special characters are removed. Only the selected columns (when
    given) are included; otherwise all columns are used.

    Args:
        df: The DataFrame or dictionary to process
        selected_columns: Columns to include — a list of names, or a dict
            of {name: bool} from which the True entries are taken

    Returns:
        list: One dict per row, keyed by standardized column names
        (NaN values become None). Empty list on empty or invalid input.
    """
    import pandas as pd
    import re

    def standardize_key(key):
        """Lowercase, spaces -> underscores, drop non-word characters."""
        if not isinstance(key, str):
            return str(key).lower()
        key = key.lower().replace(' ', '_')
        return re.sub(r'[^\w]', '', key)

    # Dictionary input produces a single document.
    if isinstance(df, dict):
        if selected_columns:
            return [{standardize_key(k): df.get(k, None) for k in selected_columns}]
        return [{standardize_key(k): v for k, v in df.items()}]

    if df is None:
        return []

    # Coerce other inputs (e.g. list of dicts) into a DataFrame.
    if not isinstance(df, pd.DataFrame):
        try:
            df = pd.DataFrame(df)
        except Exception:
            # BUG FIX: was a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit.
            return []

    if df.empty:
        return []

    # A {column: bool} mapping selects the True entries.
    if isinstance(selected_columns, dict):
        selected_columns = [col for col, include in selected_columns.items() if include]

    # Fall back to every column when nothing valid was selected.
    if not selected_columns or not isinstance(selected_columns, list):
        selected_columns = list(df.columns)

    # Resolve requested names case-insensitively against actual columns.
    available_columns = []
    columns_lower = {col.lower(): col for col in df.columns if isinstance(col, str)}
    for col in selected_columns:
        if col in df.columns:
            available_columns.append(col)
        elif isinstance(col, str) and col.lower() in columns_lower:
            available_columns.append(columns_lower[col.lower()])

    if not available_columns:
        return []

    # Build one document per row with standardized keys.
    json_docs = []
    for _, row in df.iterrows():
        doc = {}
        for col in available_columns:
            value = row[col]
            doc[standardize_key(col)] = None if pd.isna(value) else value
        json_docs.append(doc)

    return json_docs
298
+
299
def filter_models_by_function(resources, function_type="prompt_chat"):
    """
    Filter model IDs from a resources list by supported function type.

    Args:
        resources (list): List of model resource dicts
        function_type (str, optional): Function id to filter by.
            Defaults to "prompt_chat".

    Returns:
        list: Model IDs whose "functions" entries include function_type.
    """
    if not isinstance(resources, list) or not resources:
        return []

    matching_ids = []
    for entry in resources:
        # Skip entries without a well-formed "functions" list.
        if "functions" not in entry or not isinstance(entry["functions"], list):
            continue
        supported = {
            fn.get("id") for fn in entry["functions"] if isinstance(fn, dict)
        }
        if function_type in supported and "model_id" in entry:
            matching_ids.append(entry["model_id"])

    return matching_ids
329
+
330
+
331
def get_model_selection_table(client=None, model_type="all", filter_functionality=None, selection_mode="single-cell"):
    """
    Build a marimo table for model selection.

    Args:
        client: API client used to fetch model specs. When None, a table of
            default models is returned instead.
        model_type (str): Which specs to fetch: "all", "chat" or "embedding".
        filter_functionality (str, optional): Restrict models to those
            exposing this function id (e.g. "text_chat", "autoai_rag").
        selection_mode (str): Table selection mode, "single" or "single-cell".
            Defaults to "single-cell".

    Returns:
        With a client: (model_selector, resources, model_id_list) tuple.
        Without a client: just the table element.
        NOTE(review): the two return shapes differ — callers must handle both.
    """
    fallback_models = ['mistralai/mistral-large']

    if client is None:
        # No API access: offer only the fallback list.
        return mo.ui.table(
            fallback_models,
            selection="single",
            label="Select a model to use.",
            page_size=30,
        )

    fm = client.foundation_models
    if model_type == "chat":
        specs = fm.get_chat_model_specs()
    elif model_type == "embedding":
        specs = fm.get_embeddings_model_specs()
    else:
        specs = fm.get_model_specs()

    resources = specs.get("resources", [])

    if filter_functionality and resources:
        model_id_list = filter_models_by_function(resources, filter_functionality)
    else:
        model_id_list = [entry["model_id"] for entry in resources]

    # Fall back to the defaults when filtering removed everything.
    if not model_id_list:
        model_id_list = fallback_models

    # For "single-cell" the initial selection must be
    # [("<row_nr as a string>", "column_name string")] to take effect.
    initial = [("0", "value")] if selection_mode == "single-cell" else [0]

    model_selector = mo.ui.table(
        model_id_list,
        selection=selection_mode,
        label="Select a model to use.",
        page_size=30,
        initial_selection=initial,
    )

    return model_selector, resources, model_id_list
394
+
395
+ def _enforce_model_selection(model_selection, model_id_list):
396
+ # If nothing is selected (empty list) or value is None
397
+ if not model_selection.value:
398
+ # Reset to first item
399
+ model = 0
400
+ model_selection._value = model_id_list[model]
401
+ print(model_selection.value)
402
+ return model_selection.value
403
+
404
def update_max_tokens_limit(model_selection, resources, model_id_list):
    """
    Look up the max output token limit for the currently selected model.

    Args:
        model_selection: UI component whose .value is the selection (a model
            id string, or a list of row indices / id strings).
        resources: List of model resource dicts from the model specs API.
        model_id_list: Model ids parallel to the selection's row indices.

    Returns:
        int: The model's max_output_tokens limit, or 4096 when it cannot
        be determined.
    """
    default_max_tokens = 4096

    try:
        # BUG FIX: hasattr must be checked BEFORE dereferencing .value —
        # the original read model_selection.value first, so the hasattr
        # guard could never trigger (an AttributeError was raised instead).
        if not hasattr(model_selection, 'value') or model_selection.value is None:
            print("No model selection or selection has no value")
            return default_max_tokens

        if not resources or not isinstance(resources, list):
            print("Resources is empty or not a list")
            return default_max_tokens

        selected_value = model_selection.value
        print(f"Raw selection value: {selected_value}")

        # Selection may be a list of row indices, a list of ids, or a plain id.
        if isinstance(selected_value, list) and selected_value:
            first = selected_value[0]
            if isinstance(first, int) and 0 <= first < len(model_id_list):
                selected_model_id = model_id_list[first]
            else:
                selected_model_id = str(first)
        else:
            selected_model_id = str(selected_value)

        print(f"Selected model ID: {selected_model_id}")

        # Find the matching resource and read its output-token limit.
        for model in resources:
            if model.get("model_id") == selected_model_id:
                limits = model.get("model_limits", {})
                if "max_output_tokens" in limits:
                    return limits["max_output_tokens"]
                break

    except Exception as e:
        print(f"Error: {e}")

    return default_max_tokens
445
+
446
+
447
def load_templates(
    folder_path: str,
    file_extensions: Optional[List[str]] = None,
    strip_whitespace: bool = True
) -> Dict[str, str]:
    """
    Load template files from a folder into a name -> content dictionary.

    Args:
        folder_path: Folder containing the template files.
        file_extensions: Extensions to include (default ['.txt', '.md']).
        strip_whitespace: Strip leading/trailing whitespace from templates.

    Returns:
        Dict keyed by filename (without extension) with file contents as
        values; always contains a default "empty" entry.
    """
    extensions = file_extensions if file_extensions is not None else ['.txt', '.md']
    # Normalize so every extension starts with a dot.
    extensions = [ext if ext.startswith('.') else f'.{ext}' for ext in extensions]

    templates = {"empty": " "}  # Default empty template

    for ext in extensions:
        for file_path in glob.glob(os.path.join(folder_path, f'*{ext}')):
            try:
                # Filename without extension becomes the template's key.
                template_name = os.path.splitext(os.path.basename(file_path))[0]

                with open(file_path, 'r', encoding='utf-8') as handle:
                    content = handle.read()

                templates[template_name] = content.strip() if strip_whitespace else content
            except Exception as e:
                print(f"Error loading template from {file_path}: {str(e)}")

    return templates
helper_functions/table_helper_functions.py ADDED
@@ -0,0 +1,393 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def process_with_llm(fields_to_process, prompt_template, inf_model, params, batch_size=10):
    """
    Process documents with an LLM using a prompt template with dynamic
    field mapping. Template {placeholders} are filled from each document's
    (pre-standardized) fields.

    Args:
        fields_to_process (list): Document dictionaries to process
        prompt_template (str): Template with {field_name} placeholders
            matching keys in the documents
        inf_model: Inference model instance used for generation
        params: Parameters passed to the inference model
        batch_size (int): Number of prompts sent per batch

    Returns:
        list: Generated results from the LLM (one per successful prompt)
    """
    import marimo as mo
    import time
    import re

    # Safety check for inputs
    if not fields_to_process or not inf_model:
        print("Missing required inputs")
        return []

    # Allow UI components that wrap the template in a {'value': ...} dict.
    if isinstance(prompt_template, dict) and 'value' in prompt_template:
        prompt_template = prompt_template['value']
    elif not isinstance(prompt_template, str):
        print(f"Invalid prompt template type: {type(prompt_template)}, expected string")
        return []

    # Find all {field} placeholders in the template.
    template_fields = re.findall(r'\{([^{}]+)\}', prompt_template)
    if not template_fields:
        print("No field placeholders found in template")
        return []

    # Build one formatted prompt per document.
    formatted_prompts = []
    for doc in fields_to_process:
        try:
            field_values = {}
            for field in template_fields:
                # Try a direct key match first.
                if field in doc:
                    field_values[field] = doc[field] if doc[field] is not None else ""
                elif '.' in field:
                    # Dotted fields (e.g. "data.title") walk nested dicts.
                    try:
                        value = doc
                        for part in field.split('.'):
                            if isinstance(value, dict) and part in value:
                                value = value[part]
                            else:
                                value = None
                                break
                        field_values[field] = value if value is not None else ""
                    except Exception:
                        field_values[field] = ""
                else:
                    # Default to empty string if field not found.
                    field_values[field] = ""

            # Guard against None leaking into str.format.
            for key in field_values:
                if field_values[key] is None:
                    field_values[key] = ""

            formatted_prompts.append(prompt_template.format(**field_values))
        except Exception as e:
            print(f"Error formatting prompt: {str(e)}")
            print(f"Field values: {field_values}")
            continue

    if not formatted_prompts:
        print("No valid prompts generated")
        return []

    # Sample of the formatted prompts for debugging.
    print(f"Sample formatted prompt: {formatted_prompts[0][:200]}...")

    # Split into batches.
    batches = [formatted_prompts[i:i + batch_size] for i in range(0, len(formatted_prompts), batch_size)]
    results = []

    with mo.status.progress_bar(
        total=len(batches),
        title="Processing Batches",
        subtitle=f"Processing {len(formatted_prompts)} prompts in {len(batches)} batches",
        completion_title="Processing Complete",
        completion_subtitle=f"Processed {len(formatted_prompts)} prompts successfully",
        show_rate=True,
        show_eta=True,
        remove_on_exit=True
    ) as progress:
        for i, batch in enumerate(batches):
            start_time = time.time()
            try:
                print(f"Sending batch {i+1} of {len(batches)} to model")
                batch_results = inf_model.generate_text(prompt=batch, params=params)
                results.extend(batch_results)
                inference_time = time.time() - start_time
                print(f"Inference time for Batch {i+1}: {inference_time:.2f} seconds")
            except Exception as e:
                print(f"Error in batch {i+1}: {str(e)}")
            finally:
                # BUG FIX: the original `continue` on error skipped this
                # update, so the progress bar never completed after a
                # failed batch.
                progress.update(increment=1)

    # Brief pause so the completed bar is visible before removal.
    time.sleep(1)

    return results
137
+
138
+
139
+
140
+ # def process_with_llm_no_progress_bar(fields_to_process, prompt_template, inf_model, params, batch_size=10):
141
+ # """
142
+ # Process documents with LLM using a prompt template with dynamic field mapping.
143
+ # Uses template fields to extract values from pre-standardized document fields.
144
+
145
+ # Args:
146
+ # fields_to_process (list): List of document dictionaries to process
147
+ # prompt_template (str): Template with {field_name} placeholders matching keys in documents
148
+ # inf_model: The inference model instance to use for generation
149
+ # params: Parameters to pass to the inference model
150
+ # batch_size (int): Number of documents to process per batch
151
+
152
+ # Returns:
153
+ # list: Processed results from the LLM
154
+ # """
155
+ # import time
156
+ # import re
157
+
158
+ # # Safety check for inputs
159
+ # if not fields_to_process or not inf_model:
160
+ # print("Missing required inputs")
161
+ # return []
162
+
163
+ # # Handle case where prompt_template is a dictionary (from UI components)
164
+ # if isinstance(prompt_template, dict) and 'value' in prompt_template:
165
+ # prompt_template = prompt_template['value']
166
+ # elif not isinstance(prompt_template, str):
167
+ # print(f"Invalid prompt template type: {type(prompt_template)}, expected string")
168
+ # return []
169
+
170
+ # # Extract field names from the prompt template using regex
171
+ # # This finds all strings between curly braces
172
+ # field_pattern = r'\{([^{}]+)\}'
173
+ # template_fields = re.findall(field_pattern, prompt_template)
174
+
175
+ # if not template_fields:
176
+ # print("No field placeholders found in template")
177
+ # return []
178
+
179
+ # # Create formatted prompts from the documents
180
+ # formatted_prompts = []
181
+ # for doc in fields_to_process:
182
+ # try:
183
+ # # Create a dictionary of field values to substitute
184
+ # field_values = {}
185
+
186
+ # for field in template_fields:
187
+ # # Try direct match first
188
+ # if field in doc:
189
+ # field_values[field] = doc[field] if doc[field] is not None else ""
190
+ # # If field contains periods (e.g., "data.title"), evaluate it
191
+ # elif '.' in field:
192
+ # try:
193
+ # # Build a safe evaluation string
194
+ # parts = field.split('.')
195
+ # value = doc
196
+ # for part in parts:
197
+ # if isinstance(value, dict) and part in value:
198
+ # value = value[part]
199
+ # else:
200
+ # value = None
201
+ # break
202
+ # field_values[field] = value if value is not None else ""
203
+ # except:
204
+ # field_values[field] = ""
205
+ # else:
206
+ # # Default to empty string if field not found
207
+ # field_values[field] = ""
208
+
209
+ # # Handle None values at the top level to ensure formatting works
210
+ # for key in field_values:
211
+ # if field_values[key] is None:
212
+ # field_values[key] = ""
213
+
214
+ # # Format the prompt with all available fields
215
+ # prompt = prompt_template.format(**field_values)
216
+ # formatted_prompts.append(prompt)
217
+
218
+ # except Exception as e:
219
+ # print(f"Error formatting prompt: {str(e)}")
220
+ # print(f"Field values: {field_values}")
221
+ # continue
222
+
223
+ # # Return empty list if no valid prompts
224
+ # if not formatted_prompts:
225
+ # print("No valid prompts generated")
226
+ # return []
227
+
228
+ # # Print a sample of the formatted prompts for debugging
229
+ # if formatted_prompts:
230
+ # print(f"Sample formatted prompt: {formatted_prompts[0][:200]}...")
231
+
232
+ # # Split into batches
233
+ # batches = [formatted_prompts[i:i + batch_size] for i in range(0, len(formatted_prompts), batch_size)]
234
+
235
+ # results = []
236
+
237
+ # # Process each batch
238
+ # for i, batch in enumerate(batches):
239
+ # start_time = time.time()
240
+
241
+ # try:
242
+ # # Use the provided inference model to generate responses
243
+ # print(f"Sending batch {i+1} of {len(batches)} to model")
244
+
245
+ # # Call the inference model with the batch of prompts and params
246
+ # batch_results = inf_model.generate_text(prompt=batch, params=params)
247
+
248
+ # results.extend(batch_results)
249
+
250
+ # except Exception as e:
251
+ # print(f"Error in batch {i+1}: {str(e)}")
252
+ # continue
253
+
254
+ # end_time = time.time()
255
+ # inference_time = end_time - start_time
256
+ # print(f"Inference time for Batch {i+1}: {inference_time:.2f} seconds")
257
+
258
+ # return results
259
+
260
def append_llm_results_to_dataframe(target_dataframe, fields_to_process, llm_results, selection_table, column_name=None):
    """
    Add LLM processing results directly to the target DataFrame (in place)
    at the rows given by the selection table's index labels.

    Args:
        target_dataframe (pandas.DataFrame): DataFrame to modify in-place
        fields_to_process (list): Document dictionaries that were processed
            (unused here; kept for signature compatibility)
        llm_results (list): Results from the process_with_llm function
        selection_table: DataFrame-like selection whose index labels mark
            the rows to update
        column_name (str, optional): Custom name for the new column
    """
    # Default name encodes the current column count.
    column_name = column_name or f"Added Column {len(list(target_dataframe))}"

    # Initialize the new column with empty strings if it doesn't exist
    if column_name not in target_dataframe.columns:
        target_dataframe[column_name] = ""

    # Safety checks
    if not isinstance(llm_results, list) or not llm_results:
        print("No LLM results to add")
        return

    if selection_table is None or selection_table.empty:
        print("No selection table provided or empty selection")
        return

    selected_indices = selection_table.index.tolist()

    # Make sure we have the right number of results for the selected rows
    if len(selected_indices) != len(llm_results):
        print(f"Warning: Number of results ({len(llm_results)}) doesn't match selected rows ({len(selected_indices)})")

    for idx, result in zip(selected_indices, llm_results):
        try:
            # BUG FIX: compare against the index LABELS, not len(df) — the
            # selection carries labels, which need not be positions 0..n-1
            # (e.g. after filtering or with a non-integer index).
            if idx in target_dataframe.index:
                target_dataframe.at[idx, column_name] = result
            else:
                print(f"Warning: Selected index {idx} not present in DataFrame index")
        except Exception as e:
            print(f"Error adding result to DataFrame: {str(e)}")
301
+
302
def add_llm_results_to_dataframe(original_df, fields_to_process, llm_results, column_name=None):
    """
    Return a copy of *original_df* with LLM results added as a new column.

    Args:
        original_df (pandas.DataFrame): Source DataFrame (left unmodified)
        fields_to_process (list): Document dictionaries that were processed
        llm_results (list): Results from the process_with_llm function
        column_name (str, optional): Name for the new column; defaults to
            "Added Column <n>" where n is the original column count.

    Returns:
        pandas.DataFrame: Copy of the original with the results column.
    """
    import pandas as pd

    target_column = column_name or f"Added Column {len(list(original_df))}"

    # Work on a copy so the caller's DataFrame is untouched.
    result_df = original_df.copy()
    result_df[target_column] = ""

    if not (isinstance(llm_results, list) and llm_results):
        print("No LLM results to add")
        return result_df

    # Assumes fields_to_process order matches the DataFrame's row order.
    for position, (_, answer) in enumerate(zip(fields_to_process, llm_results)):
        try:
            if position < len(result_df):
                result_df.at[position, target_column] = answer
            else:
                print(f"Warning: Result index {position} exceeds DataFrame length")
        except Exception as e:
            print(f"Error adding result to DataFrame: {str(e)}")
            continue

    return result_df
343
+
344
+
345
def display_answers_as_markdown(answers, mo):
    """
    Render each answer in *answers* as its own markdown element via mo.md().

    Args:
        answers (list): Text answers from the LLM
        mo: The existing marimo module from the environment

    Returns:
        list: One markdown element per answer, or a single placeholder
        element when there are no answers.
    """
    if not answers:
        return [mo.md("No answers available")]

    # Each answer gets a numbered heading preceded by a horizontal rule.
    return [
        mo.md(f"""\n\n---\n\n# Answer {index + 1}\n\n{text}""")
        for index, text in enumerate(answers)
    ]
368
+
369
def display_answers_stacked(answers, mo):
    """
    Display answers stacked vertically with separators using mo.vstack().

    Args:
        answers (list): Text answers from the LLM
        mo: The existing marimo module from the environment

    Returns:
        element: A vertically stacked collection of markdown elements.
    """
    md_elements = display_answers_as_markdown(answers, mo)
    separator = mo.md("---")

    # Interleave a separator before every element except the first, so no
    # separator trails the last answer.
    stacked = []
    for position, element in enumerate(md_elements):
        if position:
            stacked.append(separator)
        stacked.append(element)

    return mo.vstack(stacked, align="start", gap="2")