# /// script # [tool.marimo.display] # custom_css = ["./custom_header_font.css"] # /// import marimo __generated_with = "0.13.14" app = marimo.App(width="full", app_title="watsonx-SheetProcessor-9000") with app.setup: # Initialization code that runs before all other cells import marimo as mo from typing import Dict, Optional, List, Union, Any from ibm_watsonx_ai import APIClient, Credentials from pathlib import Path import pandas as pd import mimetypes import requests import zipfile import tempfile import certifi import base64 import polars import nltk import time import json import glob import ast import os import io import re from dotenv import load_dotenv load_dotenv() def get_iam_token(api_key): return requests.post( "https://iam.cloud.ibm.com/identity/token", headers={"Content-Type": "application/x-www-form-urlencoded"}, data={ "grant_type": "urn:ibm:params:oauth:grant-type:apikey", "apikey": api_key, }, verify=certifi.where(), ).json()["access_token"] def setup_task_credentials(client): # Get existing task credentials existing_credentials = client.task_credentials.get_details() # Delete existing credentials if any if "resources" in existing_credentials and existing_credentials["resources"]: for cred in existing_credentials["resources"]: cred_id = client.task_credentials.get_id(cred) client.task_credentials.delete(cred_id) # Store new credentials return client.task_credentials.store() @app.cell def _(): ### Loads baked in credentials if present from baked_in_credentials.creds import credentials from base_variables import wx_regions, wx_platform_url ### Loads helper functions from helper_functions.helper_functions import ( get_cred_value, get_model_selection_table, filter_models_by_function, _enforce_model_selection, update_max_tokens_limit, get_key_by_value, markdown_spacing, load_file_dataframe, create_parameter_table, convert_table_to_json_docs, get_cell_values, wrap_with_spaces, load_templates, ) ### Table Related Helper Functions from helper_functions.table_helper_functions import ( append_llm_results_to_dataframe, display_answers_as_markdown, display_answers_stacked, process_with_llm, process_prompt_lineage ) return ( append_llm_results_to_dataframe, convert_table_to_json_docs, create_parameter_table, credentials, get_cell_values, get_cred_value, get_key_by_value, get_model_selection_table, load_file_dataframe, load_templates, process_prompt_lineage, process_with_llm, wrap_with_spaces, wx_regions, ) @app.cell def _(credentials, get_cred_value, wx_regions): # Create a form with multiple elements baked_in_creds = credentials client_instantiation_form = ( mo.md( """ ###**watsonx.ai credentials:** {wx_region} {wx_api_key} {project_id} {space_id} """ ) .batch( wx_region=mo.ui.dropdown( wx_regions, label="Select your watsonx.ai region:", value=get_cred_value("region", creds_var_name="baked_in_creds"), searchable=True, ), wx_api_key=mo.ui.text( placeholder="Add your IBM Cloud api-key...", label="IBM Cloud Api-key:", kind="password", value=get_cred_value("api_key", creds_var_name="baked_in_creds"), ), project_id=mo.ui.text( placeholder="Add your watsonx.ai project_id...", label="Project_ID:", kind="text", value=get_cred_value("project_id", creds_var_name="baked_in_creds"), ), space_id=mo.ui.text( placeholder="Add your watsonx.ai space_id...", label="Space_ID:", kind="text", value=get_cred_value("space_id", creds_var_name="baked_in_creds"), ), ) .form(show_clear_button=True, bordered=False) ) return (client_instantiation_form,) @app.cell def _(activate_prompt_lineage): if 
activate_prompt_lineage.value:
        switch_between_lineage_modes = mo.ui.switch(
            label="**Switch** between placeholder input variables and row data"
        )
        include_input_column_names = mo.ui.checkbox()
        include_llm_parameters = mo.ui.checkbox()
    else:
        switch_between_lineage_modes = mo.ui.switch(
            label="**Switch** between placeholder input variables and row data",
            disabled=True,
        )
        include_input_column_names = mo.ui.checkbox(disabled=True, label="*Inactive*")
        include_llm_parameters = mo.ui.checkbox(disabled=True, label="*Inactive*")
    return (
        include_input_column_names,
        include_llm_parameters,
        switch_between_lineage_modes,
    )


@app.cell
def _(activate_prompt_lineage, switch_between_lineage_modes):
    if switch_between_lineage_modes.value:
        current_mode = mo.md(">**Current Mode:** Saves prompts with the data used")
    elif not activate_prompt_lineage.value:
        current_mode = mo.md(">**Current Mode:** Inactive")
    else:
        current_mode = mo.md(
            ">**Current Mode:** Saves prompts with input variable placeholders"
        )
    return (current_mode,)


@app.cell
def _():
    activate_prompt_lineage = mo.ui.checkbox()
    return (activate_prompt_lineage,)


@app.cell
def _(
    activate_prompt_lineage,
    current_mode,
    include_input_column_names,
    include_llm_parameters,
    switch_between_lineage_modes,
):
    prompt_lineage_form = mo.md(
        f"""
    ###**Prompt Lineage**

    **Activate Prompt Lineage:** {activate_prompt_lineage}

    > If you activate prompt lineage, a second column with the suffix *"___prompt_lineage"* containing the prompt data *(based on the selected options)* will be included alongside your results column.

    {switch_between_lineage_modes}

    {current_mode}

    **Include LLM parameters:** {include_llm_parameters}

    **Include Input Variable Names:** {include_input_column_names}
    """
    )
    return (prompt_lineage_form,)


@app.cell
def _(
    activate_prompt_lineage,
    include_input_column_names,
    include_llm_parameters,
    switch_between_lineage_modes,
):
    lineage_options = {
        "activate_prompt_lineage": activate_prompt_lineage.value,
        "switch_between_lineage_modes": (
            switch_between_lineage_modes.value
            if activate_prompt_lineage.value
            else None
        ),
        "include_llm_parameters": include_llm_parameters.value,
        "include_input_column_names": include_input_column_names.value,
    }
    return (lineage_options,)


@app.cell
def _():
    mo.md(
        r"""
    #watsonx.ai LLM Table Processor - Marimo Notebook

    #### This marimo notebook processes tabular data by running a prompt on each selected row, with specified columns as input variables. Each row is prompted with the data in its columns, and the outputs are appended as a new column at the end of the table. Optionally, you can also toggle on prompt lineage, which lets you save data about the LLM used, the input prompts with or without data baked in, and so on.

    #### This tool can be used for a wide range of use-cases; in practice it has found popularity as a way to perform grammar/tone/requirements validation for RFP requirements tables, but also to generate synthetic data based on scenario/requirements specifications, among many other uses.

    >>> Keep in mind that all data and states are stored in memory: refreshing the page or restarting the application will lose all progress, unless you have downloaded/cloned this repository and made the adjustments to create a backend to store it.
    /// attention | Warning!
    While I have been asked whether this tool can be used to generate responses, as in the case of RFP requirements tables, I do not recommend **EVER** using an LLM to generate text this way for any task that requires factual validity or cannot accommodate the risk of faulty text sneaking past. Most individuals state that they will evaluate all of the outputs, but the very motivation behind this kind of LLM use is to avoid doing exactly that type of work.

    The likelihood of an individual manually evaluating and correcting hundreds of rows of data without an error sneaking past is very low, not to mention that it would most likely take more time than writing the text yourself.

    **Large Language Models do not understand context, interpretation, background information, or intent in the way that humans expect them to.**
    ///

    /// admonition | Created by ***Milan Mrdenovic*** [milan.mrdenovic@ibm.com] for IBM Ecosystem Client Engineering, NCEE - ***version 3.1*** - *01.06.2025*
    Initial concept co-created with ***Inga Tomasdottir*** [inga.tomasdottir@ibm.com].
    ///

    >Licensed under Apache 2.0; users hold full accountability for any use or modification of the code or application.
    >
    This asset is part of a set meant to help IBMers, IBM Partners, and clients develop a better understanding of how to utilize various watsonx features and generative AI as a subject matter.
""" ) return @app.cell def _(): mo.md( r"""
###Part 1 - Client Setup, File Upload, Column and Row Selection
    """
    )
    return


@app.cell
def _(client_section):
    ui_accordion_section_1 = mo.accordion(
        {"Section 1: **watsonx.ai Credentials**": client_section}
    )
    ui_accordion_section_1
    return


@app.cell
def _(why_important):
    # Markdown Documentation
    file_loader_md = """
    /// admonition
    **Drag and drop, or click, to load a file *(supported formats: .csv, .xlsx or .json)*.**
    If you load an Excel file with multiple sheets, you will see a dropdown menu to select your active sheet.

    The checkbox **activate header adjustment** lets you shift *(downwards)* which row is used for the headers. Be very careful with this and only use it when the column-name row is not the first row in the file.

    When this is active, the number selector **Header Row index** controls which row is used, with 0 being the first.
    ///
    """
    column_and_row_selector_md = """
    /// admonition
    **In this section you need to select the rows you want to process with an LLM, alongside the columns that will become input variables.**

    The columns detected from the table header *(or, in the case of JSON, the key names)* are turned into a multi-select list; **if you don't select any, all of them are made available to you**.

    If you want **to select every row** in the document, **click the selection checkbox in the header of the table**, which selects all rows on the visible table page. Then scroll to the bottom of the table page; in the bottom left you will see a **blue Select all button**, and clicking it selects every row in the loaded document.

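    For reference, the rows and columns you select here are converted into a list of dictionaries that later feed the prompt templates, roughly like this *(a sketch with hypothetical column names; the real conversion is done by `convert_table_to_json_docs` from the helper functions)*:

    ```python
    selected_columns = ["requirement_id", "description"]
    # one dict per selected row, keyed by the selected columns
    fields_to_process = [
        {"requirement_id": "R-1", "description": "The system shall log all errors."},
        {"requirement_id": "R-2", "description": "The system shall retry twice."},
    ]
    ```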
    The buttons to the right of this text control the look of the table, namely column text wrapping and text justification. **Bear in mind that changing these WILL reset your selections**, so adjust them at the beginning if you plan to. The table has additional features, so feel free to check the other buttons built into the widget.
    ///
    """
    llm_setup_md = """
    /// admonition
    **Select an LLM from the list of available models in your watsonx.ai instance/region and set up the parameters.**

    **Unless you are very used to working with LLMs and prompt engineering, it's best to only adjust the maximum and minimum output tokens.**

    If you switch the decoding method to sampling, you will get additional input widgets for temperature, top_p, top_k, and seed.

    **Stopping sequences** should be one of the following:
    **1)** The default EOS token for your model *(most models have their tokenizers listed on Hugging Face [https://huggingface.co])*
    **2)** A custom word/trigger you want to use *(e.g. stopping when a specific word is generated)*
    **3)** Or just leave the default ones provided here *(the EOS tokens for Llama 3 and Mistral models)*; see the examples below.

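    As a rough illustration, here are some valid entries for the **Stopping Sequences** field *(the field is parsed as a Python list of strings, so quotes and brackets matter; which tokens are correct depends on your model's tokenizer)*:

    ```python
    ['<|end_of_text|>', '<|eot_id|>']  # Llama 3 family EOS tokens
    ['</s>']                           # Mistral-style EOS token
    ['Observation:']                   # a custom trigger word
    ```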
    **Section 5 - Prompt Lineage** allows you to save an additional column with the prompt setup, LLM parameters, etc. It has its own short instruction set, so it does not have a detailed description area like this one.

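    For reference, the parameter widgets in this section are assembled into a watsonx.ai generation-parameters dictionary, roughly like this *(a sketch; the exact keys come from `GenTextParamsMetaNames` in the model setup cells further down)*:

    ```python
    params = {
        "decoding_method": "sample",  # or "greedy"
        "min_new_tokens": 1,
        "max_new_tokens": 500,
        "repetition_penalty": 1.05,
        "stop_sequences": ['<|end_of_text|>'],
        # only included when the decoding method is "sample":
        "temperature": 0.7,
        "top_p": 1,
        "top_k": 50,
    }
    ```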
    *P.S. ***EOS token*** stands for "End of Sequence" and is used during training to nudge the model into knowing when to stop generating outputs.*
    ///
    """
    prompt_and_variable_setup_md = f"""
    /// admonition
    **Use the slider to choose how many prompts you want to have *(max. 5)*; each will be available to you to prompt with the same model and parameters but a different prompt setup.**

    If you duplicate the repository, you can set up different baked-in starter templates; they are stored as .txt files in a folder called *```prompt_templates```* in the directory. Each folder and the files inside it will be loaded into the dropdown selectors. This way you can customize the app with pre-made tasks or added LLM syntax.

    {why_important}

    On a practical note, most providers publish guides on the tags that comprise their templates, for example:
    [https://www.llama.com/docs/model-cards-and-prompt-formats/llama3_3/]
    [https://www.ibm.com/granite/docs/models/granite/]
    [https://docs.mistral.ai/guides/tokenization/]
    ///
    """
    additional_variables_setup_md = """
    /// admonition
    **In this section you can create up to 5 custom variables that will be appended to all rows in the table when prompting.**
    Use it when you need to add large sections of unchanging text that you'd like to reuse in different prompts *(a sketch of how these variables are merged into each row follows the list below)*, for example:
    **1)** system prompts
    **2)** guardrail definitions
    **3)** rules or directives
    or other data elements that do not change between prompts.

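    As a rough sketch of what happens under the hood, each additional variable is merged into every selected row before prompting *(the real logic lives in `add_additional_variables` later in this notebook; the names below are hypothetical)*:

    ```python
    row = {"description": "The system shall log all errors."}
    additional_vars = {"additional_var_1": "You are a strict requirements reviewer."}
    row.update(additional_vars)
    # a template can now reference both {description} and {additional_var_1}
    ```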
    Don't forget to **turn on the checkbox** in the tab if you want to use this feature; otherwise the variables won't be applied.
    ///
    """
    prompt_stack_md = """
    /// admonition
    **The app will generate a column name input and a prompt editor box for each of the prompts you selected in the previous steps.**

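    For orientation, a finished template might look roughly like this *(a sketch only: the tags are Llama 3 style and the column names are purely illustrative, so use the tags and columns that match your own model and table)*:

    ```text
    <|begin_of_text|><|start_header_id|>user<|end_header_id|>
    Review the following requirement for grammar and tone issues.
    Requirement ID: {requirement_id}
    Requirement text: {description}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
    ```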
    Add an appropriate column name into the widget, as this will be the name of the column holding the generated responses, and edit your prompt with the appropriate syntax and instructions.

    To define where your column data goes, add the column name wrapped in curly braces; for example, *```{description}```* will paste in the value of the *description* cell for each row being prompted. You don't need to include every column, nor all of the columns you selected before, but any column you don't reference this way will not be used. The same goes for any additional variables you defined; just reference their names.

    > If any input variables had malformed names, the app automatically renames them to all lowercase letters with any spaces replaced by _. If you are unsure of any of the names, you can click the box at the bottom of the app to show the input variable sidebar. This will close all of the app sections, but it does not lose your progress; it's just a minor glitch in how marimo handles state.
    ///
    """
    results_stack_md = """
    /// admonition
    **Pressing each button will batch-prompt the LLM *(10 rows per call)* with the prompt in that slot, then append the results to the end of the table under the column name defined in the prompt setup tab.**

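    Under the hood, the batching is plain chunked iteration over the selected rows, roughly like this *(a sketch; the real logic lives in `process_with_llm` from the table helper functions)*:

    ```python
    batch_size = 10
    for start in range(0, len(fields_to_process), batch_size):
        batch = fields_to_process[start:start + batch_size]
        prompts = [prompt_template.format(**row) for row in batch]
        # one inference call per batch of prompts
    ```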
    If you have turned on the prompt lineage feature, another column will be added to the right with the prompt lineage outputs you selected.

    If you change the prompt template but do not change the column name in that slot, the existing column will be overwritten. Changing the column name will generate a new column.

    > Once you have finished your work and want to save your results, do the same as in the beginning: **select all rows by clicking the header checkbox and then the blue Select all button in the bottom left.
    After that, click the download button on the right side to export as a JSON, CSV, or Parquet file**.
    ///
    """
    return (
        additional_variables_setup_md,
        column_and_row_selector_md,
        file_loader_md,
        llm_setup_md,
        prompt_and_variable_setup_md,
        prompt_stack_md,
        results_stack_md,
    )


@app.cell
def _():
    why_important_md = mo.md("""/// attention | Unless one uses a chat API *(which this app deliberately does not, to provide maximum flexibility)*, you have to provide the correct syntax for each LLM.
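    Concretely, all a chat API's "prompt templating" does is wrap your messages in model-specific tags before inference, roughly like this *(a sketch using Llama-3-style tags, not any particular library's implementation; the newlines that normally follow each header tag are omitted for brevity)*:

    ```python
    def apply_chat_template(messages):
        # wrap each message in Llama-3-style role tags (sketch)
        text = "<|begin_of_text|>"
        for m in messages:
            text += f"<|start_header_id|>{m['role']}<|end_header_id|>"
            text += m["content"] + "<|eot_id|>"
        # cue the model to answer as the assistant
        return text + "<|start_header_id|>assistant<|end_header_id|>"

    apply_chat_template([{"role": "user", "content": "Hi!"}])
    ```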
    These tags are used in training *(alongside the previously mentioned EOS token)* to nudge the model into understanding which part of the input is a user instruction, system prompt, function call, etc.

    LLMs are by their very nature volatile; there is no actual way to build in features like *"tool calling"* or *"safety guardrails"*. All we can do is introduce these artificial tags into the fine-tuning data, provide tens or hundreds of thousands of tailored examples of the desired result, and then hope that the model *"picks up the hint"*, if you will. If you've seen tags like *<|image_start|>* or *[INST]* in documentation or prompt templates, that's what they are.

    This is not properly communicated in the public discourse, because a lot of people would not feel comfortable using these technologies if they knew that these systems do not follow the rigid and safe mechanistic rules of software logic that they have been used to since the dawn of computer science. It also doesn't bode well for people who benefit from both the hype and the fear surrounding ideas of AGI and living machines, which, by the very definition of how the technology works, LLMs cannot be. So they don't mention it. But that's a topic for another time, maybe over some coffee.

    Without these tags, models will at best perform subpar and at worst completely break: generating loops of sentences, garbage text, or in some cases hallucinating so badly that they output some rather creepy nonsense. As the sketch above shows, all that LLM chat APIs do is programmatically enclose your text with these tags before delivering it to the LLM.
    ///
    """)
    why_important_accordion = mo.accordion(
        {"### Why is this important?": why_important_md}
    )
    why_important = mo.callout(why_important_accordion, kind="warn")
    return (why_important,)


@app.cell
def _(file_loader_md, file_uploader):
    ui_accordion_section_2 = mo.accordion(
        {
            "Section 2: **File Loading**": mo.hstack(
                [mo.md(file_loader_md), file_uploader],
                justify="space-around",
                align="center",
                widths=[0.55, 0.4],
            )
        }
    )
    ui_accordion_section_2
    return


@app.cell
def _(column_and_rows_sector):
    ui_accordion_section_3 = mo.accordion(
        {"Section 3: **Column and Row Selection**": column_and_rows_sector}
    )
    ui_accordion_section_3
    return


@app.cell
def _():
    mo.md(
        r"""
###Part 2 - Model and Prompt Lineage Setup """ ) return @app.cell def _(llm_setup_with_docs): ui_accordion_section_4 = mo.accordion({"Section 4: **Model Setup**": llm_setup_with_docs}) ui_accordion_section_4 return @app.cell def _(prompt_lineage_form): ui_accordion_section_5 = mo.accordion({"Section 5: **Prompt Lineage** - *[Optional]*": prompt_lineage_form}) ui_accordion_section_5 return @app.cell def _(): mo.md( r"""
###Part 3 - Prompt Templates and Results Table """ ) return @app.cell def _(prompt_setup_stack): ui_accordion_section_6 = mo.accordion( { "Section 6: **Choose the Number of Prompts and Templates**": prompt_setup_stack } ) ui_accordion_section_6 return @app.cell def _(additional_variables_stack): ui_accordion_section_6_1 = mo.accordion( {"Section 6.1: **Additional Variables** *(Optional)*": additional_variables_stack} ) ui_accordion_section_6_1 return @app.cell def _(prompt_stack): ui_accordion_section_7 = mo.accordion({"Section 7: **Prompt Setup**": prompt_stack}) ui_accordion_section_7 return @app.cell def _(client_instantiation_form): client_setup = client_instantiation_form.value or None ### Extract Credential Variables: if client_setup: wx_url = client_setup["wx_region"] if client_setup["wx_region"] else "EU" wx_api_key = ( client_setup["wx_api_key"].strip() if client_setup["wx_api_key"] else None ) os.environ["WATSONX_APIKEY"] = wx_api_key or "" project_id = ( client_setup["project_id"].strip() if client_setup["project_id"] else None ) space_id = ( client_setup["space_id"].strip() if client_setup["space_id"] else None ) else: os.environ["WATSONX_APIKEY"] = "" project_id = space_id = wx_api_key = wx_url = None return client_setup, project_id, space_id, wx_api_key, wx_url @app.cell def client_instantiation( client_setup, project_id, space_id, wx_api_key, wx_url, ): ### Instantiate the watsonx.ai client if client_setup: try: wx_credentials = Credentials(url=wx_url, api_key=wx_api_key) project_client = ( APIClient(credentials=wx_credentials, project_id=project_id) if project_id else None ) deployment_client = ( APIClient(credentials=wx_credentials, space_id=space_id) if space_id else None ) instantiation_success = True instantiation_error = None except Exception as e: instantiation_success = False instantiation_error = str(e) wx_credentials = project_client = deployment_client = None else: wx_credentials = project_client = deployment_client = None instantiation_success = None instantiation_error = None return ( deployment_client, instantiation_error, instantiation_success, project_client, ) @app.cell def _(client_callout_kind, client_instantiation_form, client_status): client_callout = mo.callout(client_status, kind=client_callout_kind) client_section = mo.hstack( [client_instantiation_form, client_callout], align="center", justify="space-around", ) return (client_section,) @app.cell def _( client_key, client_options, client_selector, client_setup, get_key_by_value, instantiation_error, instantiation_success, wrap_with_spaces, ): active_client_name = ( get_key_by_value(client_options, client_key) if client_key else "No Client" or "Project Client" ) if client_setup: if instantiation_success: client_status = mo.md( f"### ✅ Client Instantiation Successful ✅\n\n" f"{client_selector}\n\n" f"**Active Client:**{wrap_with_spaces(active_client_name, prefix_spaces=5)}" ) client_callout_kind = "success" else: client_status = mo.md( f"### ❌ Client Instantiation Failed\n**Error:** {instantiation_error}\n\nCheck your region selection and credentials" ) client_callout_kind = "danger" else: client_status = mo.md( f"### Client Instantiation Status will turn Green When Ready\n\n" f"{client_selector}\n\n" f"**Active Client:**{wrap_with_spaces(active_client_name, prefix_spaces=5)}" ) client_callout_kind = "neutral" return client_callout_kind, client_status @app.cell def _(deployment_client, project_client): if project_client is not None and deployment_client is not None: client_options = { "Project Client": 
project_client, "Deployment Client": deployment_client, } elif project_client is not None: client_options = {"Project Client": project_client} elif deployment_client is not None: client_options = {"Deployment Client": deployment_client} else: client_options = {"No Client": "Instantiate a Client"} default_client = next(iter(client_options)) client_selector = mo.ui.dropdown( client_options, value=default_client, label="**Switch your active client:**" ) return client_options, client_selector @app.cell def _(client_selector): client_key = client_selector.value if client_key == "Instantiate a Client": client = None else: client = client_key return client, client_key @app.cell def _(): file = mo.ui.file( kind="area", filetypes=[".xlsx", ".xls", ".csv", ".json"], label="Upload a file (CSV, Excel, or JSON)", ) return (file,) @app.cell def _(file): def get_file_extension(filename): """Get the file extension from a filename.""" if not filename: return None return os.path.splitext(filename)[1].lower() # Initialize variables sheet_names = [] file_extension = None excel_data = None if file.contents(): file_extension = get_file_extension(file.name()) # Handle Excel files to get sheet names if file_extension in [".xlsx", ".xls"]: # For Excel files excel_data = io.BytesIO(file.contents()) # Get sheet names without loading the data yet sheet_names = pd.ExcelFile(excel_data).sheet_names # Create sheet selector for Excel files if file_extension in [".xlsx", ".xls"] and sheet_names: sheet_selector = mo.ui.dropdown( options=sheet_names, value=sheet_names[0], label="Select Sheet:", full_width=False, searchable=True, ) else: sheet_selector = None return excel_data, file_extension, sheet_selector @app.cell def _(table_dataframe_raw): if not table_dataframe_raw.empty: apply_header_readjustment = mo.ui.checkbox(label="Activate Header Adjustment") else: apply_header_readjustment = None return (apply_header_readjustment,) @app.cell def _(): show_variable_sidebar = mo.ui.checkbox( label="Show Sidebar with Input Variables", value=False ) return (show_variable_sidebar,) @app.cell def _(apply_header_readjustment, sheet_selector, table_dataframe_raw): if not table_dataframe_raw.empty: if apply_header_readjustment.value: header_row = mo.ui.number( label="Header Row index:", value=0, start=0, stop=len(table_dataframe_raw) + 1, ) else: header_row = mo.ui.number( label="Header Row index:", value=0, start=0, stop=0 ) else: header_row = None sheet_and_column_controls = mo.hstack( [sheet_selector, apply_header_readjustment, header_row], gap=2, justify="space-around", ) return header_row, sheet_and_column_controls @app.cell def _(file, sheet_and_column_controls): if file.name(): name_printout = mo.md(f"**{file.name()}**") else: name_printout = mo.md(f"No File Uploaded") file_uploader = mo.vstack( [file, name_printout, sheet_and_column_controls], justify="space-around", align="center", ) return (file_uploader,) @app.cell def _(): wrap_columns = mo.ui.switch(label="**Wrap column text in table**") return (wrap_columns,) @app.cell def _(): justify_options = ["left","center","right"] justify_columns = mo.ui.dropdown(label="**Justify column text to:**", options=justify_options, value="left") return (justify_columns,) @app.cell def _(column_and_row_selector_md, justify_columns, wrap_columns): table_controls_stack = mo.vstack([wrap_columns, justify_columns], justify="start", gap=1) table_controls_stack_and_docs = mo.hstack([mo.md(column_and_row_selector_md),table_controls_stack], justify="space-around", align="center", 
widths=[0.55,0.25]) return (table_controls_stack_and_docs,) @app.cell def _( justify_columns, set_column_justify_state, table_column_names, table_dataframe, ): if not table_dataframe.empty: justify_column_value = justify_columns.value center_column_text = {col: justify_column_value for col in table_column_names} set_column_justify_state(center_column_text) else: center_column_text = None set_column_justify_state(center_column_text) return @app.cell def _( set_column_wrapping_state, table_column_names, table_dataframe, wrap_columns, ): if not table_dataframe.empty and wrap_columns.value: set_column_wrapping_state(table_column_names) else: set_column_wrapping_state(None) return @app.cell def _(): get_column_justify_state, set_column_justify_state = mo.state(None) return get_column_justify_state, set_column_justify_state @app.cell def _(): get_column_wrapping_state, set_column_wrapping_state = mo.state(None) return get_column_wrapping_state, set_column_wrapping_state @app.function def apply_header_row(table_dataframe, header_row_value): """ Set a specific row as the header for a dataframe. Parameters: ----------- table_dataframe : pd.DataFrame The dataframe to modify header_row_value : int Row index to use as header (0-based) Returns: -------- tuple : (pd.DataFrame, list) Modified dataframe with new headers and list of column names """ if not table_dataframe.empty: # Convert header row to column names new_header = table_dataframe.iloc[header_row_value] # Create new dataframe without the header row new_df = table_dataframe.iloc[header_row_value + 1 :] # Set the new header new_df.columns = new_header # Get list of column names column_names = list(new_df.columns) return new_df, column_names return table_dataframe, [] @app.cell def _(excel_data, file, file_extension, load_file_dataframe, sheet_selector): table_dataframe_raw, table_column_names_raw = load_file_dataframe( file=file, file_extension=file_extension, sheet_selector=sheet_selector, excel_data=excel_data, ) return table_column_names_raw, table_dataframe_raw @app.cell def _( apply_header_readjustment, header_row, table_column_names_raw, table_dataframe_raw, ): if apply_header_readjustment is not None and apply_header_readjustment.value: table_dataframe, table_column_names = apply_header_row( table_dataframe_raw, header_row_value=header_row.value if header_row is not None else 0, ) else: table_dataframe, table_column_names = ( table_dataframe_raw, table_column_names_raw, ) return table_column_names, table_dataframe @app.cell def _(get_column_justify_state, get_column_wrapping_state, table_dataframe): if not table_dataframe.empty: # center_column_text = {col: "left" for col in table_column_names} table = mo.ui.table( table_dataframe, show_column_summaries=False, initial_selection=[0], wrapped_columns=get_column_wrapping_state(), text_justify_columns=get_column_justify_state(), label="**Select the Rows to Process**", ) else: table = mo.md("""###**No data available in the uploaded file**""").batch( upload_a_file=mo.ui.table( data=pd.DataFrame({"Upload File": "No File"}, index=[0]) ) ) return (table,) @app.cell def _(create_parameter_table, table_column_names, table_dataframe): if not table_dataframe.empty: column_selector = create_parameter_table( label="Select the Columns to Process", input_list=table_column_names, column_name="Column Options", selection_type="multi-cell", text_justify="center", ) else: column_selector = create_parameter_table( label="Select the Columns to Process", input_list=[], column_name="Column Options", 
selection_type="multi-cell", text_justify="center", ) return (column_selector,) @app.cell def _(column_selector, get_cell_values): columns_to_use = get_cell_values(column_selector) return (columns_to_use,) @app.cell def _(column_selector, table, table_controls_stack_and_docs): column_and_rows_sector = mo.vstack( [table_controls_stack_and_docs, column_selector, table], align="stretch", justify="start" ) return (column_and_rows_sector,) @app.cell def _( columns_to_use, convert_table_to_json_docs, set_additional_variable_state, table, ): if table.value is not None: selected_rows = table.value fields_to_process_raw = convert_table_to_json_docs( selected_rows, selected_columns=columns_to_use ) set_additional_variable_state(fields_to_process_raw) else: selected_rows = pd.DataFrame([]) fields_to_process_raw = [] return fields_to_process_raw, selected_rows @app.cell def _(client, get_model_selection_table): if client is not None: model_selector, resources, model_id_list = get_model_selection_table( client=client, model_type="chat", filter_functionality=None, selection_mode="single-cell", ) else: model_selector = get_model_selection_table( client=None, selection_mode="single-cell" ) resources = model_id_list = None return (model_selector,) @app.cell def _(): from ibm_watsonx_ai.foundation_models import ModelInference from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams # Create a form with multiple elements llm_parameters = mo.md( """ ###**LLM parameters:** {decoding_method} {repetition_penalty} {min_tokens} {max_tokens} {stop_sequences} """ ).batch( ### Preset Options decoding_method=mo.ui.dropdown( options=["greedy", "sample"], value="greedy", label="Decoding Method:" ), min_tokens=mo.ui.number(start=1, stop=1, label="Minimum Output Tokens:"), max_tokens=mo.ui.number( start=1, stop=8096, value=500, label="Maximum Output Tokens:" ), repetition_penalty=mo.ui.number( start=1.0, stop=2.0, step=0.01, label="Repetition Penalty:" ), stop_sequences=mo.ui.text( label="Stopping Sequences:", value="['<|end_of_text|>','']", placeholder="List of Strings, e.g. 
['<|end_of_text|>','']", full_width=False, ), ) return GenParams, ModelInference, llm_parameters @app.cell def _(llm_setup_md): llm_setup_docs = mo.hstack([mo.md(llm_setup_md)], justify="start", widths=[1.0]) return (llm_setup_docs,) @app.cell def _(llm_setup, llm_setup_docs): llm_setup_with_docs = mo.vstack([llm_setup_docs,llm_setup], gap=1) return (llm_setup_with_docs,) @app.cell def _(llm_param_stack, model_selector): llm_setup = mo.hstack( [model_selector, llm_param_stack], align="center", justify="space-around", widths=[0.4,0.3] ) return (llm_setup,) @app.cell def _(llm_parameters, llm_sampling_parameters): llm_param_stack = mo.vstack([llm_parameters, llm_sampling_parameters], gap=2) return (llm_param_stack,) @app.cell def _(llm_parameters): if llm_parameters.value and llm_parameters.value["decoding_method"] == "sample": llm_sampling_parameters = ( mo.md(''' **Sampling parameters:** {temperature} {top_p} {top_k} {seed} ''') .batch( ### Preset Options temperature = mo.ui.number(start=0, stop=2, step=0.01, value=0.7, label="Temperature:"), top_p = mo.ui.number(start=0, stop=1, value=1, label="Top P:"), top_k = mo.ui.number(start=1, stop=100, step=1, label="Top K:"), seed = mo.ui.number(start=0 , label="Seed:"), ) ) else: llm_sampling_parameters = ( mo.md(''' ''') .batch( ) ) return (llm_sampling_parameters,) @app.cell def _(model_selector): if model_selector.value: selected_model = model_selector.value[0]["value"] else: selected_model = "mistralai/mistral-large" return (selected_model,) @app.cell def _( GenParams, ModelInference, client, llm_parameters, llm_sampling_parameters, selected_model, ): if llm_parameters.value: params = { GenParams.DECODING_METHOD: llm_parameters.value["decoding_method"], GenParams.MAX_NEW_TOKENS: llm_parameters.value["max_tokens"], GenParams.MIN_NEW_TOKENS: llm_parameters.value["min_tokens"], GenParams.REPETITION_PENALTY: llm_parameters.value["repetition_penalty"], GenParams.STOP_SEQUENCES: ast.literal_eval( llm_parameters.value["stop_sequences"] ), GenParams.RETURN_OPTIONS: { "input_text": False, "generated_tokens": False, "input_tokens": True, "token_logprobs": False, }, } if llm_sampling_parameters.value: if 'temperature' in llm_sampling_parameters.value: params["temperature"] = llm_sampling_parameters.value['temperature'] if 'top_p' in llm_sampling_parameters.value: params["top_p"] = llm_sampling_parameters.value['top_p'] if 'top_k' in llm_sampling_parameters.value: params["top_k"] = llm_sampling_parameters.value['top_k'] if 'seed' in llm_sampling_parameters.value and llm_sampling_parameters.value['seed'] != 0: params["random_seed"] = llm_sampling_parameters.value['seed'] else: params = {} if client: inf_model = ModelInference( api_client=client, model_id=selected_model, params=params ) else: inf_model = None return inf_model, params @app.cell def _(): prompt_template_folders = get_subfolder_paths("./prompt_templates", depth=1) prompt_template_model_paths = mo.ui.dropdown( options=prompt_template_folders, label="Prompt Template Variants *(Select Based on your Model)*", ) return (prompt_template_model_paths,) @app.cell def _(load_templates, prompt_template_model_paths): template_folder = str(prompt_template_model_paths.value) templates = load_templates(template_folder) return (templates,) @app.cell def _(): prompt_number_slider = mo.ui.slider( start=1, stop=5, value=1, step=1, label="Prompt Templates", show_value=True ) return (prompt_number_slider,) @app.cell def _(prompt_number_slider, prompt_template_model_paths): prompt_mechanics_stack = mo.vstack( 
[prompt_number_slider, prompt_template_model_paths], align="start" ) return (prompt_mechanics_stack,) @app.cell def _(prompt_and_variable_setup_md): prompt_setup_docs = mo.hstack([mo.md(prompt_and_variable_setup_md)], justify="start", widths=[1.0]) return (prompt_setup_docs,) @app.cell def _(prompt_mechanics_stack, template_selector_stack): prompt_setup_stack_widgets = mo.hstack( [prompt_mechanics_stack, template_selector_stack], justify="space-around" ) return (prompt_setup_stack_widgets,) @app.cell def _(prompt_setup_docs, prompt_setup_stack_widgets): prompt_setup_stack = mo.vstack( [prompt_setup_docs, prompt_setup_stack_widgets], gap=1 ) return (prompt_setup_stack,) @app.cell def _(): get_pt_state, set_pt_state = mo.state(None) return get_pt_state, set_pt_state @app.function def get_subfolder_paths(base_path, depth=1): """Lists subfolder paths up to specified depth. Args: base_path: Directory to search in depth: Subdirectory levels to traverse (default: 1) Returns: Dict with folder names as keys and paths as values """ import os result = {} if depth <= 0 or not os.path.isdir(base_path): return result for entry in os.scandir(base_path): if entry.is_dir(): result[entry.name] = entry.path if depth > 1: # Recursively get subdirectories subfolders = get_subfolder_paths(entry.path, depth - 1) # Update result with subfolders result.update(subfolders) return result @app.cell def _(prompt_number_slider, set_pt_state, templates): def update_state_from_templates(value): # Get current values from all template selectors template_values = [selector.value for selector in template_selectors] # Update the state with the list of values set_pt_state(template_values) # Return the list of values return template_values def create_template_dropdowns(num=1): return mo.ui.dropdown( options=templates, label=f"**Select Prompt {num} Template with Syntax:**", value="empty", on_change=update_state_from_templates, ) template_selectors = [ create_template_dropdowns(i) for i in range(1, prompt_number_slider.value + 1) ] template_selector_stack = mo.vstack(template_selectors, align="start") return (template_selector_stack,) @app.cell def _(fields_to_process): if fields_to_process: variable_names = [ key for key in fields_to_process[0].keys() if key != "_marimo_row_id" and key != "upload_a_file" ] else: variable_names = [] return (variable_names,) @app.cell def _(prompt_number_slider): def is_disabled(button_num): return prompt_number_slider.value < button_num return (is_disabled,) @app.function def create_stats_from_variables(variable_names): """ Creates mo.stat objects for each variable name in the list. Labels are formatted as "Column Variable Tag: {index+1}" Values display the variable name itself. 
""" stats = [] for i, var_name in enumerate(variable_names): stat = mo.stat( value=f"{{{var_name}}}", label=f"Column Variable Tag {i+1}", bordered=True, ) stats.append(stat) return mo.sidebar(stats, width="375") @app.cell def _(additional_variables_setup_md): additional_variable_docs = mo.hstack([mo.md(additional_variables_setup_md)], justify="start", widths=[1.0]) return (additional_variable_docs,) @app.cell def _(): get_additional_variable_state, set_additional_variable_state = mo.state(None) return get_additional_variable_state, set_additional_variable_state @app.cell def _( add_vars_stack, additional_variable_docs, additional_variable_mechanics_stack, ): additional_variables_stack = mo.vstack([additional_variable_docs, additional_variable_mechanics_stack, add_vars_stack], justify="start") return (additional_variables_stack,) @app.cell def _(additional_variables_raw): add_vars = [mo.vstack([add_var[0], add_var[1]]) for add_var in additional_variables_raw] add_vars_stack = mo.vstack(add_vars, heights="equal", align="stretch", justify="space-around", gap=3) return (add_vars_stack,) @app.cell def _(): use_additional_variables = mo.ui.checkbox(label="**Use additional variable boxes** *(E.g. adding large chunks of text to all prompts, etc.)*", value=True) return (use_additional_variables,) @app.cell def _(additional_variable_slider, use_additional_variables): additional_variable_mechanics_stack = mo.hstack([additional_variable_slider, use_additional_variables], align="start") return (additional_variable_mechanics_stack,) @app.cell def _(): additional_variable_slider = mo.ui.slider(start=1, stop=5, value=1, step=1, label="Additional Variables", show_value=True) return (additional_variable_slider,) @app.function def add_additional_variables(fields_to_process, use_additional_variables, additional_variables_raw, create_copy=True): """ Add additional variables to each dict in fields_to_process if conditions are met. 
Args: fields_to_process: List of dictionaries to process use_additional_variables: Object with boolean value property additional_variables_raw: List of [key, value] pairs create_copy: If True, creates a new copy instead of modifying the original Returns: Updated fields_to_process or a new copy with updates """ if not fields_to_process: return fields_to_process if not (use_additional_variables.value and additional_variables_raw): return fields_to_process result = [field.copy() for field in fields_to_process] if create_copy else fields_to_process additional_vars = {item[0].value: item[1].value for item in additional_variables_raw if item[0].value and item[1].value != ""} if additional_vars: for field in result: field.update(additional_vars) return result @app.cell def _(additional_variable_slider, get_additional_variable_state): def create_additional_variable_input(num=1): # Wrap in a function to create reactive dependency on state def get_template_content(num=1): content = get_additional_variable_state()[num-1] if get_additional_variable_state() and num-1 < len(get_additional_variable_state()) else "empty" return content additional_variable_column_label = mo.ui.text(label=f"**Additional Variable {num}:**", value=f"additional_var_{num}") additional_variable_editor = mo.ui.code_editor(language="python", min_height=300, theme="dark") return [ additional_variable_column_label, additional_variable_editor ] additional_variables_raw = [create_additional_variable_input(i) for i in range(1, additional_variable_slider.value+1)] return (additional_variables_raw,) @app.cell def _( additional_variables_raw, fields_to_process_raw, use_additional_variables, ): if fields_to_process_raw and use_additional_variables.value and additional_variables_raw: fields_to_process = add_additional_variables(fields_to_process_raw, use_additional_variables, additional_variables_raw) else: fields_to_process = fields_to_process_raw return (fields_to_process,) @app.cell def _(variable_names): if variable_names: prompt_input_variables_sidebar = create_stats_from_variables(variable_names) else: prompt_input_variables_sidebar = None return (prompt_input_variables_sidebar,) @app.cell def _(get_pt_state, prompt_number_slider): def create_prompt(num=1): # Wrap in a function to create reactive dependency on state def get_template_content(num=1): content = ( get_pt_state()[num - 1] if get_pt_state() and num - 1 < len(get_pt_state()) else "empty" ) return content prompt_column_label = mo.ui.text( label=f"**Add output column name for Prompt {num}:**", value=f"Added Column {num}", ) prompt_editor = mo.ui.code_editor( value=get_template_content(num), language="python", min_height=300, theme="dark", ) return [prompt_column_label, prompt_editor] prompts_raw = [create_prompt(i) for i in range(1, prompt_number_slider.value + 1)] return (prompts_raw,) @app.cell def _(is_disabled): run_prompt_button_1 = mo.ui.run_button( label="Run Prompt 1", disabled=is_disabled(1) ) return (run_prompt_button_1,) @app.cell def _(is_disabled): run_prompt_button_2 = mo.ui.run_button( label="Run Prompt 2", disabled=is_disabled(2) ) return (run_prompt_button_2,) @app.cell def _(is_disabled): run_prompt_button_3 = mo.ui.run_button( label="Run Prompt 3", disabled=is_disabled(3) ) return (run_prompt_button_3,) @app.cell def _(is_disabled): run_prompt_button_4 = mo.ui.run_button( label="Run Prompt 4", disabled=is_disabled(4) ) return (run_prompt_button_4,) @app.cell def _(is_disabled): run_prompt_button_5 = mo.ui.run_button( label="Run Prompt 5", 
disabled=is_disabled(5) ) return (run_prompt_button_5,) @app.cell def _( activate_prompt_lineage, append_llm_results_to_dataframe, fields_to_process, inf_model, lineage_options, params, process_prompt_lineage, process_with_llm, prompts_raw, results_table, run_prompt_button_1, selected_model, selected_rows, ): if run_prompt_button_1.value: pt_1_prompt = prompts_raw[0][1].value pt_1_column = prompts_raw[0][0].value prompt_answers_1 = process_with_llm( fields_to_process=fields_to_process, prompt_template=pt_1_prompt, inf_model=inf_model, params=params, batch_size=10, ) append_llm_results_to_dataframe( target_dataframe=results_table, llm_results=prompt_answers_1, selection_table=selected_rows, column_name=pt_1_column, ) if activate_prompt_lineage.value: prompt_lineage_1 = process_prompt_lineage( lineage_options=lineage_options, selected_model=selected_model, params=params, fields_to_process=fields_to_process, prompt_template=pt_1_prompt ) append_llm_results_to_dataframe( target_dataframe=results_table, llm_results=prompt_lineage_1, selection_table=selected_rows, column_name=f"{pt_1_column}___prompt_lineage", ) else: pt_1_prompt = pt_1_column = prompt_lineage_1 = None prompt_answers_1 = [] return @app.cell def _( activate_prompt_lineage, append_llm_results_to_dataframe, fields_to_process, inf_model, lineage_options, params, process_prompt_lineage, process_with_llm, prompts_raw, results_table, run_prompt_button_2, selected_model, selected_rows, ): if run_prompt_button_2.value: pt_2_prompt = prompts_raw[1][1].value pt_2_column = prompts_raw[1][0].value prompt_answers_2 = process_with_llm( fields_to_process=fields_to_process, prompt_template=pt_2_prompt, inf_model=inf_model, params=params, batch_size=10, ) append_llm_results_to_dataframe( target_dataframe=results_table, llm_results=prompt_answers_2, selection_table=selected_rows, column_name=pt_2_column, ) if activate_prompt_lineage.value: prompt_lineage_2 = process_prompt_lineage( lineage_options=lineage_options, selected_model=selected_model, params=params, fields_to_process=fields_to_process, prompt_template=pt_2_prompt ) append_llm_results_to_dataframe( target_dataframe=results_table, llm_results=prompt_lineage_2, selection_table=selected_rows, column_name=f"{pt_2_column}___prompt_lineage", ) else: pt_2_prompt = pt_2_column = prompt_lineage_2 = None prompt_answers_2 = [] return @app.cell def _( activate_prompt_lineage, append_llm_results_to_dataframe, fields_to_process, inf_model, lineage_options, params, process_prompt_lineage, process_with_llm, prompts_raw, results_table, run_prompt_button_3, selected_model, selected_rows, ): if run_prompt_button_3.value: pt_3_prompt = prompts_raw[2][1].value pt_3_column = prompts_raw[2][0].value prompt_answers_3 = process_with_llm( fields_to_process=fields_to_process, prompt_template=pt_3_prompt, inf_model=inf_model, params=params, batch_size=10, ) append_llm_results_to_dataframe( target_dataframe=results_table, llm_results=prompt_answers_3, selection_table=selected_rows, column_name=pt_3_column, ) if activate_prompt_lineage.value: prompt_lineage_3 = process_prompt_lineage( lineage_options=lineage_options, selected_model=selected_model, params=params, fields_to_process=fields_to_process, prompt_template=pt_3_prompt ) append_llm_results_to_dataframe( target_dataframe=results_table, llm_results=prompt_lineage_3, selection_table=selected_rows, column_name=f"{pt_3_column}___prompt_lineage", ) else: pt_3_prompt = pt_3_column = prompt_lineage_3 = None prompt_answers_3 = [] return @app.cell def _( 
activate_prompt_lineage, append_llm_results_to_dataframe, fields_to_process, inf_model, lineage_options, params, process_prompt_lineage, process_with_llm, prompts_raw, results_table, run_prompt_button_4, selected_model, selected_rows, ): if run_prompt_button_4.value: pt_4_prompt = prompts_raw[3][1].value pt_4_column = prompts_raw[3][0].value prompt_answers_4 = process_with_llm( fields_to_process=fields_to_process, prompt_template=pt_4_prompt, inf_model=inf_model, params=params, batch_size=10, ) append_llm_results_to_dataframe( target_dataframe=results_table, llm_results=prompt_answers_4, selection_table=selected_rows, column_name=pt_4_column, ) if activate_prompt_lineage.value: prompt_lineage_4 = process_prompt_lineage( lineage_options=lineage_options, selected_model=selected_model, params=params, fields_to_process=fields_to_process, prompt_template=pt_4_prompt ) append_llm_results_to_dataframe( target_dataframe=results_table, llm_results=prompt_lineage_4, selection_table=selected_rows, column_name=f"{pt_4_column}___prompt_lineage", ) else: pt_4_prompt = pt_4_column = prompt_lineage_4 = None prompt_answers_4 = [] return @app.cell def _( activate_prompt_lineage, append_llm_results_to_dataframe, fields_to_process, inf_model, lineage_options, params, process_prompt_lineage, process_with_llm, prompts_raw, results_table, run_prompt_button_5, selected_model, selected_rows, ): if run_prompt_button_5.value: pt_5_prompt = prompts_raw[4][1].value pt_5_column = prompts_raw[4][0].value prompt_answers_5 = process_with_llm( fields_to_process=fields_to_process, prompt_template=pt_5_prompt, inf_model=inf_model, params=params, batch_size=10, ) append_llm_results_to_dataframe( target_dataframe=results_table, llm_results=prompt_answers_5, selection_table=selected_rows, column_name=pt_5_column, ) if activate_prompt_lineage.value: prompt_lineage_5 = process_prompt_lineage( lineage_options=lineage_options, selected_model=selected_model, params=params, fields_to_process=fields_to_process, prompt_template=pt_5_prompt ) append_llm_results_to_dataframe( target_dataframe=results_table, llm_results=prompt_lineage_5, selection_table=selected_rows, column_name=f"{pt_5_column}___prompt_lineage", ) else: pt_5_prompt = pt_5_column = prompt_lineage_5 = None prompt_answers_5 = [] return @app.cell def _(results_stack): ui_accordion_section_8 = mo.accordion( {"Section 8: **Run and View Results**": results_stack} ) ui_accordion_section_8 return @app.cell def _(show_variable_sidebar): show_variable_sidebar.right() return @app.cell def _(prompt_input_variables_sidebar, show_variable_sidebar): show_sidebar = show_variable_sidebar.value mo.vstack([show_sidebar is not False and prompt_input_variables_sidebar]).right() return @app.cell def _(table_dataframe): if not table_dataframe.empty: results_table = table_dataframe.copy() else: results_table = [] return (results_table,) @app.cell def _( results_table, run_prompt_button_1, run_prompt_button_2, run_prompt_button_3, run_prompt_button_4, run_prompt_button_5, ): if ( run_prompt_button_1.value or run_prompt_button_2.value or run_prompt_button_3.value or run_prompt_button_4.value or run_prompt_button_5.value ): results_table_view = mo.ui.table(results_table, show_column_summaries=False) else: results_table_view = mo.ui.table(results_table, show_column_summaries=False) return (results_table_view,) @app.cell def _(prompt_stack_md): prompt_stack_docs = mo.hstack([mo.md(prompt_stack_md)], justify="start", widths=[1.0]) return (prompt_stack_docs,) @app.cell def _(results_stack_md): 
result_stack_docs = mo.hstack([mo.md(results_stack_md)], justify="start", widths=[1.0]) return (result_stack_docs,) @app.cell def _(prompt_stack_docs, prompts_raw): prompts = [prompt_stack_docs]+[mo.vstack([prompt[0], prompt[1]]) for prompt in prompts_raw] prompt_stack = mo.vstack( prompts, heights="equal", align="stretch", justify="space-around", gap=3 ) return (prompt_stack,) @app.cell def _( result_stack_docs, results_table_view, run_prompt_button_1, run_prompt_button_2, run_prompt_button_3, run_prompt_button_4, run_prompt_button_5, ): run_button_stack = mo.hstack( [ run_prompt_button_1, run_prompt_button_2, run_prompt_button_3, run_prompt_button_4, run_prompt_button_5, ] ) results_stack = mo.vstack([result_stack_docs, run_button_stack, results_table_view]) return (results_stack,) @app.cell def _(): return @app.cell def _(): return @app.cell def _(): return @app.cell def _(): return if __name__ == "__main__": app.run()