File size: 2,677 Bytes
cc8705b
 
 
 
61d35d5
 
 
cc8705b
 
 
 
 
 
 
 
 
 
 
 
eac13d7
cc8705b
 
 
61d35d5
cc8705b
 
61d35d5
cc8705b
 
 
 
 
 
 
eac13d7
cc8705b
 
 
eac13d7
cc8705b
 
 
 
 
eac13d7
 
 
cc8705b
 
 
 
 
eac13d7
cc8705b
 
 
61d35d5
cc8705b
 
 
 
 
 
 
 
eac13d7
cc8705b
 
 
eac13d7
cc8705b
 
 
 
 
eac13d7
cc8705b
 
eac13d7
cc8705b
 
 
 
eac13d7
cc8705b
 
eac13d7
cc8705b
 
 
 
eac13d7
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import logging
import os
from typing import Optional

from orq_ai_sdk import OrqAI

# Module-level logger following the standard getLogger(__name__) convention.
LOGGER = logging.getLogger(__name__)

# SDK client created at import time; raises KeyError immediately if
# ORQ_API_KEY is not set in the environment.  Pinned to "develop".
client = OrqAI(api_key=os.environ["ORQ_API_KEY"], environment="develop")

# Challenge levels whose answers are graded by a dedicated LLM-judge
# deployment rather than a plain string check; maps level -> context step key.
special_checks = {
    3: "level-3-llm-judge",
    6: "level-6-llm-judge",
}


def stream_request(variant: str, secret: str, user_input: str):
    """Stream the response from the model.

    Yields the message content of each non-final chunk produced by the
    deployment identified by *variant*, with the given template inputs.
    """
    response_stream = client.deployments.invoke_with_stream(
        key="llm-security-challenge-demo",
        context={"step": variant},  # , "environments": []},
        inputs={"secret": secret, "user_input": user_input},
    )
    LOGGER.info(response_stream)

    for part in response_stream:
        LOGGER.info(part)
        # The final chunk carries no incremental content; skip it.
        if part.is_final:
            continue
        yield part.choices[0].message.content


def get_full_prompt(
    variant: str,
    secret: Optional[str] = None,
    user_input: Optional[str] = None,
):
    """Get the full prompt from a specific deployment.

    Fetches the deployment config for *variant* and returns its messages as a
    dict keyed by "<role>_prompt" (e.g. "system_prompt", "user_prompt").
    If *secret* / *user_input* are given, they are substituted for the
    "{{secret}}" / "{{user_input}}" placeholders in the user prompt.

    Args:
        variant: Deployment context step selecting which config to fetch.
        secret: Optional value substituted for "{{secret}}".
        user_input: Optional value substituted for "{{user_input}}".

    Returns:
        dict mapping "<role>_prompt" to that message's content.
    """
    deployment_config = client.deployments.get_config(
        key="llm-security-challenge-demo",
        context={"step": variant},  # , "environments": []},
    ).to_dict()
    prompts = {
        p["role"] + "_prompt": p["content"] for p in deployment_config["messages"]
    }

    # Test against None explicitly so an empty string still replaces the
    # placeholder (a bare truthiness check would silently skip "").
    if secret is not None:
        prompts["user_prompt"] = prompts["user_prompt"].replace("{{secret}}", secret)
    if user_input is not None:
        prompts["user_prompt"] = prompts["user_prompt"].replace(
            "{{user_input}}", user_input
        )
    return prompts


def run_judge(level: int, inputs: dict):
    """Ask the LLM-judge deployment for *level* whether the attempt passed.

    Returns True when the judge's reply ends with the word "yes"
    (case-insensitive), False otherwise.
    """
    result = client.deployments.invoke(
        key="llm-security-challenge-demo",
        context={"step": special_checks[level]},
        inputs=inputs,
    )
    reply = result.choices[0].message.content
    LOGGER.info(reply)
    # The verdict is expected as the last whitespace-separated word.
    verdict = reply.split(" ")[-1]
    return verdict.lower() == "yes"


def is_subsequence(main_string, sub_string):
    """
    Checks if sub_string is a subsequence of main_string.
    A subsequence allows arbitrary characters in between the characters of sub_string in main_string.

    The comparison is case-insensitive (both strings are lowercased first).

    Parameters:
    main_string (str): The string in which to search.
    sub_string (str): The string to search for.

    Returns:
    bool: True if sub_string is a subsequence of main_string, False otherwise.
    """
    # Idiomatic single-pass subsequence test: `c in iterator` consumes the
    # iterator up to and including the first match, so each character of
    # sub_string must appear strictly after the previous one in main_string.
    # Empty sub_string => all() over an empty generator => True, matching the
    # explicit edge-case handling of the original two-pointer scan.
    main_chars = iter(main_string.lower())
    return all(c in main_chars for c in sub_string.lower())