File size: 2,031 Bytes
8d5b4ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os
from dotenv import load_dotenv
import gradio as gr
from langchain_huggingface import HuggingFaceEndpoint

# Load environment variables from a local .env file, if present.
load_dotenv()

# Fail fast with a clear message when the token is missing. Without this
# check, a missing HF_TOKEN is None and the later .strip() call raises an
# opaque AttributeError instead of explaining what went wrong.
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
if not HF_TOKEN:
    raise RuntimeError(
        "HF_TOKEN is not set. Add it to your environment or .env file."
    )


def _make_endpoint(repo_id: str) -> HuggingFaceEndpoint:
    """Build a HuggingFace inference endpoint with the shared sampling config.

    Args:
        repo_id: HuggingFace Hub model repository id.

    Returns:
        A configured HuggingFaceEndpoint client.
    """
    return HuggingFaceEndpoint(
        repo_id=repo_id,
        huggingfacehub_api_token=HF_TOKEN,
        temperature=0.7,      # moderate sampling randomness
        max_new_tokens=100,   # cap on generated response length
    )


# Primary chatbot model.
llm = _make_endpoint("mistralai/Mistral-7B-Instruct-v0.3")

# Meta-Llama Guard model used to screen the primary model's output.
guard_llm = _make_endpoint("meta-llama/Meta-Llama-Guard-2-8B")

# Function for guarded chatbot response
def guarded_chatbot_response(message: str) -> str:
    """Generate a chatbot reply and screen it for child-appropriateness.

    The user's message is first answered by the primary model; the raw
    answer is then passed through the guard model with instructions to
    return it unchanged, rephrase it, or reply "Content not suitable."

    Args:
        message: The user's chat message.

    Returns:
        The guard model's output, or an "Error: ..." string if either
        model call fails (the Gradio output box displays this directly).
    """
    try:
        # Get raw response from primary model. Use .invoke(): calling the
        # model object directly (llm(message)) is deprecated in LangChain.
        raw_response = llm.invoke(message)

        # Validate with Meta-Llama Guard.
        # NOTE(review): Meta-Llama-Guard-2 is trained as a safe/unsafe
        # classifier, so it may answer "safe"/"unsafe" rather than follow
        # this rephrasing instruction — confirm against the model card.
        guard_prompt = f"""
        You are a content safety guard for kids under 16. Analyze the following text and ensure it is friendly and appropriate for children.
        If appropriate, return the text as is. If not, rephrase or respond with "Content not suitable." 
        Text: {raw_response}
        """
        return guard_llm.invoke(guard_prompt)
    except Exception as e:  # UI boundary: surface the error in the response box
        return f"Error: {e}"


# Gradio interface wiring for the guarded chatbot.
with gr.Blocks() as app_with_guardrails:
    gr.Markdown("## Chatbot With Guardrails")
    gr.Markdown("This chatbot ensures all responses are appropriate for kids under 16.")

    # Widgets: a message box in its own row, a response box, and a send button.
    with gr.Row():
        message_box = gr.Textbox(label="Your Message", placeholder="Type here...")
    reply_box = gr.Textbox(label="Guarded Response", placeholder="Bot will respond here...")
    send_btn = gr.Button("Send")

    # Route button clicks through the guarded response handler.
    send_btn.click(
        fn=guarded_chatbot_response,
        inputs=[message_box],
        outputs=[reply_box],
    )

# Launch the app only when executed as a script (not when imported).
if __name__ == "__main__":
    app_with_guardrails.launch()