{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Text-Optimizer (Evaluator-Optimizer-pattern)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Start with imports - ask ChatGPT to e\n",
"import os\n",
"import json\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"from IPython.display import Markdown, display"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Refreshing dot env\n",
""
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"load_dotenv(override=True)\n",
"open_api_key = os.getenv(\"OPENAI_API_KEY\")\n",
"groq_api_key = os.getenv(\"GROQ_API_KEY\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"API Key Validator"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from openai import api_key\n",
"\n",
"\n",
"def api_key_checker(api_key):\n",
" if api_key:\n",
" print(f\"API Key exists and begins {api_key[:8]}\")\n",
" else:\n",
" print(\"API Key not set\")\n",
"\n",
"api_key_checker(groq_api_key)\n",
"api_key_checker(open_api_key) "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Helper Functions\n",
"\n",
"### 1. `llm_optimizer` (for refining the prompted text) - GROQ\n",
"- **Purpose**: Generates optimized versions of text based on evaluator feedback\n",
"- **System Message**: \"You are a helpful assistant that refines text based on evaluator feedback. \n",
"\n",
"### 2. `llm_evaluator` (for judging the llm_optimizer's output) - OpenAI\n",
"- **Purpose**: Evaluates the quality of LLM responses using another LLM as a judge\n",
"- **Quality Threshold**: Requires score ā„ 0.7 for acceptance\n",
"\n",
"### 3. `optimize_prompt` (runner)\n",
"- **Purpose**: Iteratively optimizes prompts using LLM feedback loop\n",
"- **Process**:\n",
" 1. LLM optimizer generates improved version\n",
" 2. LLM evaluator assesses quality and line count\n",
" 3. If accepted, process stops; if not, feedback used for next iteration\n",
"- **Max Iterations**: 5 attempts by default"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"def generate_llm_response(provider, system_msg, user_msg, temperature=0.7):\n",
" if provider == \"groq\":\n",
" from openai import OpenAI\n",
" client = OpenAI(\n",
" api_key=groq_api_key,\n",
" base_url=\"https://api.groq.com/openai/v1\"\n",
" )\n",
" model = \"llama-3.3-70b-versatile\"\n",
" elif provider == \"openai\":\n",
" from openai import OpenAI\n",
" client = OpenAI(api_key=open_api_key)\n",
" model = \"gpt-4o-mini\"\n",
" else:\n",
" raise ValueError(f\"Unsupported provider: {provider}\")\n",
"\n",
" response = client.chat.completions.create(\n",
" model=model,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_msg},\n",
" {\"role\": \"user\", \"content\": user_msg}\n",
" ],\n",
" temperature=temperature\n",
" )\n",
" return response.choices[0].message.content.strip()\n",
"\n",
"def llm_optimizer(provider, prompt, feedback=None):\n",
" system_msg = \"You are a helpful assistant that refines text based on evaluator feedback. CRITICAL: You must respond with EXACTLY 3 lines or fewer. Be extremely concise and direct\"\n",
" user_msg = prompt if not feedback else f\"Refine this text to address the feedback: '{feedback}'\\n\\nText:\\n{prompt}\"\n",
" return generate_llm_response(provider, system_msg, user_msg, temperature=0.7)\n",
"\n",
"\n",
"def llm_evaluator(provider, prompt, response):\n",
" \n",
" # Define the evaluator's role and evaluation criteria\n",
" evaluator_system_message = \"You are a strict evaluator judging the quality of LLM outputs.\"\n",
" \n",
" # Create the evaluation prompt with clear instructions\n",
" evaluation_prompt = (\n",
" f\"Evaluate the following response to the prompt. More concise language is better. CRITICAL: You must respond with EXACTLY 3 lines or fewer. Be extremely concise and direct\"\n",
" f\"Score it 0ā1. If under 0.7, explain what must be improved.\\n\\n\"\n",
" f\"Prompt: {prompt}\\n\\nResponse: {response}\"\n",
" )\n",
" \n",
" # Get evaluation from LLM with temperature=0 for consistency\n",
" evaluation_result = generate_llm_response(provider, evaluator_system_message, evaluation_prompt, temperature=0)\n",
" \n",
" # Parse the evaluation score\n",
" # Look for explicit score mentions in the response\n",
" has_acceptable_score = \"Score: 0.7\" in evaluation_result or \"Score: 1\" in evaluation_result\n",
" quality_score = 1.0 if has_acceptable_score else 0.5\n",
" \n",
" # Determine if response meets quality threshold\n",
" is_accepted = quality_score >= 0.7\n",
" \n",
" # Return appropriate feedback based on acceptance\n",
" feedback = None if is_accepted else evaluation_result\n",
" \n",
" return is_accepted, feedback\n",
"\n",
"def optimize_prompt_runner(prompt, provider=\"groq\", max_iterations=5):\n",
" current_text = prompt\n",
" previous_feedback = None\n",
" \n",
" for iteration in range(max_iterations):\n",
" print(f\"\\nš Iteration {iteration + 1}\")\n",
" \n",
" # Step 1: Generate optimized version based on current text and feedback\n",
" optimized_text = llm_optimizer(provider, current_text, previous_feedback)\n",
" print(f\"š§ Optimized: {optimized_text}\\n\")\n",
" \n",
" # Step 2: Evaluate the optimized version\n",
" is_accepted, evaluation_feedback = llm_evaluator('openai', prompt, optimized_text)\n",
" \n",
" if is_accepted:\n",
" print(\"ā
Accepted by evaluator\")\n",
" return optimized_text\n",
" else:\n",
" print(f\"ā Feedback: {evaluation_feedback}\\n\")\n",
" # Step 3: Prepare for next iteration\n",
" current_text = optimized_text\n",
" previous_feedback = evaluation_feedback \n",
"\n",
" print(\"ā ļø Max iterations reached.\")\n",
" return current_text\n"
]
},
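{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick offline check of the score-parsing pattern used in `llm_evaluator` above (the sample evaluator outputs are made up; no API calls):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Same pattern as in llm_evaluator: grab the first number after the word \"Score\"\n",
"samples = [\n",
"    \"Score: 0.9. Clear and concise.\",\n",
"    \"Score: 0.4. Too verbose; cut the second sentence.\",\n",
"    \"No explicit score given.\",\n",
"]\n",
"for text in samples:\n",
"    match = re.search(r\"score[:\\s]*([01](?:\\.\\d+)?)\", text, re.IGNORECASE)\n",
"    score = float(match.group(1)) if match else 0.5\n",
"    print(f\"{score:.1f} <- {text}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can also trace the loop's control flow offline by temporarily stubbing the two LLM calls with canned responses (the drafts and feedback below are made up for illustration; no API keys needed):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Offline dry run: stub the optimizer and evaluator so the loop runs without API calls\n",
"_real_optimizer, _real_evaluator = llm_optimizer, llm_evaluator\n",
"\n",
"def _stub_optimizer(provider, prompt, feedback=None):\n",
"    # Return a revised draft once feedback arrives, a first draft otherwise\n",
"    return \"draft v2 (revised)\" if feedback else \"draft v1\"\n",
"\n",
"def _stub_evaluator(provider, prompt, response):\n",
"    # Reject the first draft with feedback; accept the revision\n",
"    if response == \"draft v1\":\n",
"        return False, \"Too vague; name the specific technique.\"\n",
"    return True, None\n",
"\n",
"llm_optimizer, llm_evaluator = _stub_optimizer, _stub_evaluator\n",
"print(optimize_prompt_runner(\"demo prompt\"))\n",
"llm_optimizer, llm_evaluator = _real_optimizer, _real_evaluator  # restore the real functions"
]
},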
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Testing the Evaluator-Optimizer"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"prompt = \"Summarize faiss vector search\"\n",
"final_output = optimize_prompt_runner(prompt, provider=\"groq\")\n",
"print(f\"šÆ Final Output: {final_output}\")"
]
}
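,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The same loop can also run with OpenAI serving both roles; this assumes `OPENAI_API_KEY` is set and uses the `gpt-4o-mini` model configured above:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional: OpenAI as both optimizer and evaluator\n",
"final_output = optimize_prompt_runner(prompt, provider=\"openai\", max_iterations=3)\n",
"print(f\"🎯 Final Output: {final_output}\")"
]
}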
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}