File size: 7,779 Bytes
0af0679 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 |
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<b>Text-Optimizer (Evaluator-Optimizer-pattern)</b>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Imports for the evaluator-optimizer workflow\n",
"import os\n",
"import json\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"from IPython.display import Markdown, display"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<b>Refreshing dot env</b>\n",
"<br/>"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Reload .env on every run so edited keys take effect without a kernel restart\n",
"load_dotenv(override=True)\n",
"open_api_key = os.getenv(\"OPENAI_API_KEY\")\n",
"groq_api_key = os.getenv(\"GROQ_API_KEY\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"API Key Validator"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def api_key_checker(api_key):\n",
"    \"\"\"Print a masked confirmation (first 8 chars) if the key is set, else a warning.\"\"\"\n",
"    if api_key:\n",
"        print(f\"API Key exists and begins {api_key[:8]}\")\n",
"    else:\n",
"        print(\"API Key not set\")\n",
"\n",
"api_key_checker(groq_api_key)\n",
"api_key_checker(open_api_key)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Helper Functions\n",
"\n",
"### 1. `llm_optimizer` (for refining the prompted text) - GROQ\n",
"- **Purpose**: Generates optimized versions of text based on evaluator feedback\n",
"- **System Message**: \"You are a helpful assistant that refines text based on evaluator feedback.\"\n",
"\n",
"### 2. `llm_evaluator` (for judging the llm_optimizer's output) - OpenAI\n",
"- **Purpose**: Evaluates the quality of LLM responses using another LLM as a judge\n",
"- **Quality Threshold**: Requires score ≥ 0.7 for acceptance\n",
"\n",
"### 3. `optimize_prompt` (runner)\n",
"- **Purpose**: Iteratively optimizes prompts using LLM feedback loop\n",
"- **Process**:\n",
" 1. LLM optimizer generates improved version\n",
" 2. LLM evaluator assesses quality and line count\n",
" 3. If accepted, process stops; if not, feedback used for next iteration\n",
"- **Max Iterations**: 5 attempts by default"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"\n",
"def generate_llm_response(provider, system_msg, user_msg, temperature=0.7):\n",
"    \"\"\"Send a system+user message pair to the given provider and return the reply text.\n",
"\n",
"    provider: 'groq' (Groq's OpenAI-compatible endpoint) or 'openai'.\n",
"    Raises ValueError for any other provider.\n",
"    \"\"\"\n",
"    if provider == \"groq\":\n",
"        client = OpenAI(\n",
"            api_key=groq_api_key,\n",
"            base_url=\"https://api.groq.com/openai/v1\"\n",
"        )\n",
"        model = \"llama-3.3-70b-versatile\"\n",
"    elif provider == \"openai\":\n",
"        client = OpenAI(api_key=open_api_key)\n",
"        model = \"gpt-4o-mini\"\n",
"    else:\n",
"        raise ValueError(f\"Unsupported provider: {provider}\")\n",
"\n",
"    response = client.chat.completions.create(\n",
"        model=model,\n",
"        messages=[\n",
"            {\"role\": \"system\", \"content\": system_msg},\n",
"            {\"role\": \"user\", \"content\": user_msg}\n",
"        ],\n",
"        temperature=temperature\n",
"    )\n",
"    return response.choices[0].message.content.strip()\n",
"\n",
"def llm_optimizer(provider, prompt, feedback=None):\n",
"    \"\"\"Generate a refined version of the text, optionally addressing evaluator feedback.\"\"\"\n",
"    system_msg = \"You are a helpful assistant that refines text based on evaluator feedback. CRITICAL: You must respond with EXACTLY 3 lines or fewer. Be extremely concise and direct\"\n",
"    user_msg = prompt if not feedback else f\"Refine this text to address the feedback: '{feedback}'\\n\\nText:\\n{prompt}\"\n",
"    return generate_llm_response(provider, system_msg, user_msg, temperature=0.7)\n",
"\n",
"\n",
"def llm_evaluator(provider, prompt, response):\n",
"    \"\"\"Judge `response` against `prompt`; return (is_accepted, feedback).\"\"\"\n",
"\n",
"    # Define the evaluator's role and evaluation criteria\n",
"    evaluator_system_message = \"You are a strict evaluator judging the quality of LLM outputs.\"\n",
"\n",
"    # Ask for an explicit 'Score: X.X' marker so the score can be parsed reliably\n",
"    evaluation_prompt = (\n",
"        f\"Evaluate the following response to the prompt. More concise language is better. CRITICAL: You must respond with EXACTLY 3 lines or fewer. Be extremely concise and direct. \"\n",
"        f\"Score it 0-1 in the form 'Score: X.X'. If under 0.7, explain what must be improved.\\n\\n\"\n",
"        f\"Prompt: {prompt}\\n\\nResponse: {response}\"\n",
"    )\n",
"\n",
"    # temperature=0 for consistent, deterministic judging\n",
"    evaluation_result = generate_llm_response(provider, evaluator_system_message, evaluation_prompt, temperature=0)\n",
"\n",
"    # Parse the numeric score. The previous exact-substring check accepted only the\n",
"    # literal strings 'Score: 0.7' or 'Score: 1', wrongly rejecting e.g. 0.8 or 0.95.\n",
"    score_match = re.search(r\"Score:\\s*([01](?:\\.\\d+)?)\", evaluation_result)\n",
"    quality_score = float(score_match.group(1)) if score_match else 0.5\n",
"\n",
"    # Accept at or above the 0.7 quality threshold; otherwise return the critique\n",
"    is_accepted = quality_score >= 0.7\n",
"    feedback = None if is_accepted else evaluation_result\n",
"    return is_accepted, feedback\n",
"\n",
"def optimize_prompt_runner(prompt, provider=\"groq\", max_iterations=5):\n",
"    \"\"\"Iteratively refine `prompt`: optimizer drafts, evaluator judges, until accepted or max_iterations.\"\"\"\n",
"    current_text = prompt\n",
"    previous_feedback = None\n",
"\n",
"    for iteration in range(max_iterations):\n",
"        print(f\"\\n🔁 Iteration {iteration + 1}\")\n",
"\n",
"        # Step 1: Generate optimized version based on current text and feedback\n",
"        optimized_text = llm_optimizer(provider, current_text, previous_feedback)\n",
"        print(f\"🔧 Optimized: {optimized_text}\\n\")\n",
"\n",
"        # Step 2: Evaluate the optimized version (always judged by OpenAI)\n",
"        is_accepted, evaluation_feedback = llm_evaluator('openai', prompt, optimized_text)\n",
"\n",
"        if is_accepted:\n",
"            print(\"✅ Accepted by evaluator\")\n",
"            return optimized_text\n",
"\n",
"        print(f\"❌ Feedback: {evaluation_feedback}\\n\")\n",
"        # Step 3: Feed the critique into the next iteration\n",
"        current_text = optimized_text\n",
"        previous_feedback = evaluation_feedback\n",
"\n",
"    print(\"⚠️ Max iterations reached.\")\n",
"    return current_text\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Testing the Evaluator-Optimizer"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Demo: run the evaluator-optimizer loop on a sample prompt\n",
"prompt = \"Summarize faiss vector search\"\n",
"final_output = optimize_prompt_runner(prompt, provider=\"groq\")\n",
"print(f\"🎯 Final Output: {final_output}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|