{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Text-Optimizer (Evaluator-Optimizer-pattern)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Start with imports - ask ChatGPT to e\n", "import os\n", "import json\n", "from dotenv import load_dotenv\n", "from openai import OpenAI\n", "from IPython.display import Markdown, display" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Refreshing dot env\n", "
" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "load_dotenv(override=True)\n", "open_api_key = os.getenv(\"OPENAI_API_KEY\")\n", "groq_api_key = os.getenv(\"GROQ_API_KEY\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "API Key Validator" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from openai import api_key\n", "\n", "\n", "def api_key_checker(api_key):\n", " if api_key:\n", " print(f\"API Key exists and begins {api_key[:8]}\")\n", " else:\n", " print(\"API Key not set\")\n", "\n", "api_key_checker(groq_api_key)\n", "api_key_checker(open_api_key) " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Helper Functions\n", "\n", "### 1. `llm_optimizer` (for refining the prompted text) - GROQ\n", "- **Purpose**: Generates optimized versions of text based on evaluator feedback\n", "- **System Message**: \"You are a helpful assistant that refines text based on evaluator feedback. \n", "\n", "### 2. `llm_evaluator` (for judging the llm_optimizer's output) - OpenAI\n", "- **Purpose**: Evaluates the quality of LLM responses using another LLM as a judge\n", "- **Quality Threshold**: Requires score ≄ 0.7 for acceptance\n", "\n", "### 3. `optimize_prompt` (runner)\n", "- **Purpose**: Iteratively optimizes prompts using LLM feedback loop\n", "- **Process**:\n", " 1. LLM optimizer generates improved version\n", " 2. LLM evaluator assesses quality and line count\n", " 3. If accepted, process stops; if not, feedback used for next iteration\n", "- **Max Iterations**: 5 attempts by default" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "def generate_llm_response(provider, system_msg, user_msg, temperature=0.7):\n", " if provider == \"groq\":\n", " from openai import OpenAI\n", " client = OpenAI(\n", " api_key=groq_api_key,\n", " base_url=\"https://api.groq.com/openai/v1\"\n", " )\n", " model = \"llama-3.3-70b-versatile\"\n", " elif provider == \"openai\":\n", " from openai import OpenAI\n", " client = OpenAI(api_key=open_api_key)\n", " model = \"gpt-4o-mini\"\n", " else:\n", " raise ValueError(f\"Unsupported provider: {provider}\")\n", "\n", " response = client.chat.completions.create(\n", " model=model,\n", " messages=[\n", " {\"role\": \"system\", \"content\": system_msg},\n", " {\"role\": \"user\", \"content\": user_msg}\n", " ],\n", " temperature=temperature\n", " )\n", " return response.choices[0].message.content.strip()\n", "\n", "def llm_optimizer(provider, prompt, feedback=None):\n", " system_msg = \"You are a helpful assistant that refines text based on evaluator feedback. CRITICAL: You must respond with EXACTLY 3 lines or fewer. Be extremely concise and direct\"\n", " user_msg = prompt if not feedback else f\"Refine this text to address the feedback: '{feedback}'\\n\\nText:\\n{prompt}\"\n", " return generate_llm_response(provider, system_msg, user_msg, temperature=0.7)\n", "\n", "\n", "def llm_evaluator(provider, prompt, response):\n", " \n", " # Define the evaluator's role and evaluation criteria\n", " evaluator_system_message = \"You are a strict evaluator judging the quality of LLM outputs.\"\n", " \n", " # Create the evaluation prompt with clear instructions\n", " evaluation_prompt = (\n", " f\"Evaluate the following response to the prompt. More concise language is better. CRITICAL: You must respond with EXACTLY 3 lines or fewer. Be extremely concise and direct\"\n", " f\"Score it 0–1. If under 0.7, explain what must be improved.\\n\\n\"\n", " f\"Prompt: {prompt}\\n\\nResponse: {response}\"\n", " )\n", " \n", " # Get evaluation from LLM with temperature=0 for consistency\n", " evaluation_result = generate_llm_response(provider, evaluator_system_message, evaluation_prompt, temperature=0)\n", " \n", " # Parse the evaluation score\n", " # Look for explicit score mentions in the response\n", " has_acceptable_score = \"Score: 0.7\" in evaluation_result or \"Score: 1\" in evaluation_result\n", " quality_score = 1.0 if has_acceptable_score else 0.5\n", " \n", " # Determine if response meets quality threshold\n", " is_accepted = quality_score >= 0.7\n", " \n", " # Return appropriate feedback based on acceptance\n", " feedback = None if is_accepted else evaluation_result\n", " \n", " return is_accepted, feedback\n", "\n", "def optimize_prompt_runner(prompt, provider=\"groq\", max_iterations=5):\n", " current_text = prompt\n", " previous_feedback = None\n", " \n", " for iteration in range(max_iterations):\n", " print(f\"\\nšŸ”„ Iteration {iteration + 1}\")\n", " \n", " # Step 1: Generate optimized version based on current text and feedback\n", " optimized_text = llm_optimizer(provider, current_text, previous_feedback)\n", " print(f\"🧠 Optimized: {optimized_text}\\n\")\n", " \n", " # Step 2: Evaluate the optimized version\n", " is_accepted, evaluation_feedback = llm_evaluator('openai', prompt, optimized_text)\n", " \n", " if is_accepted:\n", " print(\"āœ… Accepted by evaluator\")\n", " return optimized_text\n", " else:\n", " print(f\"āŒ Feedback: {evaluation_feedback}\\n\")\n", " # Step 3: Prepare for next iteration\n", " current_text = optimized_text\n", " previous_feedback = evaluation_feedback \n", "\n", " print(\"āš ļø Max iterations reached.\")\n", " return current_text\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Testing the Evaluator-Optimizer" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "prompt = \"Summarize faiss vector search\"\n", "final_output = optimize_prompt_runner(prompt, provider=\"groq\")\n", "print(f\"šŸŽÆ Final Output: {final_output}\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.8" } }, "nbformat": 4, "nbformat_minor": 2 }