Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,399 +1,275 @@
|
|
1 |
# algoforge_prime/app.py
|
2 |
import gradio as gr
|
3 |
import os
|
4 |
-
import time
|
5 |
|
6 |
-
|
7 |
-
|
8 |
-
from core.llm_clients import initialize_all_clients, is_gemini_api_configured, is_hf_api_configured # Use getters
|
9 |
-
initialize_all_clients() # CRITICAL: Call initialization first
|
10 |
|
11 |
-
# Now get the status AFTER initialization
|
12 |
GEMINI_API_READY = is_gemini_api_configured()
|
13 |
HF_API_READY = is_hf_api_configured()
|
14 |
|
15 |
from core.generation_engine import generate_initial_solutions
|
16 |
-
from core.evaluation_engine import evaluate_solution_candidate,
|
17 |
from core.evolution_engine import evolve_solution
|
18 |
from prompts.system_prompts import get_system_prompt
|
19 |
from prompts.prompt_templates import format_code_test_analysis_user_prompt
|
|
|
20 |
|
21 |
# --- Application Configuration (Models, Defaults) ---
|
|
|
22 |
AVAILABLE_MODELS_CONFIG = {}
|
23 |
UI_DEFAULT_MODEL_KEY = None
|
24 |
-
|
25 |
-
# Define Gemini 1.5 model IDs (use the exact strings from Google's documentation)
|
26 |
-
# These are common aliases; specific versioned IDs might also be available.
|
27 |
GEMINI_1_5_PRO_LATEST_ID = "gemini-1.5-pro-latest"
|
28 |
GEMINI_1_5_FLASH_LATEST_ID = "gemini-1.5-flash-latest"
|
29 |
-
|
30 |
-
# Populate with Gemini models first if API is configured
|
31 |
if GEMINI_API_READY:
|
32 |
AVAILABLE_MODELS_CONFIG.update({
|
33 |
f"Google Gemini 1.5 Pro (API - Recommended)": {"id": GEMINI_1_5_PRO_LATEST_ID, "type": "google_gemini"},
|
34 |
f"Google Gemini 1.5 Flash (API - Fast)": {"id": GEMINI_1_5_FLASH_LATEST_ID, "type": "google_gemini"},
|
35 |
-
# You can add older Gemini versions here if needed
|
36 |
"Google Gemini 1.0 Pro (API - Legacy)": {"id": "gemini-1.0-pro-latest", "type": "google_gemini"},
|
37 |
})
|
38 |
-
# Prioritize 1.5 Pro as default
|
39 |
UI_DEFAULT_MODEL_KEY = f"Google Gemini 1.5 Pro (API - Recommended)"
|
40 |
-
|
41 |
-
|
42 |
-
UI_DEFAULT_MODEL_KEY = f"Google Gemini 1.5 Flash (API - Fast)"
|
43 |
-
print(f"INFO: app.py - Gemini models populated. Default set to: {UI_DEFAULT_MODEL_KEY}")
|
44 |
-
else:
|
45 |
-
print("WARNING: app.py - Gemini API not configured (checked via getter); Gemini models will be unavailable.")
|
46 |
-
|
47 |
-
# Populate with Hugging Face models if API is configured
|
48 |
if HF_API_READY:
|
49 |
AVAILABLE_MODELS_CONFIG.update({
|
50 |
"Google Gemma 2B (HF - Quick Test)": {"id": "google/gemma-2b-it", "type": "hf"},
|
51 |
"Mistral 7B Instruct (HF)": {"id": "mistralai/Mistral-7B-Instruct-v0.2", "type": "hf"},
|
52 |
-
"CodeLlama 7B Instruct (HF)": {"id": "codellama/CodeLlama-7b-Instruct-hf", "type": "hf"},
|
53 |
})
|
54 |
-
if not UI_DEFAULT_MODEL_KEY:
|
55 |
-
|
56 |
-
print("INFO: app.py - HF models populated; default set to an HF model as Gemini was not available.")
|
57 |
-
else:
|
58 |
-
print("INFO: app.py - HF models also populated as alternatives.")
|
59 |
-
else:
|
60 |
-
print("WARNING: app.py - Hugging Face API not configured (checked via getter); HF models will be unavailable.")
|
61 |
-
|
62 |
-
# Absolute fallback if no models could be configured at all
|
63 |
if not AVAILABLE_MODELS_CONFIG:
|
64 |
-
print("CRITICAL APP ERROR: No models could be configured. Check API keys in Space Secrets and restart Space.")
|
65 |
AVAILABLE_MODELS_CONFIG["No Models Available (Check API Keys & Restart)"] = {"id": "dummy_error", "type": "none"}
|
66 |
UI_DEFAULT_MODEL_KEY = "No Models Available (Check API Keys & Restart)"
|
67 |
-
elif not UI_DEFAULT_MODEL_KEY and AVAILABLE_MODELS_CONFIG:
|
68 |
-
# If somehow UI_DEFAULT_MODEL_KEY is still None, pick the first available model
|
69 |
-
UI_DEFAULT_MODEL_KEY = list(AVAILABLE_MODELS_CONFIG.keys())[0]
|
70 |
-
print(f"WARNING: app.py - UI_DEFAULT_MODEL_KEY was not set by primary logic, falling back to first available: {UI_DEFAULT_MODEL_KEY}")
|
71 |
|
72 |
|
73 |
# --- Main Orchestration Logic for Gradio ---
|
74 |
def run_algoforge_simulation_orchestrator(
|
75 |
-
problem_type_selected: str,
|
76 |
-
|
77 |
-
|
78 |
-
user_provided_tests_code: str,
|
79 |
-
num_initial_solutions_to_gen: int,
|
80 |
-
selected_model_ui_key: str,
|
81 |
-
genesis_temp: float, genesis_max_tokens: int,
|
82 |
-
critique_temp: float, critique_max_tokens: int,
|
83 |
evolution_temp: float, evolution_max_tokens: int,
|
84 |
-
progress=gr.Progress(track_tqdm=True)
|
85 |
):
|
86 |
progress(0, desc="Initializing AlgoForge Prime™...")
|
87 |
-
log_entries = [f"**AlgoForge Prime™ Cycle Starting at {time.strftime('%Y-%m-%d %H:%M:%S')}**"]
|
88 |
-
|
89 |
-
|
90 |
-
# Basic input validation
|
91 |
-
if not problem_description_text.strip():
|
92 |
-
error_msg = "CRITICAL INPUT ERROR: Problem Description is mandatory. Please describe the problem."
|
93 |
-
log_entries.append(error_msg)
|
94 |
-
return error_msg, "", "", "\n".join(log_entries), "" # Return 5 values for outputs
|
95 |
-
|
96 |
current_model_config = AVAILABLE_MODELS_CONFIG.get(selected_model_ui_key)
|
97 |
-
if not current_model_config or current_model_config["type"] == "none":
|
98 |
-
|
99 |
-
|
100 |
-
return error_msg, "", "", "\n".join(log_entries), ""
|
101 |
|
102 |
-
|
103 |
-
|
104 |
-
log_entries.append(f"User Unit Tests Provided: {'Yes' if user_provided_tests_code.strip() else 'No'}")
|
105 |
-
|
106 |
-
# Prepare LLM configurations for each stage
|
107 |
llm_config_genesis = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": genesis_temp, "max_tokens": genesis_max_tokens}
|
108 |
-
|
109 |
-
llm_config_evolution = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": evolution_temp, "max_tokens": evolution_max_tokens}
|
110 |
|
111 |
-
# --- STAGE 1: GENESIS ---
|
112 |
-
progress(0.05, desc="Stage 1: Genesis Engine - Generating Solutions...")
|
113 |
-
log_entries.append("\n**------ STAGE 1: GENESIS ENGINE ------**")
|
114 |
-
|
115 |
-
initial_raw_solutions = generate_initial_solutions(
|
116 |
-
problem_description_text, initial_hints_text, problem_type_selected,
|
117 |
-
num_initial_solutions_to_gen, llm_config_genesis
|
118 |
-
)
|
119 |
-
log_entries.append(f"Genesis Engine produced {len(initial_raw_solutions)} raw solution candidate(s).")
|
120 |
-
for i, sol_text in enumerate(initial_raw_solutions):
|
121 |
-
log_entries.append(f" Candidate {i+1} (Raw Snippet): {str(sol_text)[:120]}...") # str() for safety
|
122 |
|
123 |
# --- STAGE 2: CRITIQUE & AUTOMATED EVALUATION ---
|
124 |
-
progress(0.25, desc="Stage 2:
|
125 |
-
log_entries.append("\n**------ STAGE 2: CRITIQUE CRUCIBLE & AUTOMATED EVALUATION ------**")
|
126 |
-
|
127 |
-
|
128 |
|
129 |
for i, candidate_solution_text in enumerate(initial_raw_solutions):
|
130 |
-
|
131 |
-
progress(current_progress, desc=f"Evaluating Candidate {i+1} of {num_initial_solutions_to_gen}...")
|
132 |
log_entries.append(f"\n--- Evaluating Candidate {i+1} ---")
|
133 |
-
|
134 |
-
|
135 |
-
str(candidate_solution_text), # Ensure it's a string before passing
|
136 |
-
problem_description_text, problem_type_selected,
|
137 |
user_provided_tests_code, llm_config_critique
|
138 |
)
|
139 |
-
|
140 |
-
log_entries.append(f" Final Combined Score: {evaluation_obj.score}/10")
|
141 |
-
log_entries.append(f" Automated Tests: {evaluation_obj.passed_tests}/{evaluation_obj.total_tests} passed.")
|
142 |
-
if evaluation_obj.execution_summary: log_entries.append(f" Execution Summary: {evaluation_obj.execution_summary}")
|
143 |
-
log_entries.append(f" LLM Critique (Snippet): {str(evaluation_obj.critique_text)[:150]}...")
|
144 |
-
|
145 |
evaluated_candidates_list.append({
|
146 |
"id": i + 1,
|
147 |
-
"solution_text": str(candidate_solution_text),
|
148 |
-
"
|
149 |
})
|
|
|
|
|
|
|
|
|
|
|
150 |
|
151 |
-
# Format display for initial solutions & evaluations
|
152 |
initial_solutions_display_markdown = []
|
153 |
for data in evaluated_candidates_list:
|
154 |
initial_solutions_display_markdown.append(
|
155 |
-
f"**Candidate {data['id']}:**\n"
|
156 |
-
|
157 |
-
f"```python\n{data['solution_text']}\n```\n\n"
|
158 |
-
f"**Evaluation Verdict (Combined Score: {data['evaluation_result'].score}/10):**\n"
|
159 |
-
f"{data['evaluation_result'].critique_text}\n---"
|
160 |
)
|
161 |
|
162 |
# --- STAGE 3: SELECTION OF CHAMPION ---
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
potentially_viable_candidates = [
|
167 |
cand for cand in evaluated_candidates_list
|
168 |
-
if cand["
|
169 |
cand["solution_text"] and not str(cand["solution_text"]).startswith("ERROR")
|
170 |
]
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
log_entries.append(f" CRITICAL: {final_error_msg}")
|
175 |
-
return "\n\n".join(initial_solutions_display_markdown), final_error_msg, "", "\n".join(log_entries), ""
|
176 |
-
|
177 |
-
potentially_viable_candidates.sort(key=lambda x: x["evaluation_result"].score, reverse=True)
|
178 |
champion_candidate_data = potentially_viable_candidates[0]
|
179 |
-
|
180 |
-
log_entries.append(f"Champion Selected: Candidate {champion_candidate_data['id']} "
|
181 |
-
f"(Solution Snippet: {str(champion_candidate_data['solution_text'])[:60]}...) "
|
182 |
-
f"with evaluation score {champion_candidate_data['evaluation_result'].score}/10.")
|
183 |
-
|
184 |
champion_display_markdown = (
|
185 |
f"**Champion Candidate ID: {champion_candidate_data['id']} "
|
186 |
-
f"(Original Combined Score: {champion_candidate_data['
|
187 |
f"```python\n{champion_candidate_data['solution_text']}\n```\n\n"
|
188 |
-
f"**Original Comprehensive Evaluation for this Champion:**\n"
|
189 |
-
f"{champion_candidate_data['evaluation_result'].critique_text}"
|
190 |
)
|
191 |
|
|
|
192 |
# --- STAGE 4: EVOLUTIONARY FORGE ---
|
193 |
-
progress(0.75, desc="Stage 4:
|
194 |
-
log_entries.append("\n**------ STAGE 4: EVOLUTIONARY FORGE ------**")
|
|
|
195 |
|
196 |
evolved_solution_code = evolve_solution(
|
197 |
str(champion_candidate_data["solution_text"]),
|
198 |
-
|
199 |
-
champion_candidate_data["
|
200 |
problem_description_text,
|
201 |
problem_type_selected,
|
202 |
llm_config_evolution
|
203 |
)
|
204 |
-
|
205 |
-
|
206 |
evolved_solution_display_markdown = ""
|
207 |
-
ai_test_analysis_markdown = ""
|
208 |
|
209 |
if str(evolved_solution_code).startswith("ERROR"):
|
210 |
evolved_solution_display_markdown = f"**Evolution Stage Failed:**\n{evolved_solution_code}"
|
211 |
else:
|
212 |
-
evolved_solution_display_markdown = f"**✨ AlgoForge
|
213 |
|
|
|
214 |
if "python" in problem_type_selected.lower() and user_provided_tests_code.strip():
|
215 |
-
progress(0.9, desc="Post-Evolution: Re-
|
216 |
-
log_entries.append("\n--- Post-Evolution
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
evolved_code_eval_result = evaluate_solution_candidate(
|
221 |
-
str(evolved_solution_code), problem_description_text, problem_type_selected,
|
222 |
-
user_provided_tests_code, evolved_critique_config
|
223 |
)
|
224 |
|
225 |
evolved_solution_display_markdown += (
|
226 |
f"\n\n**Post-Evolution Automated Test Results (Simulated):**\n"
|
227 |
-
f"{
|
228 |
-
f"Passed:
|
|
|
229 |
)
|
230 |
-
|
231 |
-
|
|
|
|
|
|
|
|
|
232 |
|
233 |
-
|
|
|
|
|
234 |
progress(0.95, desc="Post-Evolution: AI Analyzing Test Results...")
|
235 |
-
|
236 |
-
analysis_user_prompt = format_code_test_analysis_user_prompt(
|
237 |
-
|
238 |
-
user_provided_tests_code,
|
239 |
-
str(evolved_code_eval_result.execution_summary)
|
240 |
-
)
|
241 |
-
analysis_system_prompt = get_system_prompt("code_execution_explainer")
|
242 |
-
|
243 |
-
llm_analysis_config = {"type": current_model_config["type"], "model_id": current_model_config["id"],
|
244 |
-
"temp": 0.3, "max_tokens": critique_max_tokens + 150}
|
245 |
-
|
246 |
from core.llm_clients import call_huggingface_api, call_gemini_api
|
247 |
-
|
248 |
explanation_response_obj = None
|
249 |
-
if llm_analysis_config["type"] == "hf":
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
|
254 |
-
if explanation_response_obj and explanation_response_obj.success:
|
255 |
-
ai_test_analysis_markdown = f"**AI Analysis of Evolved Code's Test Performance:**\n{explanation_response_obj.text}"
|
256 |
-
log_entries.append(f" AI Test Analysis (Snippet): {str(explanation_response_obj.text)[:100]}...")
|
257 |
-
elif explanation_response_obj:
|
258 |
-
ai_test_analysis_markdown = f"**AI Analysis of Test Performance Failed:**\n{explanation_response_obj.error}"
|
259 |
-
log_entries.append(f" AI Test Analysis Error: {explanation_response_obj.error}")
|
260 |
|
|
|
261 |
total_time = time.time() - start_time
|
262 |
-
log_entries.append(f"\n**AlgoForge
|
263 |
progress(1.0, desc="Cycle Complete!")
|
264 |
-
|
265 |
return "\n\n".join(initial_solutions_display_markdown), champion_display_markdown, evolved_solution_display_markdown, "\n".join(log_entries), ai_test_analysis_markdown
|
266 |
|
267 |
|
268 |
# --- Gradio UI Definition ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
269 |
intro_markdown = """
|
270 |
-
# ✨ AlgoForge
|
271 |
-
This version
|
272 |
-
featuring (simulated)
|
273 |
|
274 |
-
**API Keys Required in Space Secrets
|
275 |
-
- `GOOGLE_API_KEY` (Primary): For Google Gemini API models.
|
276 |
- `HF_TOKEN` (Secondary): For Hugging Face hosted models.
|
277 |
"""
|
278 |
-
|
279 |
ui_token_status_md = ""
|
280 |
-
if not GEMINI_API_READY and not HF_API_READY:
|
281 |
-
ui_token_status_md = "<p style='color:red;'>⚠️ **CRITICAL: NEITHER GOOGLE_API_KEY NOR HF_TOKEN are configured or working correctly.** The application will not be able to call any LLMs.</p>"
|
282 |
else:
|
283 |
-
if GEMINI_API_READY: ui_token_status_md += "<p style='color:green;'>✅ Google Gemini API
|
284 |
-
else: ui_token_status_md += "<p style='color:orange;'>⚠️
|
285 |
-
if HF_API_READY: ui_token_status_md += "<p style='color:green;'>✅ Hugging Face API
|
286 |
-
else: ui_token_status_md += "<p style='color:orange;'>⚠️
|
287 |
|
288 |
-
|
289 |
-
with gr.Blocks(theme=gr.themes.Soft(primary_hue="green", secondary_hue="lime"), title="AlgoForge Prime™ (1.5 Focus)") as app_demo:
|
290 |
gr.Markdown(intro_markdown)
|
291 |
gr.HTML(ui_token_status_md)
|
292 |
-
|
293 |
-
#
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
)
|
298 |
-
|
299 |
if not usable_models_available:
|
300 |
-
gr.Markdown("<h2 style='color:red;'>No LLM models
|
301 |
else:
|
302 |
with gr.Row():
|
303 |
-
# Input Column
|
304 |
-
|
|
|
|
|
|
|
305 |
gr.Markdown("## 💡 1. Define the Challenge")
|
306 |
-
problem_type_dropdown = gr.Dropdown(
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
)
|
311 |
-
problem_description_textbox = gr.Textbox(
|
312 |
-
lines=5, label="Problem Description / Desired Outcome",
|
313 |
-
placeholder="Example for 'Python Algorithm with Tests':\n`def calculate_factorial(n: int) -> int:`\nCalculates factorial of n. Should handle n=0 (returns 1) and raise ValueError for n<0."
|
314 |
-
)
|
315 |
-
initial_hints_textbox = gr.Textbox(
|
316 |
-
lines=3, label="Initial Thoughts / Constraints (Optional)",
|
317 |
-
placeholder="E.g., 'Prefer an iterative solution over recursive for factorial.' or 'Consider time complexity.'"
|
318 |
-
)
|
319 |
-
user_tests_textbox = gr.Textbox(
|
320 |
-
lines=6, label="Python Unit Tests (Optional, one `assert` per line)",
|
321 |
-
placeholder="assert calculate_factorial(0) == 1\nassert calculate_factorial(5) == 120\n# For expected errors (advanced, not fully simulated here):\n# try:\n# calculate_factorial(-1)\n# assert False, \"ValueError not raised\"\n# except ValueError:\n# assert True",
|
322 |
-
info="For 'Python Algorithm with Tests'. Ensure function names match your problem description."
|
323 |
-
)
|
324 |
-
|
325 |
gr.Markdown("## ⚙️ 2. Configure The Forge")
|
326 |
-
model_selection_dropdown = gr.Dropdown(
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
critique_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.4, step=0.05, label="Critique Temp")
|
340 |
-
critique_max_tokens_slider = gr.Slider(minimum=150, maximum=2048, value=512, step=64, label="Critique Max Output Tokens")
|
341 |
-
with gr.Row():
|
342 |
-
evolution_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.75, step=0.05, label="Evolution Temp")
|
343 |
-
evolution_max_tokens_slider = gr.Slider(minimum=256, maximum=4096, value=1536, step=128, label="Evolution Max Output Tokens")
|
344 |
-
|
345 |
-
engage_button = gr.Button("🚀 ENGAGE ALGOFORGE PRIME™ 🚀", variant="primary", size="lg", elem_id="engage_button_elem")
|
346 |
-
|
347 |
-
# Output Column
|
348 |
-
with gr.Column(scale=3):
|
349 |
gr.Markdown("## 🔥 3. The Forge's Output")
|
350 |
-
with gr.Tabs(
|
351 |
-
with gr.TabItem("📜
|
352 |
-
|
353 |
-
with gr.TabItem("
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
output_ai_test_analysis_markdown = gr.Markdown(label="AI Analysis of Evolved Code's Test Performance")
|
358 |
-
with gr.TabItem("🛠️ Interaction Log (Developer View)", id="tab_log"):
|
359 |
-
output_interaction_log_markdown = gr.Markdown(label="Detailed Log of LLM Prompts & Responses")
|
360 |
|
361 |
-
# Connect button to the orchestration function
|
362 |
engage_button.click(
|
363 |
fn=run_algoforge_simulation_orchestrator,
|
364 |
-
inputs=[
|
365 |
-
|
366 |
-
num_initial_solutions_slider, model_selection_dropdown,
|
367 |
-
genesis_temp_slider, genesis_max_tokens_slider,
|
368 |
-
critique_temp_slider, critique_max_tokens_slider,
|
369 |
-
evolution_temp_slider, evolution_max_tokens_slider
|
370 |
-
],
|
371 |
-
outputs=[
|
372 |
-
output_initial_solutions_markdown, output_champion_markdown,
|
373 |
-
output_evolved_markdown, output_interaction_log_markdown,
|
374 |
-
output_ai_test_analysis_markdown # Matched to the 5 outputs of orchestrator
|
375 |
-
]
|
376 |
)
|
377 |
-
|
378 |
gr.Markdown("---")
|
379 |
-
gr.Markdown(
|
380 |
-
|
381 |
-
"The (simulated) unit testing feature is for illustrative purposes. "
|
382 |
-
"**NEVER run LLM-generated code from an untrusted source in an unrestricted environment.** "
|
383 |
-
"Implementing robust and secure code sandboxing is complex and absolutely critical for safety in real-world applications. "
|
384 |
-
"LLM outputs always require careful human review and verification."
|
385 |
-
)
|
386 |
|
387 |
# --- Entry Point for Running the Gradio App ---
|
388 |
if __name__ == "__main__":
|
389 |
print("="*80)
|
390 |
-
print("AlgoForge
|
391 |
-
|
392 |
-
print(f"
|
393 |
-
|
394 |
-
if not GEMINI_API_READY and not HF_API_READY:
|
395 |
-
print(" CRITICAL WARNING: No API keys seem to be configured correctly. The application will likely be non-functional.")
|
396 |
-
print(f" UI Default Model Key: {UI_DEFAULT_MODEL_KEY}")
|
397 |
-
print(f" Available models for UI: {list(AVAILABLE_MODELS_CONFIG.keys())}")
|
398 |
-
print("="*80)
|
399 |
app_demo.launch(debug=True, server_name="0.0.0.0")
|
|
|
1 |
# algoforge_prime/app.py
|
2 |
import gradio as gr
|
3 |
import os
|
4 |
+
import time
|
5 |
|
6 |
+
from core.llm_clients import initialize_all_clients, is_gemini_api_configured, is_hf_api_configured
|
7 |
+
initialize_all_clients()
|
|
|
|
|
8 |
|
|
|
9 |
GEMINI_API_READY = is_gemini_api_configured()
|
10 |
HF_API_READY = is_hf_api_configured()
|
11 |
|
12 |
from core.generation_engine import generate_initial_solutions
|
13 |
+
from core.evaluation_engine import evaluate_solution_candidate, EvaluationResultOutput # Use new class name
|
14 |
from core.evolution_engine import evolve_solution
|
15 |
from prompts.system_prompts import get_system_prompt
|
16 |
from prompts.prompt_templates import format_code_test_analysis_user_prompt
|
17 |
+
from core.safe_executor import execute_python_code_with_tests, ExecutionResult # For re-evaluating evolved code
|
18 |
|
19 |
# --- Application Configuration (Models, Defaults) ---
|
20 |
+
# ... (Keep your AVAILABLE_MODELS_CONFIG and UI_DEFAULT_MODEL_KEY logic as in the previous full app.py)
|
21 |
AVAILABLE_MODELS_CONFIG = {}
|
22 |
UI_DEFAULT_MODEL_KEY = None
|
|
|
|
|
|
|
23 |
GEMINI_1_5_PRO_LATEST_ID = "gemini-1.5-pro-latest"
|
24 |
GEMINI_1_5_FLASH_LATEST_ID = "gemini-1.5-flash-latest"
|
|
|
|
|
25 |
if GEMINI_API_READY:
|
26 |
AVAILABLE_MODELS_CONFIG.update({
|
27 |
f"Google Gemini 1.5 Pro (API - Recommended)": {"id": GEMINI_1_5_PRO_LATEST_ID, "type": "google_gemini"},
|
28 |
f"Google Gemini 1.5 Flash (API - Fast)": {"id": GEMINI_1_5_FLASH_LATEST_ID, "type": "google_gemini"},
|
|
|
29 |
"Google Gemini 1.0 Pro (API - Legacy)": {"id": "gemini-1.0-pro-latest", "type": "google_gemini"},
|
30 |
})
|
|
|
31 |
UI_DEFAULT_MODEL_KEY = f"Google Gemini 1.5 Pro (API - Recommended)"
|
32 |
+
if UI_DEFAULT_MODEL_KEY not in AVAILABLE_MODELS_CONFIG: UI_DEFAULT_MODEL_KEY = f"Google Gemini 1.5 Flash (API - Fast)"
|
33 |
+
else: print("WARNING: app.py - Gemini API not configured; Gemini models will be unavailable.")
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
if HF_API_READY:
|
35 |
AVAILABLE_MODELS_CONFIG.update({
|
36 |
"Google Gemma 2B (HF - Quick Test)": {"id": "google/gemma-2b-it", "type": "hf"},
|
37 |
"Mistral 7B Instruct (HF)": {"id": "mistralai/Mistral-7B-Instruct-v0.2", "type": "hf"},
|
|
|
38 |
})
|
39 |
+
if not UI_DEFAULT_MODEL_KEY: UI_DEFAULT_MODEL_KEY = "Google Gemma 2B (HF - Quick Test)"
|
40 |
+
else: print("WARNING: app.py - Hugging Face API not configured; HF models will be unavailable.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
if not AVAILABLE_MODELS_CONFIG:
|
|
|
42 |
AVAILABLE_MODELS_CONFIG["No Models Available (Check API Keys & Restart)"] = {"id": "dummy_error", "type": "none"}
|
43 |
UI_DEFAULT_MODEL_KEY = "No Models Available (Check API Keys & Restart)"
|
44 |
+
elif not UI_DEFAULT_MODEL_KEY and AVAILABLE_MODELS_CONFIG: UI_DEFAULT_MODEL_KEY = list(AVAILABLE_MODELS_CONFIG.keys())[0]
|
|
|
|
|
|
|
45 |
|
46 |
|
47 |
# --- Main Orchestration Logic for Gradio ---
|
48 |
def run_algoforge_simulation_orchestrator(
|
49 |
+
problem_type_selected: str, problem_description_text: str, initial_hints_text: str,
|
50 |
+
user_provided_tests_code: str, num_initial_solutions_to_gen: int, selected_model_ui_key: str,
|
51 |
+
genesis_temp: float, genesis_max_tokens: int, critique_temp: float, critique_max_tokens: int,
|
|
|
|
|
|
|
|
|
|
|
52 |
evolution_temp: float, evolution_max_tokens: int,
|
53 |
+
progress=gr.Progress(track_tqdm=True)
|
54 |
):
|
55 |
progress(0, desc="Initializing AlgoForge Prime™...")
|
56 |
+
log_entries = [f"**AlgoForge Prime™ Omega Conceptual Cycle Starting at {time.strftime('%Y-%m-%d %H:%M:%S')}**"]
|
57 |
+
# ... (input validation and model config setup as before) ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
current_model_config = AVAILABLE_MODELS_CONFIG.get(selected_model_ui_key)
|
59 |
+
if not current_model_config or current_model_config["type"] == "none": # Handle missing config
|
60 |
+
return "ERROR: Model configuration not found or invalid. Check API keys.", "", "", "\n".join(log_entries), ""
|
61 |
+
# ...
|
|
|
62 |
|
63 |
+
# --- STAGE 1: GENESIS ---
|
64 |
+
# ... (generate_initial_solutions call as before) ...
|
|
|
|
|
|
|
65 |
llm_config_genesis = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": genesis_temp, "max_tokens": genesis_max_tokens}
|
66 |
+
initial_raw_solutions = generate_initial_solutions(problem_description_text, initial_hints_text, problem_type_selected, num_initial_solutions_to_gen, llm_config_genesis)
|
|
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
|
69 |
# --- STAGE 2: CRITIQUE & AUTOMATED EVALUATION ---
|
70 |
+
progress(0.25, desc="Stage 2: Evaluating Candidates with LLM & (Simulated) Tests...")
|
71 |
+
log_entries.append("\n**------ STAGE 2: CRITIQUE CRUCIBLE & AUTOMATED EXECUTION/EVALUATION ------**")
|
72 |
+
evaluated_candidates_list = []
|
73 |
+
llm_config_critique = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": critique_temp, "max_tokens": critique_max_tokens}
|
74 |
|
75 |
for i, candidate_solution_text in enumerate(initial_raw_solutions):
|
76 |
+
# ... (progress update) ...
|
|
|
77 |
log_entries.append(f"\n--- Evaluating Candidate {i+1} ---")
|
78 |
+
evaluation_output_obj = evaluate_solution_candidate( # type: EvaluationResultOutput
|
79 |
+
str(candidate_solution_text), problem_description_text, problem_type_selected,
|
|
|
|
|
80 |
user_provided_tests_code, llm_config_critique
|
81 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
evaluated_candidates_list.append({
|
83 |
"id": i + 1,
|
84 |
+
"solution_text": str(candidate_solution_text),
|
85 |
+
"evaluation_obj": evaluation_output_obj # Store the whole object
|
86 |
})
|
87 |
+
log_entries.append(f" Combined Score: {evaluation_output_obj.combined_score}/10")
|
88 |
+
if evaluation_output_obj.execution_details:
|
89 |
+
log_entries.append(f" Test Results: {evaluation_output_obj.execution_details.passed_tests}/{evaluation_output_obj.execution_details.total_tests} passed.")
|
90 |
+
if evaluation_output_obj.execution_details.error: log_entries.append(f" Execution Error: {evaluation_output_obj.execution_details.error}")
|
91 |
+
log_entries.append(f" LLM Critique (Snippet): {str(evaluation_output_obj.llm_critique_text)[:150]}...")
|
92 |
|
|
|
93 |
initial_solutions_display_markdown = []
|
94 |
for data in evaluated_candidates_list:
|
95 |
initial_solutions_display_markdown.append(
|
96 |
+
f"**Candidate {data['id']}:**\n```python\n{data['solution_text']}\n```\n\n"
|
97 |
+
f"**Evaluation Verdict (Combined Score: {data['evaluation_obj'].combined_score}/10):**\n{data['evaluation_obj'].get_display_critique()}\n---" # Use method
|
|
|
|
|
|
|
98 |
)
|
99 |
|
100 |
# --- STAGE 3: SELECTION OF CHAMPION ---
|
101 |
+
# ... (selection logic as before, but use `data['evaluation_obj'].combined_score`) ...
|
102 |
+
progress(0.7, desc="Stage 3: Selecting Champion...")
|
|
|
103 |
potentially_viable_candidates = [
|
104 |
cand for cand in evaluated_candidates_list
|
105 |
+
if cand["evaluation_obj"] and cand["evaluation_obj"].combined_score > 0 and \
|
106 |
cand["solution_text"] and not str(cand["solution_text"]).startswith("ERROR")
|
107 |
]
|
108 |
+
if not potentially_viable_candidates: # Handle no viable candidates
|
109 |
+
return "\n\n".join(initial_solutions_display_markdown), "No viable candidates after evaluation.", "", "\n".join(log_entries), ""
|
110 |
+
potentially_viable_candidates.sort(key=lambda x: x["evaluation_obj"].combined_score, reverse=True)
|
|
|
|
|
|
|
|
|
111 |
champion_candidate_data = potentially_viable_candidates[0]
|
|
|
|
|
|
|
|
|
|
|
112 |
champion_display_markdown = (
|
113 |
f"**Champion Candidate ID: {champion_candidate_data['id']} "
|
114 |
+
f"(Original Combined Score: {champion_candidate_data['evaluation_obj'].combined_score}/10):**\n"
|
115 |
f"```python\n{champion_candidate_data['solution_text']}\n```\n\n"
|
116 |
+
f"**Original Comprehensive Evaluation for this Champion:**\n{champion_candidate_data['evaluation_obj'].get_display_critique()}"
|
|
|
117 |
)
|
118 |
|
119 |
+
|
120 |
# --- STAGE 4: EVOLUTIONARY FORGE ---
|
121 |
+
progress(0.75, desc="Stage 4: Evolving Champion...")
|
122 |
+
log_entries.append("\n**------ STAGE 4: EVOLUTIONARY FORGE (Informed by Tests) ------**")
|
123 |
+
llm_config_evolution = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": evolution_temp, "max_tokens": evolution_max_tokens}
|
124 |
|
125 |
evolved_solution_code = evolve_solution(
|
126 |
str(champion_candidate_data["solution_text"]),
|
127 |
+
champion_candidate_data["evaluation_obj"], # Pass the whole EvaluationResultOutput object
|
128 |
+
# champion_candidate_data["evaluation_obj"].combined_score, # Score is inside the object
|
129 |
problem_description_text,
|
130 |
problem_type_selected,
|
131 |
llm_config_evolution
|
132 |
)
|
133 |
+
# ... (log evolved solution snippet) ...
|
|
|
134 |
evolved_solution_display_markdown = ""
|
135 |
+
ai_test_analysis_markdown = ""
|
136 |
|
137 |
if str(evolved_solution_code).startswith("ERROR"):
|
138 |
evolved_solution_display_markdown = f"**Evolution Stage Failed:**\n{evolved_solution_code}"
|
139 |
else:
|
140 |
+
evolved_solution_display_markdown = f"**✨ AlgoForge Omega™ Evolved Artifact ✨:**\n```python\n{evolved_solution_code}\n```"
|
141 |
|
142 |
+
# Re-evaluate the evolved solution with unit tests
|
143 |
if "python" in problem_type_selected.lower() and user_provided_tests_code.strip():
|
144 |
+
progress(0.9, desc="Post-Evolution: Re-testing Evolved Code...")
|
145 |
+
log_entries.append("\n--- Post-Evolution Test of Evolved Code ---")
|
146 |
+
# Use the actual safe_executor here directly for testing evolved code
|
147 |
+
evolved_code_exec_result = execute_python_code_with_tests( # type: ExecutionResult
|
148 |
+
str(evolved_solution_code), user_provided_tests_code, timeout_seconds=10
|
|
|
|
|
|
|
149 |
)
|
150 |
|
151 |
evolved_solution_display_markdown += (
|
152 |
f"\n\n**Post-Evolution Automated Test Results (Simulated):**\n"
|
153 |
+
f" Tests Attempted: {evolved_code_exec_result.total_tests}\n"
|
154 |
+
f" Tests Passed: {evolved_code_exec_result.passed_tests}\n"
|
155 |
+
f" Execution Time: {evolved_code_exec_result.execution_time:.4f}s\n"
|
156 |
)
|
157 |
+
if evolved_code_exec_result.error:
|
158 |
+
evolved_solution_display_markdown += f" Execution Error/Output: {evolved_code_exec_result.error}\n"
|
159 |
+
elif evolved_code_exec_result.output:
|
160 |
+
evolved_solution_display_markdown += f" Execution Output (stdout):\n```\n{evolved_code_exec_result.output[:300]}\n```\n"
|
161 |
+
|
162 |
+
log_entries.append(f" Evolved Code Test Results: {evolved_code_exec_result}")
|
163 |
|
164 |
+
# Get LLM to explain the test results of the evolved code
|
165 |
+
if evolved_code_exec_result.total_tests > 0 :
|
166 |
+
# ... (AI Test Analysis call as before, using evolved_code_exec_result.error or .output for summary) ...
|
167 |
progress(0.95, desc="Post-Evolution: AI Analyzing Test Results...")
|
168 |
+
analysis_exec_summary = evolved_code_exec_result.error if evolved_code_exec_result.error else (evolved_code_exec_result.output if evolved_code_exec_result.output else "Tests completed.")
|
169 |
+
analysis_user_prompt = format_code_test_analysis_user_prompt(str(evolved_solution_code), user_provided_tests_code, f"Passed: {evolved_code_exec_result.passed_tests}/{evolved_code_exec_result.total_tests}. Detail: {analysis_exec_summary}")
|
170 |
+
# ... (rest of AI analysis call) ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
from core.llm_clients import call_huggingface_api, call_gemini_api
|
172 |
+
llm_analysis_config = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": 0.3, "max_tokens": critique_max_tokens + 150}
|
173 |
explanation_response_obj = None
|
174 |
+
if llm_analysis_config["type"] == "hf": explanation_response_obj = call_huggingface_api(analysis_user_prompt, llm_analysis_config["model_id"], llm_analysis_config["temp"], llm_analysis_config["max_tokens"], get_system_prompt("code_execution_explainer"))
|
175 |
+
elif llm_analysis_config["type"] == "google_gemini": explanation_response_obj = call_gemini_api(analysis_user_prompt, llm_analysis_config["model_id"], llm_analysis_config["temp"], llm_analysis_config["max_tokens"], get_system_prompt("code_execution_explainer"))
|
176 |
+
if explanation_response_obj and explanation_response_obj.success: ai_test_analysis_markdown = f"**AI Analysis of Evolved Code's Test Performance:**\n{explanation_response_obj.text}"
|
177 |
+
elif explanation_response_obj: ai_test_analysis_markdown = f"**AI Analysis Failed:**\n{explanation_response_obj.error}"
|
178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
|
180 |
+
# ... (Total time logging and return statement as before) ...
|
181 |
total_time = time.time() - start_time
|
182 |
+
log_entries.append(f"\n**AlgoForge Omega Cycle Complete. Total time: {total_time:.2f} seconds.**")
|
183 |
progress(1.0, desc="Cycle Complete!")
|
|
|
184 |
return "\n\n".join(initial_solutions_display_markdown), champion_display_markdown, evolved_solution_display_markdown, "\n".join(log_entries), ai_test_analysis_markdown
|
185 |
|
186 |
|
187 |
# --- Gradio UI Definition ---
|
188 |
+
# (This is IDENTICAL to the UI in the previous full app.py where you introduced the user_tests_tb)
|
189 |
+
# For brevity, ensure it's copied correctly. Key parts:
|
190 |
+
# - `intro_markdown` with updated title "AlgoForge Omega™ Conceptual Demo"
|
191 |
+
# - `ui_token_status_md` based on GEMINI_API_READY, HF_API_READY
|
192 |
+
# - `user_tests_tb` Gradio Textbox component
|
193 |
+
# - All sliders and dropdowns
|
194 |
+
# - `engage_button.click` connecting to `run_algoforge_simulation_orchestrator` with all inputs/outputs.
|
195 |
+
# ... PASTE THE FULL UI DEFINITION HERE from your last complete app.py version ...
|
196 |
intro_markdown = """
|
197 |
+
# ✨ AlgoForge Omega™ ✨: Conceptual Demo with (Simulated) Execution
|
198 |
+
This version demonstrates a conceptual workflow for AI-assisted algorithm discovery and refinement,
|
199 |
+
featuring **(simulated) execution of generated Python code against user-provided unit tests**.
|
200 |
|
201 |
+
**API Keys Required in Space Secrets:**
|
202 |
+
- `GOOGLE_API_KEY` (Primary): For Google Gemini API models.
|
203 |
- `HF_TOKEN` (Secondary): For Hugging Face hosted models.
|
204 |
"""
|
205 |
+
# ... (ui_token_status_md as before) ...
|
206 |
ui_token_status_md = ""
|
207 |
+
if not GEMINI_API_READY and not HF_API_READY: ui_token_status_md = "<p style='color:red;'>⚠️ CRITICAL: NEITHER API IS CONFIGURED. APP NON-FUNCTIONAL.</p>"
|
|
|
208 |
else:
|
209 |
+
if GEMINI_API_READY: ui_token_status_md += "<p style='color:green;'>✅ Google Gemini API Configured.</p>"
|
210 |
+
else: ui_token_status_md += "<p style='color:orange;'>⚠️ GOOGLE_API_KEY missing/failed.</p>"
|
211 |
+
if HF_API_READY: ui_token_status_md += "<p style='color:green;'>✅ Hugging Face API Configured.</p>"
|
212 |
+
else: ui_token_status_md += "<p style='color:orange;'>⚠️ HF_TOKEN missing/failed.</p>"
|
213 |
|
214 |
+
with gr.Blocks(theme=gr.themes.Soft(primary_hue="purple", secondary_hue="pink"), title="AlgoForge Omega™ Demo") as app_demo: # New theme
|
|
|
215 |
gr.Markdown(intro_markdown)
|
216 |
gr.HTML(ui_token_status_md)
|
217 |
+
# ... (Full UI layout as provided in the "write all the files" response, ensuring all inputs/outputs match orchestrator)
|
218 |
+
# This is the same UI structure as the last full app.py I provided.
|
219 |
+
# Ensure all inputs to engage_button.click match the orchestrator's signature.
|
220 |
+
# For brevity, I'm showing the structure. Copy from the previous full version.
|
221 |
+
usable_models_available = any(AVAILABLE_MODELS_CONFIG.get(key, {}).get("type") != "none" for key in AVAILABLE_MODELS_CONFIG)
|
|
|
|
|
222 |
if not usable_models_available:
|
223 |
+
gr.Markdown("<h2 style='color:red;'>No LLM models available for use. Check API keys and restart.</h2>")
|
224 |
else:
|
225 |
with gr.Row():
|
226 |
+
with gr.Column(scale=2): # Input Column
|
227 |
+
# ... (problem_type_dropdown, problem_description_textbox, initial_hints_textbox, user_tests_textbox)
|
228 |
+
# ... (model_selection_dropdown, num_initial_solutions_slider)
|
229 |
+
# ... (Accordion with LLM parameter sliders)
|
230 |
+
# ... (engage_button)
|
231 |
gr.Markdown("## 💡 1. Define the Challenge")
|
232 |
+
problem_type_dropdown = gr.Dropdown(choices=["Python Algorithm with Tests", "Python Algorithm (Critique Only)", "General Algorithm Idea"], label="Problem Type", value="Python Algorithm with Tests")
|
233 |
+
problem_description_textbox = gr.Textbox(lines=5, label="Problem Description")
|
234 |
+
initial_hints_textbox = gr.Textbox(lines=3, label="Initial Hints (Optional)")
|
235 |
+
user_tests_textbox = gr.Textbox(lines=6, label="Python Unit Tests (Optional, one `assert` per line)", placeholder="assert my_func(1) == 1")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
236 |
gr.Markdown("## ⚙️ 2. Configure The Forge")
|
237 |
+
model_selection_dropdown = gr.Dropdown(choices=list(AVAILABLE_MODELS_CONFIG.keys()), value=UI_DEFAULT_MODEL_KEY, label="LLM Core Model")
|
238 |
+
num_initial_solutions_slider = gr.Slider(1, 3, value=2, step=1, label="# Initial Solutions")
|
239 |
+
with gr.Accordion("Advanced LLM Parameters", open=False):
|
240 |
+
genesis_temp_slider = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Genesis Temp")
|
241 |
+
genesis_max_tokens_slider = gr.Slider(256, 4096, value=1024, step=128, label="Genesis Max Tokens")
|
242 |
+
critique_temp_slider = gr.Slider(0.0, 1.0, value=0.4, step=0.05, label="Critique Temp")
|
243 |
+
critique_max_tokens_slider = gr.Slider(150, 2048, value=512, step=64, label="Critique Max Tokens")
|
244 |
+
evolution_temp_slider = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Evolution Temp")
|
245 |
+
evolution_max_tokens_slider = gr.Slider(256, 4096, value=1536, step=128, label="Evolution Max Tokens")
|
246 |
+
engage_button = gr.Button("🚀 ENGAGE ALGOFORGE OMEGA™ 🚀", variant="primary")
|
247 |
+
|
248 |
+
|
249 |
+
with gr.Column(scale=3): # Output Column
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
250 |
gr.Markdown("## 🔥 3. The Forge's Output")
|
251 |
+
with gr.Tabs():
|
252 |
+
with gr.TabItem("📜 Candidates & Evaluations"): output_initial_solutions_markdown = gr.Markdown()
|
253 |
+
with gr.TabItem("🏆 Champion"): output_champion_markdown = gr.Markdown()
|
254 |
+
with gr.TabItem("🌟 Evolved & Tested"):
|
255 |
+
output_evolved_markdown = gr.Markdown()
|
256 |
+
output_ai_test_analysis_markdown = gr.Markdown()
|
257 |
+
with gr.TabItem("🛠️ Log"): output_interaction_log_markdown = gr.Markdown()
|
|
|
|
|
|
|
258 |
|
|
|
259 |
engage_button.click(
|
260 |
fn=run_algoforge_simulation_orchestrator,
|
261 |
+
inputs=[ problem_type_dropdown, problem_description_textbox, initial_hints_textbox, user_tests_textbox, num_initial_solutions_slider, model_selection_dropdown, genesis_temp_slider, genesis_max_tokens_slider, critique_temp_slider, critique_max_tokens_slider, evolution_temp_slider, evolution_max_tokens_slider ],
|
262 |
+
outputs=[ output_initial_solutions_markdown, output_champion_markdown, output_evolved_markdown, output_interaction_log_markdown, output_ai_test_analysis_markdown ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
263 |
)
|
|
|
264 |
gr.Markdown("---")
|
265 |
+
gr.Markdown("**Disclaimer:** Conceptual Omega Demo. (Simulated) unit testing. **NEVER run untrusted LLM code without robust sandboxing.**")
|
266 |
+
|
|
|
|
|
|
|
|
|
|
|
267 |
|
268 |
# --- Entry Point for Running the Gradio App ---
|
269 |
if __name__ == "__main__":
|
270 |
print("="*80)
|
271 |
+
print("AlgoForge Omega™ Conceptual Demo - Launching...")
|
272 |
+
print(f" Gemini API Ready: {GEMINI_API_READY}")
|
273 |
+
print(f" HF API Ready: {HF_API_READY}")
|
274 |
+
# ... (other startup prints)
|
|
|
|
|
|
|
|
|
|
|
275 |
app_demo.launch(debug=True, server_name="0.0.0.0")
|