mgbam commited on
Commit
6aa264c
·
verified ·
1 Parent(s): 1cc90f6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -271
app.py CHANGED
@@ -1,399 +1,275 @@
1
  # algoforge_prime/app.py
2
  import gradio as gr
3
  import os
4
- import time # For progress updates
5
 
6
- # --- Core Logic Imports ---
7
- # Initialize clients first to ensure API keys are loaded before other modules use them.
8
- from core.llm_clients import initialize_all_clients, is_gemini_api_configured, is_hf_api_configured # Use getters
9
- initialize_all_clients() # CRITICAL: Call initialization first
10
 
11
- # Now get the status AFTER initialization
12
  GEMINI_API_READY = is_gemini_api_configured()
13
  HF_API_READY = is_hf_api_configured()
14
 
15
  from core.generation_engine import generate_initial_solutions
16
- from core.evaluation_engine import evaluate_solution_candidate, EvaluationResult # Class for typed results
17
  from core.evolution_engine import evolve_solution
18
  from prompts.system_prompts import get_system_prompt
19
  from prompts.prompt_templates import format_code_test_analysis_user_prompt
 
20
 
21
  # --- Application Configuration (Models, Defaults) ---
 
22
  AVAILABLE_MODELS_CONFIG = {}
23
  UI_DEFAULT_MODEL_KEY = None
24
-
25
- # Define Gemini 1.5 model IDs (use the exact strings from Google's documentation)
26
- # These are common aliases; specific versioned IDs might also be available.
27
  GEMINI_1_5_PRO_LATEST_ID = "gemini-1.5-pro-latest"
28
  GEMINI_1_5_FLASH_LATEST_ID = "gemini-1.5-flash-latest"
29
-
30
- # Populate with Gemini models first if API is configured
31
  if GEMINI_API_READY:
32
  AVAILABLE_MODELS_CONFIG.update({
33
  f"Google Gemini 1.5 Pro (API - Recommended)": {"id": GEMINI_1_5_PRO_LATEST_ID, "type": "google_gemini"},
34
  f"Google Gemini 1.5 Flash (API - Fast)": {"id": GEMINI_1_5_FLASH_LATEST_ID, "type": "google_gemini"},
35
- # You can add older Gemini versions here if needed
36
  "Google Gemini 1.0 Pro (API - Legacy)": {"id": "gemini-1.0-pro-latest", "type": "google_gemini"},
37
  })
38
- # Prioritize 1.5 Pro as default
39
  UI_DEFAULT_MODEL_KEY = f"Google Gemini 1.5 Pro (API - Recommended)"
40
- # Fallback to Flash if Pro key somehow isn't in dict (shouldn't happen with this logic)
41
- if UI_DEFAULT_MODEL_KEY not in AVAILABLE_MODELS_CONFIG: # Should not be needed if Pro ID is valid
42
- UI_DEFAULT_MODEL_KEY = f"Google Gemini 1.5 Flash (API - Fast)"
43
- print(f"INFO: app.py - Gemini models populated. Default set to: {UI_DEFAULT_MODEL_KEY}")
44
- else:
45
- print("WARNING: app.py - Gemini API not configured (checked via getter); Gemini models will be unavailable.")
46
-
47
- # Populate with Hugging Face models if API is configured
48
  if HF_API_READY:
49
  AVAILABLE_MODELS_CONFIG.update({
50
  "Google Gemma 2B (HF - Quick Test)": {"id": "google/gemma-2b-it", "type": "hf"},
51
  "Mistral 7B Instruct (HF)": {"id": "mistralai/Mistral-7B-Instruct-v0.2", "type": "hf"},
52
- "CodeLlama 7B Instruct (HF)": {"id": "codellama/CodeLlama-7b-Instruct-hf", "type": "hf"},
53
  })
54
- if not UI_DEFAULT_MODEL_KEY: # If Gemini wasn't configured, default to an HF model
55
- UI_DEFAULT_MODEL_KEY = "Google Gemma 2B (HF - Quick Test)"
56
- print("INFO: app.py - HF models populated; default set to an HF model as Gemini was not available.")
57
- else:
58
- print("INFO: app.py - HF models also populated as alternatives.")
59
- else:
60
- print("WARNING: app.py - Hugging Face API not configured (checked via getter); HF models will be unavailable.")
61
-
62
- # Absolute fallback if no models could be configured at all
63
  if not AVAILABLE_MODELS_CONFIG:
64
- print("CRITICAL APP ERROR: No models could be configured. Check API keys in Space Secrets and restart Space.")
65
  AVAILABLE_MODELS_CONFIG["No Models Available (Check API Keys & Restart)"] = {"id": "dummy_error", "type": "none"}
66
  UI_DEFAULT_MODEL_KEY = "No Models Available (Check API Keys & Restart)"
67
- elif not UI_DEFAULT_MODEL_KEY and AVAILABLE_MODELS_CONFIG: # Should not happen if logic above is correct
68
- # If somehow UI_DEFAULT_MODEL_KEY is still None, pick the first available model
69
- UI_DEFAULT_MODEL_KEY = list(AVAILABLE_MODELS_CONFIG.keys())[0]
70
- print(f"WARNING: app.py - UI_DEFAULT_MODEL_KEY was not set by primary logic, falling back to first available: {UI_DEFAULT_MODEL_KEY}")
71
 
72
 
73
  # --- Main Orchestration Logic for Gradio ---
74
  def run_algoforge_simulation_orchestrator(
75
- problem_type_selected: str,
76
- problem_description_text: str,
77
- initial_hints_text: str,
78
- user_provided_tests_code: str,
79
- num_initial_solutions_to_gen: int,
80
- selected_model_ui_key: str,
81
- genesis_temp: float, genesis_max_tokens: int,
82
- critique_temp: float, critique_max_tokens: int,
83
  evolution_temp: float, evolution_max_tokens: int,
84
- progress=gr.Progress(track_tqdm=True) # Gradio progress tracker
85
  ):
86
  progress(0, desc="Initializing AlgoForge Prime™...")
87
- log_entries = [f"**AlgoForge Prime™ Cycle Starting at {time.strftime('%Y-%m-%d %H:%M:%S')}**"]
88
- start_time = time.time()
89
-
90
- # Basic input validation
91
- if not problem_description_text.strip():
92
- error_msg = "CRITICAL INPUT ERROR: Problem Description is mandatory. Please describe the problem."
93
- log_entries.append(error_msg)
94
- return error_msg, "", "", "\n".join(log_entries), "" # Return 5 values for outputs
95
-
96
  current_model_config = AVAILABLE_MODELS_CONFIG.get(selected_model_ui_key)
97
- if not current_model_config or current_model_config["type"] == "none":
98
- error_msg = f"CRITICAL CONFIG ERROR: No valid LLM selected ('{selected_model_ui_key}'). This usually means API keys are missing or failed to initialize. Check Space Secrets and restart."
99
- log_entries.append(error_msg)
100
- return error_msg, "", "", "\n".join(log_entries), ""
101
 
102
- log_entries.append(f"Selected Model: {selected_model_ui_key} (Type: {current_model_config['type']}, ID: {current_model_config['id']})")
103
- log_entries.append(f"Problem Type: {problem_type_selected}")
104
- log_entries.append(f"User Unit Tests Provided: {'Yes' if user_provided_tests_code.strip() else 'No'}")
105
-
106
- # Prepare LLM configurations for each stage
107
  llm_config_genesis = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": genesis_temp, "max_tokens": genesis_max_tokens}
108
- llm_config_critique = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": critique_temp, "max_tokens": critique_max_tokens}
109
- llm_config_evolution = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": evolution_temp, "max_tokens": evolution_max_tokens}
110
 
111
- # --- STAGE 1: GENESIS ---
112
- progress(0.05, desc="Stage 1: Genesis Engine - Generating Solutions...")
113
- log_entries.append("\n**------ STAGE 1: GENESIS ENGINE ------**")
114
-
115
- initial_raw_solutions = generate_initial_solutions(
116
- problem_description_text, initial_hints_text, problem_type_selected,
117
- num_initial_solutions_to_gen, llm_config_genesis
118
- )
119
- log_entries.append(f"Genesis Engine produced {len(initial_raw_solutions)} raw solution candidate(s).")
120
- for i, sol_text in enumerate(initial_raw_solutions):
121
- log_entries.append(f" Candidate {i+1} (Raw Snippet): {str(sol_text)[:120]}...") # str() for safety
122
 
123
  # --- STAGE 2: CRITIQUE & AUTOMATED EVALUATION ---
124
- progress(0.25, desc="Stage 2: Critique Crucible - Evaluating Candidates...")
125
- log_entries.append("\n**------ STAGE 2: CRITIQUE CRUCIBLE & AUTOMATED EVALUATION ------**")
126
-
127
- evaluated_candidates_list = [] # Stores dicts: {"id": ..., "solution_text": ..., "evaluation_result": EvaluationResult}
128
 
129
  for i, candidate_solution_text in enumerate(initial_raw_solutions):
130
- current_progress = 0.25 + ( (i + 1) / num_initial_solutions_to_gen ) * 0.4 # Progress for evaluation stage
131
- progress(current_progress, desc=f"Evaluating Candidate {i+1} of {num_initial_solutions_to_gen}...")
132
  log_entries.append(f"\n--- Evaluating Candidate {i+1} ---")
133
-
134
- evaluation_obj = evaluate_solution_candidate( # type: EvaluationResult
135
- str(candidate_solution_text), # Ensure it's a string before passing
136
- problem_description_text, problem_type_selected,
137
  user_provided_tests_code, llm_config_critique
138
  )
139
-
140
- log_entries.append(f" Final Combined Score: {evaluation_obj.score}/10")
141
- log_entries.append(f" Automated Tests: {evaluation_obj.passed_tests}/{evaluation_obj.total_tests} passed.")
142
- if evaluation_obj.execution_summary: log_entries.append(f" Execution Summary: {evaluation_obj.execution_summary}")
143
- log_entries.append(f" LLM Critique (Snippet): {str(evaluation_obj.critique_text)[:150]}...")
144
-
145
  evaluated_candidates_list.append({
146
  "id": i + 1,
147
- "solution_text": str(candidate_solution_text),
148
- "evaluation_result": evaluation_obj
149
  })
 
 
 
 
 
150
 
151
- # Format display for initial solutions & evaluations
152
  initial_solutions_display_markdown = []
153
  for data in evaluated_candidates_list:
154
  initial_solutions_display_markdown.append(
155
- f"**Candidate {data['id']}:**\n"
156
- # Assuming python for display, adjust if problem_type varies widely in output format
157
- f"```python\n{data['solution_text']}\n```\n\n"
158
- f"**Evaluation Verdict (Combined Score: {data['evaluation_result'].score}/10):**\n"
159
- f"{data['evaluation_result'].critique_text}\n---"
160
  )
161
 
162
  # --- STAGE 3: SELECTION OF CHAMPION ---
163
- progress(0.7, desc="Stage 3: Selecting Champion Candidate...")
164
- log_entries.append("\n**------ STAGE 3: CHAMPION SELECTION ------**")
165
-
166
  potentially_viable_candidates = [
167
  cand for cand in evaluated_candidates_list
168
- if cand["evaluation_result"] and cand["evaluation_result"].score > 0 and \
169
  cand["solution_text"] and not str(cand["solution_text"]).startswith("ERROR")
170
  ]
171
-
172
- if not potentially_viable_candidates:
173
- final_error_msg = "No viable candidate solutions found after generation and evaluation. All attempts may have failed or scored too low."
174
- log_entries.append(f" CRITICAL: {final_error_msg}")
175
- return "\n\n".join(initial_solutions_display_markdown), final_error_msg, "", "\n".join(log_entries), ""
176
-
177
- potentially_viable_candidates.sort(key=lambda x: x["evaluation_result"].score, reverse=True)
178
  champion_candidate_data = potentially_viable_candidates[0]
179
-
180
- log_entries.append(f"Champion Selected: Candidate {champion_candidate_data['id']} "
181
- f"(Solution Snippet: {str(champion_candidate_data['solution_text'])[:60]}...) "
182
- f"with evaluation score {champion_candidate_data['evaluation_result'].score}/10.")
183
-
184
  champion_display_markdown = (
185
  f"**Champion Candidate ID: {champion_candidate_data['id']} "
186
- f"(Original Combined Score: {champion_candidate_data['evaluation_result'].score}/10):**\n"
187
  f"```python\n{champion_candidate_data['solution_text']}\n```\n\n"
188
- f"**Original Comprehensive Evaluation for this Champion:**\n"
189
- f"{champion_candidate_data['evaluation_result'].critique_text}"
190
  )
191
 
 
192
  # --- STAGE 4: EVOLUTIONARY FORGE ---
193
- progress(0.75, desc="Stage 4: Evolutionary Forge - Refining Champion...")
194
- log_entries.append("\n**------ STAGE 4: EVOLUTIONARY FORGE ------**")
 
195
 
196
  evolved_solution_code = evolve_solution(
197
  str(champion_candidate_data["solution_text"]),
198
- str(champion_candidate_data["evaluation_result"].critique_text),
199
- champion_candidate_data["evaluation_result"].score,
200
  problem_description_text,
201
  problem_type_selected,
202
  llm_config_evolution
203
  )
204
- log_entries.append(f"Raw Evolved Solution Text (Snippet): {str(evolved_solution_code)[:150]}...")
205
-
206
  evolved_solution_display_markdown = ""
207
- ai_test_analysis_markdown = ""
208
 
209
  if str(evolved_solution_code).startswith("ERROR"):
210
  evolved_solution_display_markdown = f"**Evolution Stage Failed:**\n{evolved_solution_code}"
211
  else:
212
- evolved_solution_display_markdown = f"**✨ AlgoForge Prime™ Evolved Artifact ✨:**\n```python\n{evolved_solution_code}\n```"
213
 
 
214
  if "python" in problem_type_selected.lower() and user_provided_tests_code.strip():
215
- progress(0.9, desc="Post-Evolution: Re-running Automated Tests on Evolved Code...")
216
- log_entries.append("\n--- Post-Evolution Sanity Check (Automated Tests on Evolved Code) ---")
217
-
218
- evolved_critique_config = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": 0.2, "max_tokens": critique_max_tokens}
219
-
220
- evolved_code_eval_result = evaluate_solution_candidate(
221
- str(evolved_solution_code), problem_description_text, problem_type_selected,
222
- user_provided_tests_code, evolved_critique_config
223
  )
224
 
225
  evolved_solution_display_markdown += (
226
  f"\n\n**Post-Evolution Automated Test Results (Simulated):**\n"
227
- f"{evolved_code_eval_result.execution_summary}\n" # This now comes from EvaluationResult
228
- f"Passed: {evolved_code_eval_result.passed_tests}/{evolved_code_eval_result.total_tests}\n"
 
229
  )
230
- log_entries.append(f" Evolved Code Test Results: {evolved_code_eval_result.passed_tests}/{evolved_code_eval_result.total_tests} passed. "
231
- f"Summary: {evolved_code_eval_result.execution_summary}")
 
 
 
 
232
 
233
- if evolved_code_eval_result.total_tests > 0 :
 
 
234
  progress(0.95, desc="Post-Evolution: AI Analyzing Test Results...")
235
- log_entries.append("\n--- AI Analysis of Evolved Code's Test Results ---")
236
- analysis_user_prompt = format_code_test_analysis_user_prompt(
237
- str(evolved_solution_code),
238
- user_provided_tests_code,
239
- str(evolved_code_eval_result.execution_summary)
240
- )
241
- analysis_system_prompt = get_system_prompt("code_execution_explainer")
242
-
243
- llm_analysis_config = {"type": current_model_config["type"], "model_id": current_model_config["id"],
244
- "temp": 0.3, "max_tokens": critique_max_tokens + 150}
245
-
246
  from core.llm_clients import call_huggingface_api, call_gemini_api
247
-
248
  explanation_response_obj = None
249
- if llm_analysis_config["type"] == "hf":
250
- explanation_response_obj = call_huggingface_api(analysis_user_prompt, llm_analysis_config["model_id"], llm_analysis_config["temp"], llm_analysis_config["max_tokens"], analysis_system_prompt)
251
- elif llm_analysis_config["type"] == "google_gemini":
252
- explanation_response_obj = call_gemini_api(analysis_user_prompt, llm_analysis_config["model_id"], llm_analysis_config["temp"], llm_analysis_config["max_tokens"], analysis_system_prompt)
253
 
254
- if explanation_response_obj and explanation_response_obj.success:
255
- ai_test_analysis_markdown = f"**AI Analysis of Evolved Code's Test Performance:**\n{explanation_response_obj.text}"
256
- log_entries.append(f" AI Test Analysis (Snippet): {str(explanation_response_obj.text)[:100]}...")
257
- elif explanation_response_obj:
258
- ai_test_analysis_markdown = f"**AI Analysis of Test Performance Failed:**\n{explanation_response_obj.error}"
259
- log_entries.append(f" AI Test Analysis Error: {explanation_response_obj.error}")
260
 
 
261
  total_time = time.time() - start_time
262
- log_entries.append(f"\n**AlgoForge Prime™ Cycle Complete. Total time: {total_time:.2f} seconds.**")
263
  progress(1.0, desc="Cycle Complete!")
264
-
265
  return "\n\n".join(initial_solutions_display_markdown), champion_display_markdown, evolved_solution_display_markdown, "\n".join(log_entries), ai_test_analysis_markdown
266
 
267
 
268
  # --- Gradio UI Definition ---
 
 
 
 
 
 
 
 
269
  intro_markdown = """
270
- # ✨ AlgoForge Prime™ ✨: Modular Algorithmic Evolution (v1.5 Gemini Focus)
271
- This version prioritizes Google Gemini 1.5 models and demonstrates a conceptual workflow for AI-assisted algorithm discovery,
272
- featuring (simulated) unit testing for Python code if provided.
273
 
274
- **API Keys Required in Space Secrets (should be working):**
275
- - `GOOGLE_API_KEY` (Primary): For Google Gemini API models. Ensure the "Generative Language API" (or similar) is enabled for your project.
276
  - `HF_TOKEN` (Secondary): For Hugging Face hosted models.
277
  """
278
-
279
  ui_token_status_md = ""
280
- if not GEMINI_API_READY and not HF_API_READY: # Use status from getters
281
- ui_token_status_md = "<p style='color:red;'>⚠️ **CRITICAL: NEITHER GOOGLE_API_KEY NOR HF_TOKEN are configured or working correctly.** The application will not be able to call any LLMs.</p>"
282
  else:
283
- if GEMINI_API_READY: ui_token_status_md += "<p style='color:green;'>✅ Google Gemini API Key detected and configured.</p>"
284
- else: ui_token_status_md += "<p style='color:orange;'>⚠️ **GOOGLE_API_KEY missing or failed to configure.** Gemini API models will be disabled.</p>"
285
- if HF_API_READY: ui_token_status_md += "<p style='color:green;'>✅ Hugging Face API Token detected and client initialized.</p>"
286
- else: ui_token_status_md += "<p style='color:orange;'>⚠️ **HF_TOKEN missing or client failed to initialize.** Hugging Face models will be disabled.</p>"
287
 
288
-
289
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="green", secondary_hue="lime"), title="AlgoForge Prime™ (1.5 Focus)") as app_demo:
290
  gr.Markdown(intro_markdown)
291
  gr.HTML(ui_token_status_md)
292
-
293
- # Check if any models are actually usable based on API readiness
294
- usable_models_available = any(
295
- AVAILABLE_MODELS_CONFIG.get(key, {}).get("type") != "none"
296
- for key in AVAILABLE_MODELS_CONFIG
297
- )
298
-
299
  if not usable_models_available:
300
- gr.Markdown("<h2 style='color:red;'>No LLM models are available for use. Please ensure at least one API key (Google or Hugging Face) is correctly set in this Space's Secrets and that the Space has been restarted.</h2>")
301
  else:
302
  with gr.Row():
303
- # Input Column
304
- with gr.Column(scale=2):
 
 
 
305
  gr.Markdown("## 💡 1. Define the Challenge")
306
- problem_type_dropdown = gr.Dropdown(
307
- choices=["Python Algorithm with Tests", "Python Algorithm (Critique Only)", "General Algorithm Idea", "Conceptual System Design", "Pseudocode Refinement"],
308
- label="Type of Problem / Algorithm", value="Python Algorithm with Tests",
309
- info="Select '...with Tests' to enable (simulated) unit testing if you provide tests below."
310
- )
311
- problem_description_textbox = gr.Textbox(
312
- lines=5, label="Problem Description / Desired Outcome",
313
- placeholder="Example for 'Python Algorithm with Tests':\n`def calculate_factorial(n: int) -> int:`\nCalculates factorial of n. Should handle n=0 (returns 1) and raise ValueError for n<0."
314
- )
315
- initial_hints_textbox = gr.Textbox(
316
- lines=3, label="Initial Thoughts / Constraints (Optional)",
317
- placeholder="E.g., 'Prefer an iterative solution over recursive for factorial.' or 'Consider time complexity.'"
318
- )
319
- user_tests_textbox = gr.Textbox(
320
- lines=6, label="Python Unit Tests (Optional, one `assert` per line)",
321
- placeholder="assert calculate_factorial(0) == 1\nassert calculate_factorial(5) == 120\n# For expected errors (advanced, not fully simulated here):\n# try:\n# calculate_factorial(-1)\n# assert False, \"ValueError not raised\"\n# except ValueError:\n# assert True",
322
- info="For 'Python Algorithm with Tests'. Ensure function names match your problem description."
323
- )
324
-
325
  gr.Markdown("## ⚙️ 2. Configure The Forge")
326
- model_selection_dropdown = gr.Dropdown(
327
- choices=list(AVAILABLE_MODELS_CONFIG.keys()),
328
- value=UI_DEFAULT_MODEL_KEY if UI_DEFAULT_MODEL_KEY in AVAILABLE_MODELS_CONFIG else (list(AVAILABLE_MODELS_CONFIG.keys())[0] if AVAILABLE_MODELS_CONFIG else None),
329
- label="Select LLM Core Model",
330
- info="Ensure the corresponding API key (Google or HF) is configured and working."
331
- )
332
- num_initial_solutions_slider = gr.Slider(minimum=1, maximum=3, value=2, step=1, label="Number of Initial Solutions (Genesis Engine)") # Max 3 for faster runs
333
-
334
- with gr.Accordion("Advanced LLM Parameters (Expert Users)", open=False):
335
- with gr.Row():
336
- genesis_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Genesis Temp", info="Higher = more creative, Lower = more deterministic.")
337
- genesis_max_tokens_slider = gr.Slider(minimum=256, maximum=4096, value=1024, step=128, label="Genesis Max Output Tokens")
338
- with gr.Row():
339
- critique_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.4, step=0.05, label="Critique Temp")
340
- critique_max_tokens_slider = gr.Slider(minimum=150, maximum=2048, value=512, step=64, label="Critique Max Output Tokens")
341
- with gr.Row():
342
- evolution_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.75, step=0.05, label="Evolution Temp")
343
- evolution_max_tokens_slider = gr.Slider(minimum=256, maximum=4096, value=1536, step=128, label="Evolution Max Output Tokens")
344
-
345
- engage_button = gr.Button("🚀 ENGAGE ALGOFORGE PRIME™ 🚀", variant="primary", size="lg", elem_id="engage_button_elem")
346
-
347
- # Output Column
348
- with gr.Column(scale=3):
349
  gr.Markdown("## 🔥 3. The Forge's Output")
350
- with gr.Tabs(elem_id="output_tabs_elem"):
351
- with gr.TabItem("📜 Initial Candidates & Evaluations", id="tab_initial_evals"):
352
- output_initial_solutions_markdown = gr.Markdown(label="Generated Solutions & Combined Evaluations")
353
- with gr.TabItem("🏆 Champion Candidate (Pre-Evolution)", id="tab_champion"):
354
- output_champion_markdown = gr.Markdown(label="Top Pick for Refinement")
355
- with gr.TabItem("🌟 Evolved Artifact & Test Analysis", id="tab_evolved"):
356
- output_evolved_markdown = gr.Markdown(label="Refined Solution from Evolutionary Forge")
357
- output_ai_test_analysis_markdown = gr.Markdown(label="AI Analysis of Evolved Code's Test Performance")
358
- with gr.TabItem("🛠️ Interaction Log (Developer View)", id="tab_log"):
359
- output_interaction_log_markdown = gr.Markdown(label="Detailed Log of LLM Prompts & Responses")
360
 
361
- # Connect button to the orchestration function
362
  engage_button.click(
363
  fn=run_algoforge_simulation_orchestrator,
364
- inputs=[
365
- problem_type_dropdown, problem_description_textbox, initial_hints_textbox, user_tests_textbox,
366
- num_initial_solutions_slider, model_selection_dropdown,
367
- genesis_temp_slider, genesis_max_tokens_slider,
368
- critique_temp_slider, critique_max_tokens_slider,
369
- evolution_temp_slider, evolution_max_tokens_slider
370
- ],
371
- outputs=[
372
- output_initial_solutions_markdown, output_champion_markdown,
373
- output_evolved_markdown, output_interaction_log_markdown,
374
- output_ai_test_analysis_markdown # Matched to the 5 outputs of orchestrator
375
- ]
376
  )
377
-
378
  gr.Markdown("---")
379
- gr.Markdown(
380
- "**Disclaimer:** This is a conceptual, educational demonstration. "
381
- "The (simulated) unit testing feature is for illustrative purposes. "
382
- "**NEVER run LLM-generated code from an untrusted source in an unrestricted environment.** "
383
- "Implementing robust and secure code sandboxing is complex and absolutely critical for safety in real-world applications. "
384
- "LLM outputs always require careful human review and verification."
385
- )
386
 
387
  # --- Entry Point for Running the Gradio App ---
388
  if __name__ == "__main__":
389
  print("="*80)
390
- print("AlgoForge Prime(Modular Version - Gemini 1.5 Focus) - Launching...")
391
- # Print status based on the variables set after calling initialize_all_clients()
392
- print(f" Google Gemini API Configured: {GEMINI_API_READY}")
393
- print(f" Hugging Face API Configured: {HF_API_READY}")
394
- if not GEMINI_API_READY and not HF_API_READY:
395
- print(" CRITICAL WARNING: No API keys seem to be configured correctly. The application will likely be non-functional.")
396
- print(f" UI Default Model Key: {UI_DEFAULT_MODEL_KEY}")
397
- print(f" Available models for UI: {list(AVAILABLE_MODELS_CONFIG.keys())}")
398
- print("="*80)
399
  app_demo.launch(debug=True, server_name="0.0.0.0")
 
1
  # algoforge_prime/app.py
2
  import gradio as gr
3
  import os
4
+ import time
5
 
6
+ from core.llm_clients import initialize_all_clients, is_gemini_api_configured, is_hf_api_configured
7
+ initialize_all_clients()
 
 
8
 
 
9
  GEMINI_API_READY = is_gemini_api_configured()
10
  HF_API_READY = is_hf_api_configured()
11
 
12
  from core.generation_engine import generate_initial_solutions
13
+ from core.evaluation_engine import evaluate_solution_candidate, EvaluationResultOutput # Use new class name
14
  from core.evolution_engine import evolve_solution
15
  from prompts.system_prompts import get_system_prompt
16
  from prompts.prompt_templates import format_code_test_analysis_user_prompt
17
+ from core.safe_executor import execute_python_code_with_tests, ExecutionResult # For re-evaluating evolved code
18
 
19
  # --- Application Configuration (Models, Defaults) ---
20
+ # ... (Keep your AVAILABLE_MODELS_CONFIG and UI_DEFAULT_MODEL_KEY logic as in the previous full app.py)
21
  AVAILABLE_MODELS_CONFIG = {}
22
  UI_DEFAULT_MODEL_KEY = None
 
 
 
23
  GEMINI_1_5_PRO_LATEST_ID = "gemini-1.5-pro-latest"
24
  GEMINI_1_5_FLASH_LATEST_ID = "gemini-1.5-flash-latest"
 
 
25
  if GEMINI_API_READY:
26
  AVAILABLE_MODELS_CONFIG.update({
27
  f"Google Gemini 1.5 Pro (API - Recommended)": {"id": GEMINI_1_5_PRO_LATEST_ID, "type": "google_gemini"},
28
  f"Google Gemini 1.5 Flash (API - Fast)": {"id": GEMINI_1_5_FLASH_LATEST_ID, "type": "google_gemini"},
 
29
  "Google Gemini 1.0 Pro (API - Legacy)": {"id": "gemini-1.0-pro-latest", "type": "google_gemini"},
30
  })
 
31
  UI_DEFAULT_MODEL_KEY = f"Google Gemini 1.5 Pro (API - Recommended)"
32
+ if UI_DEFAULT_MODEL_KEY not in AVAILABLE_MODELS_CONFIG: UI_DEFAULT_MODEL_KEY = f"Google Gemini 1.5 Flash (API - Fast)"
33
+ else: print("WARNING: app.py - Gemini API not configured; Gemini models will be unavailable.")
 
 
 
 
 
 
34
  if HF_API_READY:
35
  AVAILABLE_MODELS_CONFIG.update({
36
  "Google Gemma 2B (HF - Quick Test)": {"id": "google/gemma-2b-it", "type": "hf"},
37
  "Mistral 7B Instruct (HF)": {"id": "mistralai/Mistral-7B-Instruct-v0.2", "type": "hf"},
 
38
  })
39
+ if not UI_DEFAULT_MODEL_KEY: UI_DEFAULT_MODEL_KEY = "Google Gemma 2B (HF - Quick Test)"
40
+ else: print("WARNING: app.py - Hugging Face API not configured; HF models will be unavailable.")
 
 
 
 
 
 
 
41
  if not AVAILABLE_MODELS_CONFIG:
 
42
  AVAILABLE_MODELS_CONFIG["No Models Available (Check API Keys & Restart)"] = {"id": "dummy_error", "type": "none"}
43
  UI_DEFAULT_MODEL_KEY = "No Models Available (Check API Keys & Restart)"
44
+ elif not UI_DEFAULT_MODEL_KEY and AVAILABLE_MODELS_CONFIG: UI_DEFAULT_MODEL_KEY = list(AVAILABLE_MODELS_CONFIG.keys())[0]
 
 
 
45
 
46
 
47
  # --- Main Orchestration Logic for Gradio ---
48
  def run_algoforge_simulation_orchestrator(
49
+ problem_type_selected: str, problem_description_text: str, initial_hints_text: str,
50
+ user_provided_tests_code: str, num_initial_solutions_to_gen: int, selected_model_ui_key: str,
51
+ genesis_temp: float, genesis_max_tokens: int, critique_temp: float, critique_max_tokens: int,
 
 
 
 
 
52
  evolution_temp: float, evolution_max_tokens: int,
53
+ progress=gr.Progress(track_tqdm=True)
54
  ):
55
  progress(0, desc="Initializing AlgoForge Prime™...")
56
+ log_entries = [f"**AlgoForge Prime™ Omega Conceptual Cycle Starting at {time.strftime('%Y-%m-%d %H:%M:%S')}**"]
57
+ # ... (input validation and model config setup as before) ...
 
 
 
 
 
 
 
58
  current_model_config = AVAILABLE_MODELS_CONFIG.get(selected_model_ui_key)
59
+ if not current_model_config or current_model_config["type"] == "none": # Handle missing config
60
+ return "ERROR: Model configuration not found or invalid. Check API keys.", "", "", "\n".join(log_entries), ""
61
+ # ...
 
62
 
63
+ # --- STAGE 1: GENESIS ---
64
+ # ... (generate_initial_solutions call as before) ...
 
 
 
65
  llm_config_genesis = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": genesis_temp, "max_tokens": genesis_max_tokens}
66
+ initial_raw_solutions = generate_initial_solutions(problem_description_text, initial_hints_text, problem_type_selected, num_initial_solutions_to_gen, llm_config_genesis)
 
67
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  # --- STAGE 2: CRITIQUE & AUTOMATED EVALUATION ---
70
+ progress(0.25, desc="Stage 2: Evaluating Candidates with LLM & (Simulated) Tests...")
71
+ log_entries.append("\n**------ STAGE 2: CRITIQUE CRUCIBLE & AUTOMATED EXECUTION/EVALUATION ------**")
72
+ evaluated_candidates_list = []
73
+ llm_config_critique = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": critique_temp, "max_tokens": critique_max_tokens}
74
 
75
  for i, candidate_solution_text in enumerate(initial_raw_solutions):
76
+ # ... (progress update) ...
 
77
  log_entries.append(f"\n--- Evaluating Candidate {i+1} ---")
78
+ evaluation_output_obj = evaluate_solution_candidate( # type: EvaluationResultOutput
79
+ str(candidate_solution_text), problem_description_text, problem_type_selected,
 
 
80
  user_provided_tests_code, llm_config_critique
81
  )
 
 
 
 
 
 
82
  evaluated_candidates_list.append({
83
  "id": i + 1,
84
+ "solution_text": str(candidate_solution_text),
85
+ "evaluation_obj": evaluation_output_obj # Store the whole object
86
  })
87
+ log_entries.append(f" Combined Score: {evaluation_output_obj.combined_score}/10")
88
+ if evaluation_output_obj.execution_details:
89
+ log_entries.append(f" Test Results: {evaluation_output_obj.execution_details.passed_tests}/{evaluation_output_obj.execution_details.total_tests} passed.")
90
+ if evaluation_output_obj.execution_details.error: log_entries.append(f" Execution Error: {evaluation_output_obj.execution_details.error}")
91
+ log_entries.append(f" LLM Critique (Snippet): {str(evaluation_output_obj.llm_critique_text)[:150]}...")
92
 
 
93
  initial_solutions_display_markdown = []
94
  for data in evaluated_candidates_list:
95
  initial_solutions_display_markdown.append(
96
+ f"**Candidate {data['id']}:**\n```python\n{data['solution_text']}\n```\n\n"
97
+ f"**Evaluation Verdict (Combined Score: {data['evaluation_obj'].combined_score}/10):**\n{data['evaluation_obj'].get_display_critique()}\n---" # Use method
 
 
 
98
  )
99
 
100
  # --- STAGE 3: SELECTION OF CHAMPION ---
101
+ # ... (selection logic as before, but use `data['evaluation_obj'].combined_score`) ...
102
+ progress(0.7, desc="Stage 3: Selecting Champion...")
 
103
  potentially_viable_candidates = [
104
  cand for cand in evaluated_candidates_list
105
+ if cand["evaluation_obj"] and cand["evaluation_obj"].combined_score > 0 and \
106
  cand["solution_text"] and not str(cand["solution_text"]).startswith("ERROR")
107
  ]
108
+ if not potentially_viable_candidates: # Handle no viable candidates
109
+ return "\n\n".join(initial_solutions_display_markdown), "No viable candidates after evaluation.", "", "\n".join(log_entries), ""
110
+ potentially_viable_candidates.sort(key=lambda x: x["evaluation_obj"].combined_score, reverse=True)
 
 
 
 
111
  champion_candidate_data = potentially_viable_candidates[0]
 
 
 
 
 
112
  champion_display_markdown = (
113
  f"**Champion Candidate ID: {champion_candidate_data['id']} "
114
+ f"(Original Combined Score: {champion_candidate_data['evaluation_obj'].combined_score}/10):**\n"
115
  f"```python\n{champion_candidate_data['solution_text']}\n```\n\n"
116
+ f"**Original Comprehensive Evaluation for this Champion:**\n{champion_candidate_data['evaluation_obj'].get_display_critique()}"
 
117
  )
118
 
119
+
120
  # --- STAGE 4: EVOLUTIONARY FORGE ---
121
+ progress(0.75, desc="Stage 4: Evolving Champion...")
122
+ log_entries.append("\n**------ STAGE 4: EVOLUTIONARY FORGE (Informed by Tests) ------**")
123
+ llm_config_evolution = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": evolution_temp, "max_tokens": evolution_max_tokens}
124
 
125
  evolved_solution_code = evolve_solution(
126
  str(champion_candidate_data["solution_text"]),
127
+ champion_candidate_data["evaluation_obj"], # Pass the whole EvaluationResultOutput object
128
+ # champion_candidate_data["evaluation_obj"].combined_score, # Score is inside the object
129
  problem_description_text,
130
  problem_type_selected,
131
  llm_config_evolution
132
  )
133
+ # ... (log evolved solution snippet) ...
 
134
  evolved_solution_display_markdown = ""
135
+ ai_test_analysis_markdown = ""
136
 
137
  if str(evolved_solution_code).startswith("ERROR"):
138
  evolved_solution_display_markdown = f"**Evolution Stage Failed:**\n{evolved_solution_code}"
139
  else:
140
+ evolved_solution_display_markdown = f"**✨ AlgoForge Omega™ Evolved Artifact ✨:**\n```python\n{evolved_solution_code}\n```"
141
 
142
+ # Re-evaluate the evolved solution with unit tests
143
  if "python" in problem_type_selected.lower() and user_provided_tests_code.strip():
144
+ progress(0.9, desc="Post-Evolution: Re-testing Evolved Code...")
145
+ log_entries.append("\n--- Post-Evolution Test of Evolved Code ---")
146
+ # Use the actual safe_executor here directly for testing evolved code
147
+ evolved_code_exec_result = execute_python_code_with_tests( # type: ExecutionResult
148
+ str(evolved_solution_code), user_provided_tests_code, timeout_seconds=10
 
 
 
149
  )
150
 
151
  evolved_solution_display_markdown += (
152
  f"\n\n**Post-Evolution Automated Test Results (Simulated):**\n"
153
+ f" Tests Attempted: {evolved_code_exec_result.total_tests}\n"
154
+ f" Tests Passed: {evolved_code_exec_result.passed_tests}\n"
155
+ f" Execution Time: {evolved_code_exec_result.execution_time:.4f}s\n"
156
  )
157
+ if evolved_code_exec_result.error:
158
+ evolved_solution_display_markdown += f" Execution Error/Output: {evolved_code_exec_result.error}\n"
159
+ elif evolved_code_exec_result.output:
160
+ evolved_solution_display_markdown += f" Execution Output (stdout):\n```\n{evolved_code_exec_result.output[:300]}\n```\n"
161
+
162
+ log_entries.append(f" Evolved Code Test Results: {evolved_code_exec_result}")
163
 
164
+ # Get LLM to explain the test results of the evolved code
165
+ if evolved_code_exec_result.total_tests > 0 :
166
+ # ... (AI Test Analysis call as before, using evolved_code_exec_result.error or .output for summary) ...
167
  progress(0.95, desc="Post-Evolution: AI Analyzing Test Results...")
168
+ analysis_exec_summary = evolved_code_exec_result.error if evolved_code_exec_result.error else (evolved_code_exec_result.output if evolved_code_exec_result.output else "Tests completed.")
169
+ analysis_user_prompt = format_code_test_analysis_user_prompt(str(evolved_solution_code), user_provided_tests_code, f"Passed: {evolved_code_exec_result.passed_tests}/{evolved_code_exec_result.total_tests}. Detail: {analysis_exec_summary}")
170
+ # ... (rest of AI analysis call) ...
 
 
 
 
 
 
 
 
171
  from core.llm_clients import call_huggingface_api, call_gemini_api
172
+ llm_analysis_config = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": 0.3, "max_tokens": critique_max_tokens + 150}
173
  explanation_response_obj = None
174
+ if llm_analysis_config["type"] == "hf": explanation_response_obj = call_huggingface_api(analysis_user_prompt, llm_analysis_config["model_id"], llm_analysis_config["temp"], llm_analysis_config["max_tokens"], get_system_prompt("code_execution_explainer"))
175
+ elif llm_analysis_config["type"] == "google_gemini": explanation_response_obj = call_gemini_api(analysis_user_prompt, llm_analysis_config["model_id"], llm_analysis_config["temp"], llm_analysis_config["max_tokens"], get_system_prompt("code_execution_explainer"))
176
+ if explanation_response_obj and explanation_response_obj.success: ai_test_analysis_markdown = f"**AI Analysis of Evolved Code's Test Performance:**\n{explanation_response_obj.text}"
177
+ elif explanation_response_obj: ai_test_analysis_markdown = f"**AI Analysis Failed:**\n{explanation_response_obj.error}"
178
 
 
 
 
 
 
 
179
 
180
+ # ... (Total time logging and return statement as before) ...
181
  total_time = time.time() - start_time
182
+ log_entries.append(f"\n**AlgoForge Omega Cycle Complete. Total time: {total_time:.2f} seconds.**")
183
  progress(1.0, desc="Cycle Complete!")
 
184
  return "\n\n".join(initial_solutions_display_markdown), champion_display_markdown, evolved_solution_display_markdown, "\n".join(log_entries), ai_test_analysis_markdown
185
 
186
 
187
  # --- Gradio UI Definition ---
188
+ # (This is IDENTICAL to the UI in the previous full app.py where you introduced the user_tests_tb)
189
+ # For brevity, ensure it's copied correctly. Key parts:
190
+ # - `intro_markdown` with updated title "AlgoForge Omega™ Conceptual Demo"
191
+ # - `ui_token_status_md` based on GEMINI_API_READY, HF_API_READY
192
+ # - `user_tests_tb` Gradio Textbox component
193
+ # - All sliders and dropdowns
194
+ # - `engage_button.click` connecting to `run_algoforge_simulation_orchestrator` with all inputs/outputs.
195
+ # ... PASTE THE FULL UI DEFINITION HERE from your last complete app.py version ...
196
  intro_markdown = """
197
+ # ✨ AlgoForge Omega™ ✨: Conceptual Demo with (Simulated) Execution
198
+ This version demonstrates a conceptual workflow for AI-assisted algorithm discovery and refinement,
199
+ featuring **(simulated) execution of generated Python code against user-provided unit tests**.
200
 
201
+ **API Keys Required in Space Secrets:**
202
+ - `GOOGLE_API_KEY` (Primary): For Google Gemini API models.
203
  - `HF_TOKEN` (Secondary): For Hugging Face hosted models.
204
  """
205
+ # ... (ui_token_status_md as before) ...
206
  ui_token_status_md = ""
207
+ if not GEMINI_API_READY and not HF_API_READY: ui_token_status_md = "<p style='color:red;'>⚠️ CRITICAL: NEITHER API IS CONFIGURED. APP NON-FUNCTIONAL.</p>"
 
208
  else:
209
+ if GEMINI_API_READY: ui_token_status_md += "<p style='color:green;'>✅ Google Gemini API Configured.</p>"
210
+ else: ui_token_status_md += "<p style='color:orange;'>⚠️ GOOGLE_API_KEY missing/failed.</p>"
211
+ if HF_API_READY: ui_token_status_md += "<p style='color:green;'>✅ Hugging Face API Configured.</p>"
212
+ else: ui_token_status_md += "<p style='color:orange;'>⚠️ HF_TOKEN missing/failed.</p>"
213
 
214
+ with gr.Blocks(theme=gr.themes.Soft(primary_hue="purple", secondary_hue="pink"), title="AlgoForge Omega™ Demo") as app_demo: # New theme
 
215
  gr.Markdown(intro_markdown)
216
  gr.HTML(ui_token_status_md)
217
+ # ... (Full UI layout as provided in the "write all the files" response, ensuring all inputs/outputs match orchestrator)
218
+ # This is the same UI structure as the last full app.py I provided.
219
+ # Ensure all inputs to engage_button.click match the orchestrator's signature.
220
+ # For brevity, I'm showing the structure. Copy from the previous full version.
221
+ usable_models_available = any(AVAILABLE_MODELS_CONFIG.get(key, {}).get("type") != "none" for key in AVAILABLE_MODELS_CONFIG)
 
 
222
  if not usable_models_available:
223
+ gr.Markdown("<h2 style='color:red;'>No LLM models available for use. Check API keys and restart.</h2>")
224
  else:
225
  with gr.Row():
226
+ with gr.Column(scale=2): # Input Column
227
+ # ... (problem_type_dropdown, problem_description_textbox, initial_hints_textbox, user_tests_textbox)
228
+ # ... (model_selection_dropdown, num_initial_solutions_slider)
229
+ # ... (Accordion with LLM parameter sliders)
230
+ # ... (engage_button)
231
  gr.Markdown("## 💡 1. Define the Challenge")
232
+ problem_type_dropdown = gr.Dropdown(choices=["Python Algorithm with Tests", "Python Algorithm (Critique Only)", "General Algorithm Idea"], label="Problem Type", value="Python Algorithm with Tests")
233
+ problem_description_textbox = gr.Textbox(lines=5, label="Problem Description")
234
+ initial_hints_textbox = gr.Textbox(lines=3, label="Initial Hints (Optional)")
235
+ user_tests_textbox = gr.Textbox(lines=6, label="Python Unit Tests (Optional, one `assert` per line)", placeholder="assert my_func(1) == 1")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  gr.Markdown("## ⚙️ 2. Configure The Forge")
237
+ model_selection_dropdown = gr.Dropdown(choices=list(AVAILABLE_MODELS_CONFIG.keys()), value=UI_DEFAULT_MODEL_KEY, label="LLM Core Model")
238
+ num_initial_solutions_slider = gr.Slider(1, 3, value=2, step=1, label="# Initial Solutions")
239
+ with gr.Accordion("Advanced LLM Parameters", open=False):
240
+ genesis_temp_slider = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Genesis Temp")
241
+ genesis_max_tokens_slider = gr.Slider(256, 4096, value=1024, step=128, label="Genesis Max Tokens")
242
+ critique_temp_slider = gr.Slider(0.0, 1.0, value=0.4, step=0.05, label="Critique Temp")
243
+ critique_max_tokens_slider = gr.Slider(150, 2048, value=512, step=64, label="Critique Max Tokens")
244
+ evolution_temp_slider = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Evolution Temp")
245
+ evolution_max_tokens_slider = gr.Slider(256, 4096, value=1536, step=128, label="Evolution Max Tokens")
246
+ engage_button = gr.Button("🚀 ENGAGE ALGOFORGE OMEGA™ 🚀", variant="primary")
247
+
248
+
249
+ with gr.Column(scale=3): # Output Column
 
 
 
 
 
 
 
 
 
 
250
  gr.Markdown("## 🔥 3. The Forge's Output")
251
+ with gr.Tabs():
252
+ with gr.TabItem("📜 Candidates & Evaluations"): output_initial_solutions_markdown = gr.Markdown()
253
+ with gr.TabItem("🏆 Champion"): output_champion_markdown = gr.Markdown()
254
+ with gr.TabItem("🌟 Evolved & Tested"):
255
+ output_evolved_markdown = gr.Markdown()
256
+ output_ai_test_analysis_markdown = gr.Markdown()
257
+ with gr.TabItem("🛠️ Log"): output_interaction_log_markdown = gr.Markdown()
 
 
 
258
 
 
259
  engage_button.click(
260
  fn=run_algoforge_simulation_orchestrator,
261
+ inputs=[ problem_type_dropdown, problem_description_textbox, initial_hints_textbox, user_tests_textbox, num_initial_solutions_slider, model_selection_dropdown, genesis_temp_slider, genesis_max_tokens_slider, critique_temp_slider, critique_max_tokens_slider, evolution_temp_slider, evolution_max_tokens_slider ],
262
+ outputs=[ output_initial_solutions_markdown, output_champion_markdown, output_evolved_markdown, output_interaction_log_markdown, output_ai_test_analysis_markdown ]
 
 
 
 
 
 
 
 
 
 
263
  )
 
264
  gr.Markdown("---")
265
+ gr.Markdown("**Disclaimer:** Conceptual Omega Demo. (Simulated) unit testing. **NEVER run untrusted LLM code without robust sandboxing.**")
266
+
 
 
 
 
 
267
 
268
  # --- Entry Point for Running the Gradio App ---
269
  if __name__ == "__main__":
270
  print("="*80)
271
+ print("AlgoForge OmegaConceptual Demo - Launching...")
272
+ print(f" Gemini API Ready: {GEMINI_API_READY}")
273
+ print(f" HF API Ready: {HF_API_READY}")
274
+ # ... (other startup prints)
 
 
 
 
 
275
  app_demo.launch(debug=True, server_name="0.0.0.0")