Spaces:
Running
Running
Delanoe Pirard
committed on
Commit
·
4c353e9
1
Parent(s):
9d75f65
Ajout du binaire stockfish via Git LFS
Browse files- agents/code_agent.py +63 -13
- app.py +1 -1
- prompts/code_gen_prompt.txt +37 -42
agents/code_agent.py
CHANGED
@@ -7,6 +7,9 @@ from llama_index.llms.google_genai import GoogleGenAI
|
|
7 |
from llama_index.llms.openai import OpenAI
|
8 |
from llama_index.tools.code_interpreter import CodeInterpreterToolSpec
|
9 |
|
|
|
|
|
|
|
10 |
# Setup logging
|
11 |
logger = logging.getLogger(__name__)
|
12 |
|
@@ -59,12 +62,14 @@ def generate_python_code(prompt: str) -> str:
|
|
59 |
gen_prompt_template = load_prompt_from_file("../prompts/code_gen_prompt.txt", default_gen_prompt_template)
|
60 |
input_prompt = gen_prompt_template.format(prompt=prompt)
|
61 |
|
|
|
|
|
62 |
try:
|
63 |
llm = OpenAI(
|
64 |
model=gen_llm_model,
|
65 |
api_key=gen_api_key,
|
66 |
reasoning_effort="high",
|
67 |
-
temperature=0.
|
68 |
max_tokens=16384
|
69 |
)
|
70 |
logger.info(f"Using code generation LLM: {gen_llm_model}")
|
@@ -127,22 +132,65 @@ def initialize_code_agent() -> ReActAgent:
|
|
127 |
llm = GoogleGenAI(
|
128 |
api_key=gemini_api_key,
|
129 |
model=agent_llm_model,
|
130 |
-
temperature=0.
|
131 |
)
|
132 |
logger.info(f"Using agent LLM: {agent_llm_model}")
|
133 |
|
134 |
# Load system prompt (consider loading from file)
|
135 |
default_system_prompt = """\
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
system_prompt = load_prompt_from_file("code_agent_system_prompt.txt", default_system_prompt)
|
147 |
|
148 |
agent = ReActAgent(
|
@@ -179,7 +227,9 @@ def initialize_code_agent() -> ReActAgent:
|
|
179 |
"- stockfish==3.28.0 : UCI interface to Stockfish chess engine\n"
|
180 |
"- sympy>=1.14.0 : Symbolic math, algebra, calculus CAS\n"
|
181 |
"- youtube-transcript-api>=1.0.3 : Fetch YouTube video transcripts via API\n"
|
182 |
-
"- yt-dlp>=2025.3.31 : Download videos/playlists from YouTube and other sites\n"
|
|
|
|
|
183 |
),
|
184 |
# REMOVED: code_execute_fn - Execution is handled by the code_interpreter tool via the agent loop.
|
185 |
tools=[
|
|
|
7 |
from llama_index.llms.openai import OpenAI
|
8 |
from llama_index.tools.code_interpreter import CodeInterpreterToolSpec
|
9 |
|
10 |
+
import dotenv
|
11 |
+
dotenv.load_dotenv()
|
12 |
+
|
13 |
# Setup logging
|
14 |
logger = logging.getLogger(__name__)
|
15 |
|
|
|
62 |
gen_prompt_template = load_prompt_from_file("../prompts/code_gen_prompt.txt", default_gen_prompt_template)
|
63 |
input_prompt = gen_prompt_template.format(prompt=prompt)
|
64 |
|
65 |
+
print(gen_prompt_template)
|
66 |
+
|
67 |
try:
|
68 |
llm = OpenAI(
|
69 |
model=gen_llm_model,
|
70 |
api_key=gen_api_key,
|
71 |
reasoning_effort="high",
|
72 |
+
temperature=0.0,
|
73 |
max_tokens=16384
|
74 |
)
|
75 |
logger.info(f"Using code generation LLM: {gen_llm_model}")
|
|
|
132 |
llm = GoogleGenAI(
|
133 |
api_key=gemini_api_key,
|
134 |
model=agent_llm_model,
|
135 |
+
temperature=0.0
|
136 |
)
|
137 |
logger.info(f"Using agent LLM: {agent_llm_model}")
|
138 |
|
139 |
# Load system prompt (consider loading from file)
|
140 |
default_system_prompt = """\
|
141 |
+
You are CodeAgent, a specialist in generating and executing Python code. Your mission:
|
142 |
+
|
143 |
+
1. **Thought**: Think step-by-step before acting and state your reasoning.
|
144 |
+
2. **Code Generation**: To produce code, call `python_code_generator` with a concise, unambiguous prompt. Review the generated code for correctness and safety.
|
145 |
+
3. **Execution & Testing**: To execute or test code, call `code_interpreter`. Provide the complete code snippet. Analyze its output (stdout, stderr, result) to verify functionality and debug errors.
|
146 |
+
4. **Iteration**: If execution fails or the result is incorrect, analyze the error, think about the fix, generate corrected code using `python_code_generator`, and execute again using `code_interpreter`.
|
147 |
+
5. **Tool Use**: Always adhere strictly to each tool’s input/output format.
|
148 |
+
6. **Final Output**: Once the code works correctly and achieves the goal, output *only* the final functional code or the final execution result, as appropriate for the task.
|
149 |
+
7. **Hand-Off**: If further logical reasoning or verification is needed, delegate to **reasoning_agent**. Otherwise, pass your final output to **planner_agent** for synthesis.
|
150 |
+
|
151 |
+
**Special Instructions for Chess-Related Tasks**:
|
152 |
+
- Prioritize using the Stockfish engine to solve chess problems.
|
153 |
+
- The Stockfish engine executable is located at `./stockfish`.
|
154 |
+
- To initialize Stockfish in code, use:
|
155 |
+
|
156 |
+
from stockfish import Stockfish
|
157 |
+
stockfish = Stockfish(path="./stockfish")  # alternatively, use the executable path stored in the "STOCKFISH_PATH" environment variable
|
158 |
+
|
159 |
+
- Use `python-chess` to represent boards, generate and validate moves, and parse PGN/FEN.
|
160 |
+
|
161 |
+
**Available Python Packages**:
|
162 |
+
|
163 |
+
- beautifulsoup4: HTML/XML parsing and lightweight web scraping
|
164 |
+
- certifi: Mozilla CA bundle for secure TLS/SSL requests
|
165 |
+
- datasets: Hugging Face dataset loading and streaming
|
166 |
+
- dotenv: Load environment variables from .env files
|
167 |
+
- duckdb: In‑process OLAP SQL engine (analytics, Parquet, Arrow)
|
168 |
+
- ffmpeg-python: Wrapper around FFmpeg for audio/video operations
|
169 |
+
- gradio[oauth]: Rapid web‑UI prototyping with optional OAuth
|
170 |
+
- helium: High‑level Selenium / browser automation toolkit
|
171 |
+
- huggingface: Interact with Hugging Face Hub models, datasets, spaces
|
172 |
+
- imageio: Read and write images, GIFs, MP4s, volumes, etc.
|
173 |
+
- matplotlib: 2‑D plotting (figures, axes, annotations)
|
174 |
+
- numpy: N‑dimensional arrays and vectorized math
|
175 |
+
- openai-whisper: Speech‑to‑text transcription
|
176 |
+
- opencv-python: Computer vision, image/video processing
|
177 |
+
- openpyxl: Excel .xlsx read/write, styles, formulas
|
178 |
+
- pandas: DataFrames, time series, CSV/Parquet I/O
|
179 |
+
- pyarrow: Apache Arrow tables, Parquet, Flight RPC
|
180 |
+
- pygame: Simple 2‑D game/graphics engine (SDL based)
|
181 |
+
- python-chess: Chess move generation, PGN/FEN handling, engine UCI integration
|
182 |
+
- requests: HTTP/HTTPS client with sessions and retries
|
183 |
+
- scikit-learn: Machine‑learning algorithms, preprocessing, pipelines
|
184 |
+
- scipy: Scientific computing, optimization, signal processing
|
185 |
+
- seaborn: Statistical visualization on top of matplotlib
|
186 |
+
- sqlalchemy: SQL ORM and core engine for many databases
|
187 |
+
- statsmodels: Econometrics and statistical modeling (GLM, ARIMA)
|
188 |
+
- stockfish: UCI interface to Stockfish chess engine
|
189 |
+
- sympy: Symbolic math, algebra, calculus CAS
|
190 |
+
- youtube-transcript-api: Fetch YouTube video transcripts via API
|
191 |
+
- yt-dlp: Download videos/playlists from YouTube and other sites
|
192 |
+
"""
|
193 |
+
|
194 |
system_prompt = load_prompt_from_file("code_agent_system_prompt.txt", default_system_prompt)
|
195 |
|
196 |
agent = ReActAgent(
|
|
|
227 |
"- stockfish==3.28.0 : UCI interface to Stockfish chess engine\n"
|
228 |
"- sympy>=1.14.0 : Symbolic math, algebra, calculus CAS\n"
|
229 |
"- youtube-transcript-api>=1.0.3 : Fetch YouTube video transcripts via API\n"
|
230 |
+
"- yt-dlp>=2025.3.31 : Download videos/playlists from YouTube and other sites\n\n"
|
231 |
+
"Additionally, the `stockfish` package enables the agent to solve chess problems by analyzing positions, "
|
232 |
+
"identifying tactical motifs, and calculating optimal move sequences, making it a valuable tool for chess training and analysis."
|
233 |
),
|
234 |
# REMOVED: code_execute_fn - Execution is handled by the code_interpreter tool via the agent loop.
|
235 |
tools=[
|
app.py
CHANGED
@@ -387,7 +387,7 @@ async def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
387 |
return "Failed to fetch questions.", None
|
388 |
|
389 |
# 3. Process Questions
|
390 |
-
|
391 |
for item in questions_data:
|
392 |
answers = await process_question(agent, item, fetch_file_url)
|
393 |
results_log.append(answers)
|
|
|
387 |
return "Failed to fetch questions.", None
|
388 |
|
389 |
# 3. Process Questions
|
390 |
+
questions_data = [questions_data[3]]
|
391 |
for item in questions_data:
|
392 |
answers = await process_question(agent, item, fetch_file_url)
|
393 |
results_log.append(answers)
|
prompts/code_gen_prompt.txt
CHANGED
@@ -8,49 +8,44 @@ You are CodeAgent, a specialist in generating and executing Python code. Your mi
|
|
8 |
6. **Final Output**: Once the code works correctly and achieves the goal, output *only* the final functional code or the final execution result, as appropriate for the task.
|
9 |
7. **Hand-Off**: If further logical reasoning or verification is needed, delegate to **reasoning_agent**. Otherwise, pass your final output to **planner_agent** for synthesis.
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
Do not use any markdown.
|
16 |
-
Notes:
|
17 |
-
- The generated code may be complex; it is recommended to review and test
|
18 |
-
it before execution.
|
19 |
-
- This function only generates code and does not execute it.
|
20 |
-
- The following Python packages are available in the environment:
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
datasets>=3.5.1,
|
25 |
-
dotenv>=0.9.9,
|
26 |
-
duckdb>=1.2.2,
|
27 |
-
ffmpeg-python>=0.2.0,
|
28 |
-
gradio[oauth]>=5.28.0,
|
29 |
-
helium>=5.1.1,
|
30 |
-
huggingface>=0.0.1,
|
31 |
-
imageio>=2.37.0,
|
32 |
-
matplotlib>=3.10.1,
|
33 |
-
numpy>=2.2.5,
|
34 |
-
openai-whisper>=20240930,
|
35 |
-
opencv-python>=4.11.0.86,
|
36 |
-
openpyxl>=3.1.5,
|
37 |
-
pandas>=2.2.3,
|
38 |
-
pyarrow>=20.0.0,
|
39 |
-
pygame>=2.6.1,
|
40 |
-
python-chess>=1.999,
|
41 |
-
requests>=2.32.3,
|
42 |
-
scikit-learn>=1.6.1,
|
43 |
-
scipy>=1.15.2,
|
44 |
-
seaborn>=0.13.2,
|
45 |
-
sqlalchemy>=2.0.40,
|
46 |
-
statsmodels>=0.14.4,
|
47 |
-
stockfish==3.28.0,
|
48 |
-
sympy>=1.14.0,
|
49 |
-
youtube-transcript-api>=1.0.3,
|
50 |
-
yt-dlp>=2025.3.31
|
51 |
|
52 |
-
|
53 |
|
54 |
-
|
55 |
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
6. **Final Output**: Once the code works correctly and achieves the goal, output *only* the final functional code or the final execution result, as appropriate for the task.
|
9 |
7. **Hand-Off**: If further logical reasoning or verification is needed, delegate to **reasoning_agent**. Otherwise, pass your final output to **planner_agent** for synthesis.
|
10 |
|
11 |
+
**Special Instructions for Chess-Related Tasks**:
|
12 |
+
- Prioritize using the Stockfish engine to solve chess problems.
|
13 |
+
- The Stockfish engine executable is located at `stockfish`; alternatively, the "STOCKFISH_PATH" environment variable holds the path to the executable.
|
14 |
+
- To initialize Stockfish in code, use:
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
+
from stockfish import Stockfish
|
17 |
+
stockfish = Stockfish(path="stockfish")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
+
- Use `python-chess` to represent boards, generate and validate moves, and parse PGN/FEN.
|
20 |
|
21 |
+
**Available Python Packages**:
|
22 |
|
23 |
+
- beautifulsoup4: HTML/XML parsing and lightweight web scraping
|
24 |
+
- certifi: Mozilla CA bundle for secure TLS/SSL requests
|
25 |
+
- datasets: Hugging Face dataset loading and streaming
|
26 |
+
- dotenv: Load environment variables from .env files
|
27 |
+
- duckdb: In‑process OLAP SQL engine (analytics, Parquet, Arrow)
|
28 |
+
- ffmpeg-python: Wrapper around FFmpeg for audio/video operations
|
29 |
+
- gradio[oauth]: Rapid web‑UI prototyping with optional OAuth
|
30 |
+
- helium: High‑level Selenium / browser automation toolkit
|
31 |
+
- huggingface: Interact with Hugging Face Hub models, datasets, spaces
|
32 |
+
- imageio: Read and write images, GIFs, MP4s, volumes, etc.
|
33 |
+
- matplotlib: 2‑D plotting (figures, axes, annotations)
|
34 |
+
- numpy: N‑dimensional arrays and vectorized math
|
35 |
+
- openai-whisper: Speech‑to‑text transcription
|
36 |
+
- opencv-python: Computer vision, image/video processing
|
37 |
+
- openpyxl: Excel .xlsx read/write, styles, formulas
|
38 |
+
- pandas: DataFrames, time series, CSV/Parquet I/O
|
39 |
+
- pyarrow: Apache Arrow tables, Parquet, Flight RPC
|
40 |
+
- pygame: Simple 2‑D game/graphics engine (SDL based)
|
41 |
+
- python-chess: Chess move generation, PGN/FEN handling, engine UCI integration
|
42 |
+
- requests: HTTP/HTTPS client with sessions and retries
|
43 |
+
- scikit-learn: Machine‑learning algorithms, preprocessing, pipelines
|
44 |
+
- scipy: Scientific computing, optimization, signal processing
|
45 |
+
- seaborn: Statistical visualization on top of matplotlib
|
46 |
+
- sqlalchemy: SQL ORM and core engine for many databases
|
47 |
+
- statsmodels: Econometrics and statistical modeling (GLM, ARIMA)
|
48 |
+
- stockfish: UCI interface to Stockfish chess engine
|
49 |
+
- sympy: Symbolic math, algebra, calculus CAS
|
50 |
+
- youtube-transcript-api: Fetch YouTube video transcripts via API
|
51 |
+
- yt-dlp: Download videos/playlists from YouTube and other sites
|