Commit 57cfafe
Parent(s): a119cd7

add template script for generating constraint models using LLMs

Files changed: template.py (+125 -1)
template.py
CHANGED
@@ -1 +1,125 @@
import json
from datasets import load_dataset
from openai import OpenAI
from together import Together  # imported so the Together AI client can be swapped in below
# --- Dataset Configuration (NOT TO BE CHANGED) ---
GT_DATASET_NAME = "kostis-init/CP-Bench"
DATASET_SPLIT = "train"
PROBLEM_ID_COLUMN = "id"
PROBLEM_DESCRIPTION_COLUMN = "description"
PROBLEM_DATA_COLUMN = "input_data"
PROBLEM_DECISION_VARS_COLUMN = "decision_variables"
# --- End of Dataset Configuration ---

#######################################################################
# This is an example script to generate constraint models using LLMs. #
# You can use this as a starting point for your own approach.         #
#######################################################################
# --- Submission Approach Configuration ---
LLM_CLIENT = OpenAI(api_key='YOUR_API_KEY')  # TODO: Replace with your OpenAI API key, and/or use another LLM client (e.g. Together AI)
LLM_ID = "gpt-4o"  # TODO: Choose the LLM model you want to use
LLM_TEMPERATURE = 0.5  # Controls the randomness of the output (the lower, the more deterministic)
LLM_SEED = 42  # Seed for reproducibility (optional, but recommended)
LLM_MAX_TOKENS = 3000  # Maximum number of tokens in the generated model (adjust based on your needs)
LLM_TOP_P = 1.0  # Top-p sampling parameter (1.0 means no filtering)
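# Note: when no api_key argument is passed, the OpenAI client falls back to the
# OPENAI_API_KEY environment variable, so `LLM_CLIENT = OpenAI()` with the key
# exported in your shell keeps the secret out of the script.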

TARGET_MODELLING_FRAMEWORK = "CPMpy"
OUTPUT_FILE = "template_submission.jsonl"
# This is the main instruction given to the LLM to generate the model.
SYSTEM_PROMPT_TEMPLATE = f"""You are an expert in constraint programming.
Your task is to convert the given natural language problem description into a complete and runnable {TARGET_MODELLING_FRAMEWORK} model.
The model should be self-contained.
If the problem implies specific data instances (e.g., sizes, arrays from the description), embed them directly into the model's data section or as fixed parameters.
The model must print its solution as a JSON object to standard output.
The keys in the JSON output should correspond to the decision variables relevant to the problem, which will be provided in the problem description.
Do not include any explanations or introductory text, just the model code between triple backticks.
For example:
```python
... (the model code here) ...
```
"""
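# For reference, a reply that satisfies the prompt above might look like the
# following CPMpy model (an illustrative sketch only: the toy problem, the
# variable name "x", and the constraints are invented, not taken from CP-Bench):
#
#   import json
#   import cpmpy as cp
#
#   # three integer variables that must all differ and sum to 12
#   x = cp.intvar(0, 10, shape=3, name="x")
#   model = cp.Model([cp.AllDifferent(x), cp.sum(x) == 12])
#   model.solve()
#   print(json.dumps({"x": x.value().tolist()}))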

def generate_model_with_llm(problem_id: str, description: str, input_data: str, decision_variables: list[str]) -> str:
    """Build the prompt for one problem and ask the LLM for a model."""
    user_prompt = f"Problem Description:\n{description}\n\n"
    if decision_variables:
        user_prompt += (f"The solution should be a JSON object. "
                        f"The key(s) should strictly be: {', '.join(decision_variables)}.\n\n")
    if input_data:
        user_prompt += f"Input Data:\n{input_data}\n\n"
    user_prompt += f"Generate the {TARGET_MODELLING_FRAMEWORK} model."

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT_TEMPLATE},
        {"role": "user", "content": user_prompt}
    ]

    try:
        print(f"  [LLM Call] Generating model for problem: {problem_id} (Framework: {TARGET_MODELLING_FRAMEWORK})...")
        response = LLM_CLIENT.chat.completions.create(
            messages=messages,
            model=LLM_ID,
            temperature=LLM_TEMPERATURE,
            seed=LLM_SEED,
            max_tokens=LLM_MAX_TOKENS,
            top_p=LLM_TOP_P
        )
        max_tokens_reached = (response.choices[0].finish_reason == 'length')
        if max_tokens_reached:
            print(f"  [LLM Call] Warning: Max tokens reached for problem {problem_id}. The model may be incomplete.")

        # Strip the ```python ... ``` fence that the system prompt asks for.
        generated_model = response.choices[0].message.content.strip()
        if '```python' in generated_model:
            generated_model = generated_model.split('```python')[1].split('```')[0].strip()

        print(f"  [LLM Call] Successfully generated model for problem: {problem_id}")
        return generated_model
    except Exception as e:
        print(f"  [LLM Call] Error generating model for problem {problem_id}: {type(e).__name__} - {e}")
        return f"# Error generating model for problem {problem_id}: {type(e).__name__} - {e}\n# Please check the LLM configuration and try again."

def main():
    print(f"Starting model generation script for {TARGET_MODELLING_FRAMEWORK}.")
    print(f"Loading dataset '{GT_DATASET_NAME}'...")

    try:
        dataset = load_dataset(GT_DATASET_NAME, split=DATASET_SPLIT, trust_remote_code=True)
        print(f"Dataset loaded. Number of problems: {len(dataset)}")
    except Exception as e:
        print(f"CRITICAL ERROR - Failed to load dataset '{GT_DATASET_NAME}': {type(e).__name__} - {e}")
        return

    print(f"Generating models and writing to '{OUTPUT_FILE}'...")
    count_generated = 0
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f_out:
        for i, item in enumerate(dataset):
            problem_id = item.get(PROBLEM_ID_COLUMN)
            description = item.get(PROBLEM_DESCRIPTION_COLUMN)
            input_data = item.get(PROBLEM_DATA_COLUMN)
            decision_vars = item.get(PROBLEM_DECISION_VARS_COLUMN)

            if not problem_id or description is None:
                print(f"  Skipping item {i + 1} due to missing ID or description (is None). Problem data: {item}")
                continue

            print(f"\nProcessing problem {i + 1}/{len(dataset)}: ID = '{problem_id}'")

            generated_model_str = generate_model_with_llm(problem_id, description, input_data, decision_vars)
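            # Each submission entry becomes one line of the JSONL file, e.g.
            # (illustrative values, not an actual CP-Bench problem id):
            #   {"id": "problem_1", "model": "import cpmpy as cp\n..."}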
            submission_entry = {
                "id": problem_id,
                "model": generated_model_str
            }
            f_out.write(json.dumps(submission_entry) + '\n')
            count_generated += 1

    print(f"\nProcessing complete. {count_generated} models generated and saved to '{OUTPUT_FILE}'.")


if __name__ == "__main__":
    main()
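A quick way to sanity-check the resulting submission file before uploading it
(a separate sketch, not part of the commit; it only assumes the OUTPUT_FILE
name used above):

import json

# read template_submission.jsonl back and confirm every line is a JSON
# object carrying the keys the submission format expects
with open("template_submission.jsonl", encoding="utf-8") as f:
    for lineno, line in enumerate(f, 1):
        entry = json.loads(line)
        assert {"id", "model"} <= entry.keys(), f"line {lineno} is missing keys"
print("all entries look well-formed")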