kostis-init committed
Commit 57cfafe · 1 Parent(s): a119cd7

add template script for generating constraint models using LLMs

Files changed (1):
  template.py +125 -1
template.py CHANGED
@@ -1 +1,125 @@
- # TODO
+ import json
+ from datasets import load_dataset
+ from openai import OpenAI
+ from together import Together
+
+ # --- Dataset Configuration (NOT TO BE CHANGED) ---
+ GT_DATASET_NAME = "kostis-init/CP-Bench"
+ DATASET_SPLIT = "train"
+ PROBLEM_ID_COLUMN = "id"
+ PROBLEM_DESCRIPTION_COLUMN = "description"
+ PROBLEM_DATA_COLUMN = "input_data"
+ PROBLEM_DECISION_VARS_COLUMN = "decision_variables"
+ # --- End of Dataset Configuration ---
+
+ #######################################################################
+ # This is an example script to generate constraint models using LLMs. #
+ # You can use this as a starting point for your own approach.         #
+ #######################################################################
+
+ # --- Submission Approach Configuration ---
+ LLM_CLIENT = OpenAI(api_key='YOUR_API_KEY')  # TODO: Replace with your OpenAI API key, and/or use another LLM client (e.g. Together AI)
+ LLM_ID = "gpt-4o"  # TODO: Choose the LLM model you want to use
+ LLM_TEMPERATURE = 0.5  # Controls the randomness of the output (the lower, the more deterministic)
+ LLM_SEED = 42  # Seed for reproducibility (optional, but recommended)
+ LLM_MAX_TOKENS = 3000  # Maximum number of tokens in the generated model (adjust based on your needs)
+ LLM_TOP_P = 1.0  # Top-p sampling parameter (1.0 means no filtering)
+
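+ # For example, to switch to Together AI (presumably why `Together` is
+ # imported above), the client could be configured like this; the key
+ # placeholder and model id are illustrative, not prescribed by this template:
+ #   LLM_CLIENT = Together(api_key='YOUR_TOGETHER_API_KEY')
+ #   LLM_ID = "meta-llama/Llama-3.3-70B-Instruct-Turbo"
+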
+ TARGET_MODELLING_FRAMEWORK = "CPMpy"
+ OUTPUT_FILE = "template_submission.jsonl"
+
+ # This is the main instruction given to the LLM to generate the model.
+ SYSTEM_PROMPT_TEMPLATE = f"""You are an expert in constraint programming.
+ Your task is to convert the given natural language problem description into a complete and runnable {TARGET_MODELLING_FRAMEWORK} model.
+ The model should be self-contained.
+ If the problem implies specific data instances (e.g., sizes, arrays from the description), embed them directly into the model's data section or as fixed parameters.
+ The model must print its solution as a JSON object to standard output.
+ The keys in the JSON output should correspond to the decision variables relevant to the problem, which will be provided in the problem description.
+ Do not include any explanations or introductory text, just the model code between triple backticks.
+ For example:
+ ```python
+ ... (the model code here) ...
+ ```
+ """
+
+
+ def generate_model_with_llm(problem_id: str, description: str, input_data: str, decision_variables: list[str]) -> str:
+
+     user_prompt = f"Problem Description:\n{description}\n\n"
+     if decision_variables:
+         user_prompt += (f"The solution should be a JSON object. "
+                         f"The key(s) should strictly be: {', '.join(decision_variables)}.\n\n")
+     if input_data:
+         user_prompt += f"Input Data:\n{input_data}\n\n"
+     user_prompt += f"Generate the {TARGET_MODELLING_FRAMEWORK} model."
+
+     messages = [
+         {"role": "system", "content": SYSTEM_PROMPT_TEMPLATE},
+         {"role": "user", "content": user_prompt}
+     ]
+
+     try:
+         print(f" [LLM Call] Generating model for problem: {problem_id} (Framework: {TARGET_MODELLING_FRAMEWORK})...")
+         response = LLM_CLIENT.chat.completions.create(
+             messages=messages,
+             model=LLM_ID,
+             temperature=LLM_TEMPERATURE,
+             seed=LLM_SEED,
+             max_tokens=LLM_MAX_TOKENS,
+             top_p=LLM_TOP_P
+         )
+         max_tokens_reached = (response.choices[0].finish_reason == 'length')
+         if max_tokens_reached:
+             print(f" [LLM Call] Warning: Max tokens reached for problem {problem_id}. The model may be incomplete.")
+
+         generated_model = response.choices[0].message.content.strip()
+         # Extract the code between the triple backticks requested in the system prompt.
+         if '```python' in generated_model:
+             generated_model = generated_model.split('```python')[1].split('```')[0].strip()
+
+         print(f" [LLM Call] Successfully generated model for problem: {problem_id}")
+         return generated_model
+     except Exception as e:
+         print(f" [LLM Call] Error generating model for problem {problem_id}: {type(e).__name__} - {e}")
+         return f"# Error generating model for problem {problem_id}: {type(e).__name__} - {e}\n# Please check the LLM configuration and try again."
+
+
+ def main():
+     print(f"Starting model generation script for {TARGET_MODELLING_FRAMEWORK}.")
+     print(f"Loading dataset '{GT_DATASET_NAME}'...")
+
+     try:
+         dataset = load_dataset(GT_DATASET_NAME, split=DATASET_SPLIT, trust_remote_code=True)
+         print(f"Dataset loaded. Number of problems: {len(dataset)}")
+     except Exception as e:
+         print(f"CRITICAL ERROR - Failed to load dataset '{GT_DATASET_NAME}': {type(e).__name__} - {e}")
+         return
+
+     print(f"Generating models and writing to '{OUTPUT_FILE}'...")
+     count_generated = 0
+     with open(OUTPUT_FILE, 'w', encoding='utf-8') as f_out:
+         for i, item in enumerate(dataset):
+             problem_id = item.get(PROBLEM_ID_COLUMN)
+             description = item.get(PROBLEM_DESCRIPTION_COLUMN)
+             input_data = item.get(PROBLEM_DATA_COLUMN)
+             decision_vars = item.get(PROBLEM_DECISION_VARS_COLUMN)
+
+             if not problem_id or description is None:
+                 print(f" Skipping item {i + 1} due to missing ID or description. Problem data: {item}")
+                 continue
+
+             print(f"\nProcessing problem {i + 1}/{len(dataset)}: ID = '{problem_id}'")
+
+             generated_model_str = generate_model_with_llm(problem_id, description, input_data, decision_vars)
+
+             submission_entry = {
+                 "id": problem_id,
+                 "model": generated_model_str
+             }
+             f_out.write(json.dumps(submission_entry) + '\n')
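+             # Each line written above is one JSON object per problem, e.g.
+             # {"id": "<problem_id>", "model": "<generated CPMpy code>"}
+             # (illustrative shape; actual ids come from the CP-Bench dataset).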
+             count_generated += 1
+
+     print(f"\nProcessing complete. {count_generated} models generated and saved to '{OUTPUT_FILE}'.")
+
+
+ if __name__ == "__main__":
+     main()