Commit 57cfafe
Parent(s): a119cd7

add template script for generating constraint models using LLMs

Files changed: template.py (+125 -1)
template.py
CHANGED
@@ -1 +1,125 @@
import json
from datasets import load_dataset
from openai import OpenAI
from together import Together  # imported so the Together AI client can be swapped in below
# --- Dataset Configuration (NOT TO BE CHANGED) ---
GT_DATASET_NAME = "kostis-init/CP-Bench"
DATASET_SPLIT = "train"
PROBLEM_ID_COLUMN = "id"
PROBLEM_DESCRIPTION_COLUMN = "description"
PROBLEM_DATA_COLUMN = "input_data"
PROBLEM_DECISION_VARS_COLUMN = "decision_variables"
# --- End of Dataset Configuration ---

#######################################################################
# This is an example script to generate constraint models using LLMs. #
# You can use this as a starting point for your own approach.         #
#######################################################################
# --- Submission Approach Configuration ---
LLM_CLIENT = OpenAI(api_key='YOUR_API_KEY')  # TODO: Replace with your OpenAI API key, and/or use another LLM client (e.g. Together AI)
LLM_ID = "gpt-4o"  # TODO: Choose the LLM model you want to use
LLM_TEMPERATURE = 0.5  # Controls the randomness of the output (the lower, the more deterministic)
LLM_SEED = 42  # Seed for reproducibility (optional, but recommended)
LLM_MAX_TOKENS = 3000  # Maximum number of tokens in the generated model (adjust based on your needs)
LLM_TOP_P = 1.0  # Top-p sampling parameter (1.0 means no filtering)
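# Note: when no api_key argument is passed, the OpenAI client falls back to the
# OPENAI_API_KEY environment variable, so `LLM_CLIENT = OpenAI()` with the key
# exported in your shell keeps the secret out of the script.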

TARGET_MODELLING_FRAMEWORK = "CPMpy"
OUTPUT_FILE = "template_submission.jsonl"
# This is the main instruction given to the LLM to generate the model.
SYSTEM_PROMPT_TEMPLATE = f"""You are an expert in constraint programming.
Your task is to convert the given natural language problem description into a complete and runnable {TARGET_MODELLING_FRAMEWORK} model.
The model should be self-contained.
If the problem implies specific data instances (e.g., sizes, arrays from the description), embed them directly into the model's data section or as fixed parameters.
The model must print its solution as a JSON object to standard output.
The keys in the JSON output should correspond to the decision variables relevant to the problem, which will be provided in the problem description.
Do not include any explanations or introductory text, just the model code between triple backticks.
For example:
```python
... (the model code here) ...
```
"""
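# For reference, a reply that satisfies the prompt above might look like the
# following CPMpy model (an illustrative sketch only: the toy problem, the
# variable name "x", and the constraints are invented, not taken from CP-Bench):
#
#   import json
#   import cpmpy as cp
#
#   # three integer variables that must all differ and sum to 12
#   x = cp.intvar(0, 10, shape=3, name="x")
#   model = cp.Model([cp.AllDifferent(x), cp.sum(x) == 12])
#   model.solve()
#   print(json.dumps({"x": x.value().tolist()}))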

def generate_model_with_llm(problem_id: str, description: str, input_data: str, decision_variables: list[str]) -> str:
    """Build the prompt for one problem and ask the LLM for a model."""
    user_prompt = f"Problem Description:\n{description}\n\n"
    if decision_variables:
        user_prompt += (f"The solution should be a JSON object. "
                        f"The key(s) should strictly be: {', '.join(decision_variables)}.\n\n")
    if input_data:
        user_prompt += f"Input Data:\n{input_data}\n\n"
    user_prompt += f"Generate the {TARGET_MODELLING_FRAMEWORK} model."

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT_TEMPLATE},
        {"role": "user", "content": user_prompt}
    ]

    try:
        print(f"  [LLM Call] Generating model for problem: {problem_id} (Framework: {TARGET_MODELLING_FRAMEWORK})...")
        response = LLM_CLIENT.chat.completions.create(
            messages=messages,
            model=LLM_ID,
            temperature=LLM_TEMPERATURE,
            seed=LLM_SEED,
            max_tokens=LLM_MAX_TOKENS,
            top_p=LLM_TOP_P
        )
        max_tokens_reached = (response.choices[0].finish_reason == 'length')
        if max_tokens_reached:
            print(f"  [LLM Call] Warning: Max tokens reached for problem {problem_id}. The model may be incomplete.")

        # Strip the ```python ... ``` fence that the system prompt asks for.
        generated_model = response.choices[0].message.content.strip()
        if '```python' in generated_model:
            generated_model = generated_model.split('```python')[1].split('```')[0].strip()

        print(f"  [LLM Call] Successfully generated model for problem: {problem_id}")
        return generated_model
    except Exception as e:
        print(f"  [LLM Call] Error generating model for problem {problem_id}: {type(e).__name__} - {e}")
        return f"# Error generating model for problem {problem_id}: {type(e).__name__} - {e}\n# Please check the LLM configuration and try again."

def main():
    print(f"Starting model generation script for {TARGET_MODELLING_FRAMEWORK}.")
    print(f"Loading dataset '{GT_DATASET_NAME}'...")

    try:
        dataset = load_dataset(GT_DATASET_NAME, split=DATASET_SPLIT, trust_remote_code=True)
        print(f"Dataset loaded. Number of problems: {len(dataset)}")
    except Exception as e:
        print(f"CRITICAL ERROR - Failed to load dataset '{GT_DATASET_NAME}': {type(e).__name__} - {e}")
        return

    print(f"Generating models and writing to '{OUTPUT_FILE}'...")
    count_generated = 0
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f_out:
        for i, item in enumerate(dataset):
            problem_id = item.get(PROBLEM_ID_COLUMN)
            description = item.get(PROBLEM_DESCRIPTION_COLUMN)
            input_data = item.get(PROBLEM_DATA_COLUMN)
            decision_vars = item.get(PROBLEM_DECISION_VARS_COLUMN)

            if not problem_id or description is None:
                print(f"  Skipping item {i + 1} due to missing ID or description (is None). Problem data: {item}")
                continue

            print(f"\nProcessing problem {i + 1}/{len(dataset)}: ID = '{problem_id}'")

            generated_model_str = generate_model_with_llm(problem_id, description, input_data, decision_vars)
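            # Each submission entry becomes one line of the JSONL file, e.g.
            # (illustrative values, not an actual CP-Bench problem id):
            #   {"id": "problem_1", "model": "import cpmpy as cp\n..."}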
            submission_entry = {
                "id": problem_id,
                "model": generated_model_str
            }
            f_out.write(json.dumps(submission_entry) + '\n')
            count_generated += 1

    print(f"\nProcessing complete. {count_generated} models generated and saved to '{OUTPUT_FILE}'.")


if __name__ == "__main__":
    main()
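A quick way to sanity-check the resulting submission file before uploading it
(a separate sketch, not part of the commit; it only assumes the OUTPUT_FILE
name used above):

import json

# read template_submission.jsonl back and confirm every line is a JSON
# object carrying the keys the submission format expects
with open("template_submission.jsonl", encoding="utf-8") as f:
    for lineno, line in enumerate(f, 1):
        entry = json.loads(line)
        assert {"id", "model"} <= entry.keys(), f"line {lineno} is missing keys"
print("all entries look well-formed")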