update leaderboard entry parsing, enhance LLM client configuration, and correct submission file link
60a95c1
import json

from datasets import load_dataset
from openai import OpenAI
from together import Together
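# Note: this script relies on the 'datasets', 'openai', and 'together' packages
# (e.g., pip install datasets openai together).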

# === DATASET CONFIGURATION (DO NOT MODIFY) ===
GT_DATASET_NAME = "kostis-init/CP-Bench"
DATASET_SPLIT = "train"
PROBLEM_ID_COLUMN = "id"
PROBLEM_DESCRIPTION_COLUMN = "description"
PROBLEM_DATA_COLUMN = "input_data"
PROBLEM_DECISION_VARS_COLUMN = "decision_variables"
# ==============================================
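
# A quick, optional way to inspect one record before a full run (illustrative
# sketch; it only relies on the column names configured above):
#   _ds = load_dataset(GT_DATASET_NAME, split=DATASET_SPLIT)
#   print(_ds[0][PROBLEM_ID_COLUMN], str(_ds[0][PROBLEM_DESCRIPTION_COLUMN])[:200])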

#######################################################################
# Template script to generate constraint models using LLMs. #
# You can use this as a starting point for your own approach. #
#######################################################################

# === CHOOSE LLM CLIENT AND MODEL CONFIGURATION ===
# TODO: Uncomment and configure the LLM client you want to use.

# Example 1: OpenAI (e.g., GPT-4o)
# LLM_CLIENT = OpenAI(api_key="YOUR_API_KEY")
# LLM_ID = "gpt-4o"

# Example 2: DeepSeek (or any other OpenAI-compatible API)
# LLM_CLIENT = OpenAI(api_key="DEEPSEEK_API_KEY", base_url="https://api.deepseek.com")
# LLM_ID = "deepseek-chat"

# Example 3: Together.ai
LLM_CLIENT = Together(api_key="TOGETHER_API_KEY")
LLM_ID = "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"

LLM_TEMPERATURE = 0.5   # Controls the randomness of the output (the lower, the more deterministic)
LLM_SEED = 42           # Seed for reproducibility (optional, but recommended)
LLM_MAX_TOKENS = 3000   # Maximum number of tokens in the generated model (adjust based on your needs)
LLM_TOP_P = 1.0         # Top-p sampling parameter (1.0 means no filtering)

TARGET_MODELLING_FRAMEWORK = "CPMpy"
OUTPUT_FILE = "template_submission.jsonl"
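
# Each line of OUTPUT_FILE will be one JSON object with the fields written by main()
# below (shown here for reference; the id and model text are made up):
#   {"id": "some_problem_id", "model": "import cpmpy as cp\n..."}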

# === TODO: Write the main instruction given to the LLM to generate the model. ===
SYSTEM_PROMPT_TEMPLATE = f"""You are an expert in constraint programming.
Your task is to convert the given natural language problem description into a complete and runnable {TARGET_MODELLING_FRAMEWORK} model.
The model should be self-contained.
If the problem implies specific data instances (e.g., sizes, arrays from the description), embed them directly into the model's data section or as fixed parameters.
The model must print its solution as a JSON object to standard output.
The keys in the JSON output should correspond to the decision variables relevant to the problem, which will be provided in the problem description.
Do not include any explanations or introductory text, just the model code between triple backticks.
For example:
```python
# model code here
```
"""

# === MAIN LOGIC, TODO: You can adapt this function to try different prompting strategies ===
def generate_model_with_llm(problem_id: str, description: str, input_data: str, decision_variables: list[str]) -> str:
    """Build the prompt for a single problem, call the LLM, and return the extracted model code."""
    user_prompt = f"Problem Description:\n{description}\n\n"
    if input_data:
        user_prompt += f"Input Data:\n{input_data}\n\n"
    if decision_variables:
        user_prompt += f"The model must output a JSON with these keys: {', '.join(decision_variables)}.\n\n"
    user_prompt += f"Generate the {TARGET_MODELLING_FRAMEWORK} model."

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT_TEMPLATE},
        {"role": "user", "content": user_prompt}
    ]

    try:
        print(f" [LLM Call] Generating model for problem: {problem_id} (Framework: {TARGET_MODELLING_FRAMEWORK})...")
        response = LLM_CLIENT.chat.completions.create(
            messages=messages,
            model=LLM_ID,
            temperature=LLM_TEMPERATURE,
            seed=LLM_SEED,
            max_tokens=LLM_MAX_TOKENS,
            top_p=LLM_TOP_P
        )

        max_tokens_reached = (response.choices[0].finish_reason == 'length')
        if max_tokens_reached:
            print(f" [LLM Call] Warning: Max tokens reached for problem {problem_id}. The model may be incomplete.")

        generated_model = response.choices[0].message.content.strip()
        # Strip the surrounding markdown code fence, if the LLM included one.
        if '```python' in generated_model:
            generated_model = generated_model.split('```python')[1].split('```')[0].strip()

        print(f" [LLM Call] Successfully generated model for problem: {problem_id}")
        return generated_model
    except Exception as e:
        print(f" [LLM Call] Error generating model for problem {problem_id}: {type(e).__name__} - {e}")
        return f"# Error generating model for problem {problem_id}: {type(e).__name__} - {e}\n# Please check the LLM configuration and try again."
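
# Example call (illustrative; the id, description, and variable name are made up):
#   model_code = generate_model_with_llm("toy-001", "Find x in 0..10 such that x + 3 == 7.", "", ["x"])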

# === MAIN EXECUTION LOOP (No need to change) ===
def main():
    print(f"Starting model generation script for {TARGET_MODELLING_FRAMEWORK}.")

    print(f"Loading dataset '{GT_DATASET_NAME}'...")
    try:
        dataset = load_dataset(GT_DATASET_NAME, split=DATASET_SPLIT, trust_remote_code=True)
        print(f"Dataset loaded. Number of problems: {len(dataset)}")
    except Exception as e:
        print(f"CRITICAL ERROR - Failed to load dataset '{GT_DATASET_NAME}': {type(e).__name__} - {e}")
        return

    print(f"Generating models and writing to '{OUTPUT_FILE}'...")
    count_generated = 0
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f_out:
        for i, item in enumerate(dataset):
            problem_id = item.get(PROBLEM_ID_COLUMN)
            description = item.get(PROBLEM_DESCRIPTION_COLUMN)
            input_data = item.get(PROBLEM_DATA_COLUMN)
            decision_vars = item.get(PROBLEM_DECISION_VARS_COLUMN)

            if not problem_id or description is None:
                print(f" Skipping item {i + 1} due to missing ID or description. Problem data: {item}")
                continue

            print(f"\nProcessing problem {i + 1}/{len(dataset)}: ID = '{problem_id}'")
            generated_model_str = generate_model_with_llm(problem_id, description, input_data, decision_vars)

            # One JSON object per line: the problem id and the generated model code.
            submission_entry = {
                "id": problem_id,
                "model": generated_model_str
            }
            f_out.write(json.dumps(submission_entry) + '\n')
            count_generated += 1

    print(f"\nProcessing complete. {count_generated} models generated and saved to '{OUTPUT_FILE}'.")

if __name__ == "__main__":
    main()
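
# Optional sanity check after the run (a minimal sketch; it only assumes the JSONL
# format written above):
#   with open(OUTPUT_FILE, encoding='utf-8') as f:
#       for line in f:
#           entry = json.loads(line)
#           assert "id" in entry and "model" in entry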