|
import json |
|
from datasets import load_dataset |
|
from openai import OpenAI |
|
# from together import Together  # optional: Together's OpenAI-compatible client (see the note below the LLM settings)
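# Dependencies: `pip install datasets openai` (add `together` only if you switch clients below).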
|
|
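# Ground-truth dataset on the Hugging Face Hub and the column names read from each problem entry.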
GT_DATASET_NAME = "kostis-init/CP-Bench"
DATASET_SPLIT = "train"
PROBLEM_ID_COLUMN = "id"
PROBLEM_DESCRIPTION_COLUMN = "description"
PROBLEM_DATA_COLUMN = "input_data"
PROBLEM_DECISION_VARS_COLUMN = "decision_variables"
|
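# LLM client and sampling settings. Replace 'YOUR_API_KEY' with your own key, or call
# OpenAI() with no arguments to read it from the OPENAI_API_KEY environment variable.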
LLM_CLIENT = OpenAI(api_key='YOUR_API_KEY')
LLM_ID = "gpt-4o"
LLM_TEMPERATURE = 0.5
LLM_SEED = 42
LLM_MAX_TOKENS = 3000
LLM_TOP_P = 1.0
|
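# Untested sketch of an alternative setup: the Together SDK exposes the same
# chat-completions interface, so it should work as a drop-in replacement here.
# Uncomment the `together` import at the top and set, for example:
# LLM_CLIENT = Together(api_key='YOUR_TOGETHER_API_KEY')
# LLM_ID = "<a Together-hosted model id>"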
|
|
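# Target framework named in the prompt, and the JSON Lines output file
# (one object per problem with "id" and "model" keys).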
TARGET_MODELLING_FRAMEWORK = "CPMpy"
OUTPUT_FILE = "template_submission.jsonl"
|
|
SYSTEM_PROMPT_TEMPLATE = f"""You are an expert in constraint programming.
Your task is to convert the given natural language problem description into a complete and runnable {TARGET_MODELLING_FRAMEWORK} model.
The model should be self-contained.
If the problem implies specific data instances (e.g., sizes, arrays from the description), embed them directly into the model's data section or as fixed parameters.
The model must print its solution as a JSON object to standard output.
The keys in the JSON output should correspond to the decision variables relevant to the problem, which will be listed in the prompt.
Do not include any explanations or introductory text, just the model code between triple backticks.
For example:
```python
... (the model code here) ...
```
"""
|
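# For illustration only (hypothetical example, not used by this script): a minimal
# CPMpy model of the kind the prompt asks for, printing its solution as JSON.
#
#   import json
#   from cpmpy import Model, intvar
#
#   x = intvar(1, 9, shape=3, name="x")
#   Model(sum(x) == 15).solve()
#   print(json.dumps({"x": x.value().tolist()}))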
|
|
def generate_model_with_llm(problem_id: str, description: str, input_data: str, decision_variables: list[str]) -> str:
    """Ask the configured LLM to model one problem and return the generated code as a string."""
    # Build the user prompt from the problem description, the required solution keys, and the input data.
    user_prompt = f"Problem Description:\n{description}\n\n"
    if decision_variables:
        user_prompt += (f"The solution should be a JSON object. "
                        f"The key(s) should strictly be: {', '.join(decision_variables)}.\n\n")
    if input_data:
        user_prompt += f"Input Data:\n{input_data}\n\n"
    user_prompt += f"Generate the {TARGET_MODELLING_FRAMEWORK} model."

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT_TEMPLATE},
        {"role": "user", "content": user_prompt}
    ]

    try:
        print(f"  [LLM Call] Generating model for problem: {problem_id} (Framework: {TARGET_MODELLING_FRAMEWORK})...")
        response = LLM_CLIENT.chat.completions.create(
            messages=messages,
            model=LLM_ID,
            temperature=LLM_TEMPERATURE,
            seed=LLM_SEED,
            max_tokens=LLM_MAX_TOKENS,
            top_p=LLM_TOP_P
        )
        max_tokens_reached = (response.choices[0].finish_reason == 'length')
        if max_tokens_reached:
            print(f"  [LLM Call] Warning: Max tokens reached for problem {problem_id}. The model may be incomplete.")

        generated_model = response.choices[0].message.content.strip()
        # Strip the surrounding markdown code fence, if the LLM included one.
        if '```python' in generated_model:
            generated_model = generated_model.split('```python')[1].split('```')[0].strip()
        elif '```' in generated_model:
            generated_model = generated_model.split('```')[1].split('```')[0].strip()

        print(f"  [LLM Call] Successfully generated model for problem: {problem_id}")
        return generated_model
    except Exception as e:
        print(f"  [LLM Call] Error generating model for problem {problem_id}: {type(e).__name__} - {e}")
        return f"# Error generating model for problem {problem_id}: {type(e).__name__} - {e}\n# Please check the LLM configuration and try again."
|
|
def main():
    """Generate one model per problem in the dataset and write them to a JSONL submission file."""
    print(f"Starting model generation script for {TARGET_MODELLING_FRAMEWORK}.")
    print(f"Loading dataset '{GT_DATASET_NAME}'...")

    try:
        dataset = load_dataset(GT_DATASET_NAME, split=DATASET_SPLIT, trust_remote_code=True)
        print(f"Dataset loaded. Number of problems: {len(dataset)}")
    except Exception as e:
        print(f"CRITICAL ERROR - Failed to load dataset '{GT_DATASET_NAME}': {type(e).__name__} - {e}")
        return

    print(f"Generating models and writing to '{OUTPUT_FILE}'...")
    count_generated = 0
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f_out:
        for i, item in enumerate(dataset):
            problem_id = item.get(PROBLEM_ID_COLUMN)
            description = item.get(PROBLEM_DESCRIPTION_COLUMN)
            input_data = item.get(PROBLEM_DATA_COLUMN)
            decision_vars = item.get(PROBLEM_DECISION_VARS_COLUMN)

            if not problem_id or description is None:
                print(f"  Skipping item {i + 1} due to missing ID or description. Problem data: {item}")
                continue

            print(f"\nProcessing problem {i + 1}/{len(dataset)}: ID = '{problem_id}'")

            generated_model_str = generate_model_with_llm(problem_id, description, input_data, decision_vars)

            # One JSON object per line: the problem id and the generated model code.
            submission_entry = {
                "id": problem_id,
                "model": generated_model_str
            }
            f_out.write(json.dumps(submission_entry) + '\n')
            count_generated += 1

    print(f"\nProcessing complete. {count_generated} models generated and saved to '{OUTPUT_FILE}'.")


if __name__ == "__main__":
    main()