update leaderboard entry parsing, enhance LLM client configuration, and correct submission file link
60a95c1
import json

from datasets import load_dataset
from openai import OpenAI
from together import Together
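# Note: this script relies on the 'datasets', 'openai', and 'together' packages
# (e.g., pip install datasets openai together).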

# === DATASET CONFIGURATION (DO NOT MODIFY) ===
GT_DATASET_NAME = "kostis-init/CP-Bench"
DATASET_SPLIT = "train"
PROBLEM_ID_COLUMN = "id"
PROBLEM_DESCRIPTION_COLUMN = "description"
PROBLEM_DATA_COLUMN = "input_data"
PROBLEM_DECISION_VARS_COLUMN = "decision_variables"
# ==============================================
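
# A quick, optional way to inspect one record before a full run (illustrative
# sketch; it only relies on the column names configured above):
#   _ds = load_dataset(GT_DATASET_NAME, split=DATASET_SPLIT)
#   print(_ds[0][PROBLEM_ID_COLUMN], str(_ds[0][PROBLEM_DESCRIPTION_COLUMN])[:200])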

#######################################################################
# Template script to generate constraint models using LLMs. #
# You can use this as a starting point for your own approach. #
#######################################################################

# === CHOOSE LLM CLIENT AND MODEL CONFIGURATION ===
# TODO: Uncomment and configure the LLM client you want to use.

# Example 1: OpenAI (e.g., GPT-4o)
# LLM_CLIENT = OpenAI(api_key="YOUR_API_KEY")
# LLM_ID = "gpt-4o"

# Example 2: DeepSeek (or any other OpenAI-compatible API)
# LLM_CLIENT = OpenAI(api_key="DEEPSEEK_API_KEY", base_url="https://api.deepseek.com")
# LLM_ID = "deepseek-chat"

# Example 3: Together.ai
LLM_CLIENT = Together(api_key="TOGETHER_API_KEY")
LLM_ID = "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"

LLM_TEMPERATURE = 0.5   # Controls the randomness of the output (the lower, the more deterministic)
LLM_SEED = 42           # Seed for reproducibility (optional, but recommended)
LLM_MAX_TOKENS = 3000   # Maximum number of tokens in the generated model (adjust based on your needs)
LLM_TOP_P = 1.0         # Top-p sampling parameter (1.0 means no filtering)

TARGET_MODELLING_FRAMEWORK = "CPMpy"
OUTPUT_FILE = "template_submission.jsonl"
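
# Each line of OUTPUT_FILE will be one JSON object with the fields written by main()
# below (shown here for reference; the id and model text are made up):
#   {"id": "some_problem_id", "model": "import cpmpy as cp\n..."}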

# === TODO: Write the main instruction given to the LLM to generate the model. ===
SYSTEM_PROMPT_TEMPLATE = f"""You are an expert in constraint programming.
Your task is to convert the given natural language problem description into a complete and runnable {TARGET_MODELLING_FRAMEWORK} model.
The model should be self-contained.
If the problem implies specific data instances (e.g., sizes, arrays from the description), embed them directly into the model's data section or as fixed parameters.
The model must print its solution as a JSON object to standard output.
The keys in the JSON output should correspond to the decision variables relevant to the problem, which will be provided in the problem description.
Do not include any explanations or introductory text, just the model code between triple backticks.
For example:
```python
# model code here
```
"""

# === MAIN LOGIC, TODO: You can adapt this function to try different prompting strategies ===
def generate_model_with_llm(problem_id: str, description: str, input_data: str, decision_variables: list[str]) -> str:
    """Build the prompt for a single problem, call the LLM, and return the extracted model code."""
    user_prompt = f"Problem Description:\n{description}\n\n"
    if input_data:
        user_prompt += f"Input Data:\n{input_data}\n\n"
    if decision_variables:
        user_prompt += f"The model must output a JSON with these keys: {', '.join(decision_variables)}.\n\n"
    user_prompt += f"Generate the {TARGET_MODELLING_FRAMEWORK} model."

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT_TEMPLATE},
        {"role": "user", "content": user_prompt}
    ]

    try:
        print(f" [LLM Call] Generating model for problem: {problem_id} (Framework: {TARGET_MODELLING_FRAMEWORK})...")
        response = LLM_CLIENT.chat.completions.create(
            messages=messages,
            model=LLM_ID,
            temperature=LLM_TEMPERATURE,
            seed=LLM_SEED,
            max_tokens=LLM_MAX_TOKENS,
            top_p=LLM_TOP_P
        )

        max_tokens_reached = (response.choices[0].finish_reason == 'length')
        if max_tokens_reached:
            print(f" [LLM Call] Warning: Max tokens reached for problem {problem_id}. The model may be incomplete.")

        generated_model = response.choices[0].message.content.strip()
        # Strip the surrounding markdown code fence, if the LLM included one.
        if '```python' in generated_model:
            generated_model = generated_model.split('```python')[1].split('```')[0].strip()

        print(f" [LLM Call] Successfully generated model for problem: {problem_id}")
        return generated_model
    except Exception as e:
        print(f" [LLM Call] Error generating model for problem {problem_id}: {type(e).__name__} - {e}")
        return f"# Error generating model for problem {problem_id}: {type(e).__name__} - {e}\n# Please check the LLM configuration and try again."
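
# Example call (illustrative; the id, description, and variable name are made up):
#   model_code = generate_model_with_llm("toy-001", "Find x in 0..10 such that x + 3 == 7.", "", ["x"])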

# === MAIN EXECUTION LOOP (No need to change) ===
def main():
    print(f"Starting model generation script for {TARGET_MODELLING_FRAMEWORK}.")

    print(f"Loading dataset '{GT_DATASET_NAME}'...")
    try:
        dataset = load_dataset(GT_DATASET_NAME, split=DATASET_SPLIT, trust_remote_code=True)
        print(f"Dataset loaded. Number of problems: {len(dataset)}")
    except Exception as e:
        print(f"CRITICAL ERROR - Failed to load dataset '{GT_DATASET_NAME}': {type(e).__name__} - {e}")
        return

    print(f"Generating models and writing to '{OUTPUT_FILE}'...")
    count_generated = 0
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f_out:
        for i, item in enumerate(dataset):
            problem_id = item.get(PROBLEM_ID_COLUMN)
            description = item.get(PROBLEM_DESCRIPTION_COLUMN)
            input_data = item.get(PROBLEM_DATA_COLUMN)
            decision_vars = item.get(PROBLEM_DECISION_VARS_COLUMN)

            if not problem_id or description is None:
                print(f" Skipping item {i + 1} due to missing ID or description. Problem data: {item}")
                continue

            print(f"\nProcessing problem {i + 1}/{len(dataset)}: ID = '{problem_id}'")
            generated_model_str = generate_model_with_llm(problem_id, description, input_data, decision_vars)

            # One JSON object per line: the problem id and the generated model code.
            submission_entry = {
                "id": problem_id,
                "model": generated_model_str
            }
            f_out.write(json.dumps(submission_entry) + '\n')
            count_generated += 1

    print(f"\nProcessing complete. {count_generated} models generated and saved to '{OUTPUT_FILE}'.")

if __name__ == "__main__":
    main()
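
# Optional sanity check after the run (a minimal sketch; it only assumes the JSONL
# format written above):
#   with open(OUTPUT_FILE, encoding='utf-8') as f:
#       for line in f:
#           entry = json.loads(line)
#           assert "id" in entry and "model" in entry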