Spaces:

kostis-init
/

CP-Bench-Leaderboard

Running

App Files Community

kostis-init committed 5 days ago

Commit

60a95c1

1 Parent(s): 2e2392c

update leaderboard entry parsing, enhance LLM client configuration, and correct submission file link

Browse files

Files changed (7) hide show

mnz_sample_submission.jsonl +0 -2
sample_submission.jsonl +0 -0
small_sample_submission.jsonl +0 -5
src/hf_utils.py +3 -1
src/ui.py +1 -1
template.py +3 -5
template_submission.jsonl +0 -0

mnz_sample_submission.jsonl DELETED Viewed

	@@ -1,2 +0,0 @@
1	- {"id": "csplib__csplib_001_car_sequencing","model": "%% Data\narray[1..5] of int: at_most = [1, 2, 2, 2, 1];\narray[1..5] of int: per_slots = [2, 3, 3, 5, 5];\narray[1..6] of int: demand = [1, 1, 2, 2, 2, 2];\narray[1..6, 1..5] of int: requires = \n [\|1, 0, 1, 1, 0\|\n 0, 0, 0, 1, 0\|\n 0, 1, 0, 0, 1\|\n 0, 1, 0, 1, 0\|\n 1, 0, 1, 0, 0\|\n 1, 1, 0, 0, 0\|];\n\nint: n_types = 6;\nint: n_options = 5;\nint: n_cars = sum(demand);\nset of int: Cars = 1..n_cars;\nset of int: Options = 1..n_options;\nset of int: Types = 1..n_types;\n\n%% Decision variables\narray[Cars] of var Types: sequence;\narray[Cars, Options] of var bool: setup;\n\n%% Constraints\nconstraint\n forall(t in Types) (\n count(sequence, t) = demand[t]\n );\n\nconstraint\n forall(c in Cars, o in Options) (\n setup[c, o] = requires[sequence[c], o]\n );\n\nconstraint\n forall(o in Options) (\n forall(s in 1..n_cars - per_slots[o] + 1) (\n sum(i in s..s + per_slots[o] - 1)(bool2int(setup[i, o])) <= at_most[o]\n )\n );\n\nsolve satisfy;\n\noutput [\n \"{ \\\"sequence\\\": \", show([sequence[c] - 1 \| c in Cars]), \" }\"\n];"}
2	- {"id": "csplib__csplib_002_template_design","model": "%% Data\nint: n_slots = 9;\nint: n_templates = 2;\nint: n_var = 7;\narray[1..n_var] of int: demand = [250, 255, 260, 500, 500, 800, 1100];\nint: ub = max(demand);\n\n%% Sets\nset of int: Templates = 1..n_templates;\nset of int: Variations = 1..n_var;\n\n%% Decision variables\narray[Templates] of var 1..ub: production;\narray[Templates, Variations] of var 0..n_slots: layout;\n\n%% Constraints\nconstraint\n forall(t in Templates) (\n sum(v in Variations)(layout[t, v]) = n_slots\n );\n\nconstraint\n forall(v in Variations) (\n sum(t in Templates)(production[t] * layout[t, v]) >= demand[v]\n );\n\n%% Break symmetry for equal demand\nconstraint\n forall(i in 1..n_var-1 where demand[i] == demand[i+1]) (\n layout[1, i] <= layout[1, i+1] /\\\n forall(t in 1..n_templates-1)(\n (layout[t, i] = layout[t, i+1]) -> (layout[t+1, i] <= layout[t+1, i+1])\n )\n );\n\n%% Distinguish templates\nconstraint\n forall(i in 1..n_templates-1)(production[i] <= production[i+1]);\n\n%% Static symmetry for increasing demand\nconstraint\n forall(i in 1..n_var-1 where demand[i] < demand[i+1]) (\n sum(t in Templates)(production[t] * layout[t, i]) <= sum(t in Templates)(production[t] * layout[t, i+1])\n );\n\n%% Objective\nsolve minimize sum(t in Templates)(production[t]);\n\n%% Output\noutput [\n \"{ \\\"production\\\": \", show([production[t] \| t in Templates]), \", \\\"layout\\\": \", show([ [layout[t,v] \| v in Variations] \| t in Templates ]), \" }\"\n];"}

sample_submission.jsonl DELETED Viewed

The diff for this file is too large to render. See raw diff

small_sample_submission.jsonl DELETED Viewed

@@ -1,5 +0,0 @@
-{"id": "csplib__csplib_001_car_sequencing", "model": "# Data\nat_most = [1, 2, 2, 2, 1]  # The amount of times a property can be present\n# in a group of consecutive timeslots (see next variable)\nper_slots = [2, 3, 3, 5, 5]  # The amount of consecutive timeslots\ndemand = [1, 1, 2, 2, 2, 2]  # The demand per type of car\nrequires = [[1, 0, 1, 1, 0],\n            [0, 0, 0, 1, 0],\n            [0, 1, 0, 0, 1],\n            [0, 1, 0, 1, 0],\n            [1, 0, 1, 0, 0],\n            [1, 1, 0, 0, 0]]  # The properties per type of car\n# End of data\n\n# Import libraries\nfrom cpmpy import *\nimport json\n\n# Parameters\nn_cars = sum(demand)  # The amount of cars to sequence\nn_options = len(at_most)  # The amount of different options\nn_types = len(demand)  # The amount of different car types\nrequires = cpm_array(requires)  # For element constraint\n\n# Decision Variables\nsequence = intvar(0, n_types - 1, shape=n_cars, name=\"sequence\")  # The sequence of car types\nsetup = boolvar(shape=(n_cars, n_options), name=\"setup\")  # Sequence of different options based on the car type\n\n# Model\nmodel = Model()\n\n# The amount of each type of car in the sequence has to be equal to the demand for that type\nmodel += [sum(sequence == t) == demand[t] for t in range(n_types)]\n\n# Make sure that the options in the setup table correspond to those of the car type\nfor s in range(n_cars):\n    model += [setup[s, o] == requires[sequence[s], o] for o in range(n_options)]\n\n# Check that no more than \"at most\" car options are used per \"per_slots\" slots\nfor o in range(n_options):\n    for s in range(n_cars - per_slots[o]):\n        slot_range = range(s, s + per_slots[o])\n        model += (sum(setup[slot_range, o]) <= at_most[o])\n\n# Solve\nmodel.solve()\n\n# Print\nsolution = {\"sequence\": sequence.value().tolist()}\nprint(json.dumps(solution))\n# End of CPMPy script"}
-{"id": "csplib__csplib_002_template_design", "model": "# Data\nn_slots = 9  # The amount of slots on a template\nn_templates = 2  # The amount of templates\nn_var = 7  # The amount of different variations\ndemand = [250, 255, 260, 500, 500, 800, 1100]  # The demand per variation\n# End of data\n\n# Import libraries\nfrom cpmpy import *\nimport json\n\n# Parameters\nub = max(demand)  # The upper bound for the production\n\n# create model\nmodel = Model()\n\n# decision variables\nproduction = intvar(1, ub, shape=n_templates, name=\"production\")\nlayout = intvar(0, n_var, shape=(n_templates, n_var), name=\"layout\")\n\n# all slots are populated in a template\nmodel += all(sum(layout[i]) == n_slots for i in range(n_templates))\n\n# meet demand\nfor var in range(n_var):\n    model += sum(production * layout[:, var]) >= demand[var]\n\n# break symmetry\n# equal demand\nfor i in range(n_var - 1):\n    if demand[i] == demand[i + 1]:\n        model += layout[0, i] <= layout[0, i + 1]\n        for j in range(n_templates - 1):\n            model += (layout[j, i] == layout[j, i + 1]).implies(layout[j + 1, i] <= layout[j + 1, i + 1])\n\n# distinguish templates\nfor i in range(n_templates - 1):\n    model += production[i] <= production[i + 1]\n\n# static symmetry\nfor i in range(n_var - 1):\n    if demand[i] < demand[i + 1]:\n        model += sum(production * layout[:, i]) <= sum(production * layout[:, i + 1])\n\n# minimize number of printed sheets\nmodel.minimize(sum(production))\n\n# Solve\nmodel.solve()\n\n# Print\nsolution = {\"production\": production.value().tolist(), \"layout\": layout.value().tolist()}\nprint(json.dumps(solution))\n# End of CPMPy script"}
-{"id": "csplib__csplib_005_autocorrelation", "model": "# Data\nn = 10  # Length of the binary sequence\n# End of data\n\n# Import libraries\nfrom cpmpy import *\nimport numpy as np\nimport json\n\n\n# periodic auto correlation\ndef PAF(arr, s):\n    # roll the array 's' indices\n    return sum(arr * np.roll(arr, -s))\n\n\n# Decision Variables\nsequence = intvar(-1, 1, shape=n, name=\"sequence\")  # binary sequence\nE = sum([PAF(sequence, s) ** 2 for s in range(1, n)])  # energy value\n\nmodel = Model()\n\n# exclude 0\nmodel += sequence != 0\n\n# minimize sum of squares\nmodel.minimize(E)\n\n# Solve\nmodel.solve()\n\n# Print\nsolution = {\"sequence\": sequence.value().tolist(), \"E\": E.value()}\nprint(json.dumps(solution))\n# End of CPMPy script"}
-{"id": "csplib__csplib_006_golomb_rulers", "model": "# Data\nsize = 10  # Number of marks on the Golomb ruler\n# End of data\n\n# Import libraries\nfrom cpmpy import *\nimport json\n\n# Decision variables\nmarks = intvar(0, size * size, shape=size, name=\"marks\")\nlength = marks[-1]\n\n# Model\nmodel = Model()\n\n# first mark is 0\nmodel += (marks[0] == 0)\n\n# marks must be increasing\nmodel += marks[:-1] < marks[1:]\n\n# golomb constraint\ndiffs = [marks[j] - marks[i] for i in range(0, size - 1) for j in range(i + 1, size)]\nmodel += AllDifferent(diffs)\n\n# Symmetry breaking\nmodel += (marks[size - 1] - marks[size - 2] > marks[1] - marks[0])\nmodel += (diffs[0] < diffs[size - 1])\n\n# find optimal ruler\nmodel.minimize(length)\n\n# Solve\nmodel.solve()\n\n# Print\nsolution = {\"marks\": marks.value().tolist(), \"length\": length.value()}\nprint(json.dumps(solution))\n# End of CPMPy script"}
-{"id": "csplib__csplib_007_all_interval", "model": "# Data\nn = 12  # Number of pitch-classes\n# End of data\n\n# Import libraries\nfrom cpmpy import *\nimport numpy as np\nimport json\n\n# Create the solver\nmodel = Model()\n\n# Declare variables\nx = intvar(0, n - 1, shape=n, name=\"x\")  # Pitch-classes\ndiffs = intvar(1, n - 1, shape=n - 1, name=\"diffs\")  # Intervals\n\n# Constraints\nmodel += [AllDifferent(x),\n          AllDifferent(diffs)]\n\n# Differences between successive values\nmodel += diffs == np.abs(x[1:] - x[:-1])\n\n# Symmetry breaking\nmodel += [x[0] < x[-1]]  # Mirroring array is equivalent solution\nmodel += [diffs[0] < diffs[1]]  # Further symmetry breaking\n\n# Solve\nmodel.solve()\n\n# Print\nsolution = {\"x\": x.value().tolist(), \"diffs\": diffs.value().tolist()}\nprint(json.dumps(solution))\n# End of CPMPy script"}

src/hf_utils.py CHANGED Viewed

@@ -80,8 +80,10 @@ def load_leaderboard_data():
                         if 'Final Solution Accuracy' in line:
                             entry[LDB_COLS[2]] = float(line.split(":")[1].strip().replace("%", ""))
                         if 'Submission coverage perc' in line:
-                            entry[LDB_COLS[1]] = int(line.split(":")[1].strip())
                 os.remove(local_summary_path)
             leaderboard_entries.append(entry)

                         if 'Final Solution Accuracy' in line:
                             entry[LDB_COLS[2]] = float(line.split(":")[1].strip().replace("%", ""))
                         if 'Submission coverage perc' in line:
+                            entry[LDB_COLS[1]] = float(line.split(":")[1].strip().replace("%", ""))
                 os.remove(local_summary_path)
+            else:
+                print(f"Warning: Summary file {local_summary_path} does not exist or is empty.")
             leaderboard_entries.append(entry)

src/ui.py CHANGED Viewed

@@ -90,7 +90,7 @@ def create_ui():
             "   **Each line in the file should be a JSON object with two keys: `id` and `model`.**\n"
             "   * `id`: The ID of the problem exactly as it appears in the original dataset (e.g., `csplib__csplib_001_car_sequencing`).\n"
             "   * `model`: The generated model for the problem (as a string representing runnable code). Make sure that it eventually outputs the solution as a json with key(s) as described in the `decision_variables` entry and values as would be expected in the problem. This is part of the evaluation as well: unexpected keys, or value types are considered incorrect. This is because our automatic evaluation is based on the solution printed by the submitted models.\n"
-            "   * An example submission file can be found [here](https://huggingface.co/spaces/kostis-init/CP-Bench-competition/blob/main/sample_submission.jsonl).\n"
             "\n   To help you get started, we also provide a **template script [here](https://huggingface.co/spaces/kostis-init/CP-Bench-competition/blob/main/template.py)**. This script acts as a backbone, showing how to produce a simple, runnable submission for one of the problems. You can use it as a starting point for developing your own logic.\n"
             "5. **Check the leaderboard**: After uploading, it may take a few minutes for a submission to be evaluated and appear on the leaderboard.\n"
             "\n\n"

             "   **Each line in the file should be a JSON object with two keys: `id` and `model`.**\n"
             "   * `id`: The ID of the problem exactly as it appears in the original dataset (e.g., `csplib__csplib_001_car_sequencing`).\n"
             "   * `model`: The generated model for the problem (as a string representing runnable code). Make sure that it eventually outputs the solution as a json with key(s) as described in the `decision_variables` entry and values as would be expected in the problem. This is part of the evaluation as well: unexpected keys, or value types are considered incorrect. This is because our automatic evaluation is based on the solution printed by the submitted models.\n"
+            "   * An example submission file can be found [here](https://huggingface.co/spaces/kostis-init/CP-Bench-competition/blob/main/template_submission.jsonl).\n"
             "\n   To help you get started, we also provide a **template script [here](https://huggingface.co/spaces/kostis-init/CP-Bench-competition/blob/main/template.py)**. This script acts as a backbone, showing how to produce a simple, runnable submission for one of the problems. You can use it as a starting point for developing your own logic.\n"
             "5. **Check the leaderboard**: After uploading, it may take a few minutes for a submission to be evaluated and appear on the leaderboard.\n"
             "\n\n"

template.py CHANGED Viewed

@@ -18,6 +18,7 @@ PROBLEM_DECISION_VARS_COLUMN = "decision_variables"
 #######################################################################
 # === CHOOSE LLM CLIENT AND MODEL CONFIGURATION ===
 # Example 1: OpenAI (e.g., GPT-4o)
 # LLM_CLIENT = OpenAI(api_key="YOUR_API_KEY")
@@ -28,11 +29,8 @@ PROBLEM_DECISION_VARS_COLUMN = "decision_variables"
 # LLM_ID = "deepseek-chat"
 # Example 3: Together.ai
-# LLM_CLIENT = Together(api_key="TOGETHER_API_KEY")
-# LLM_ID = "mistralai/Mixtral-8x22B-Instruct-v0.1"
-LLM_CLIENT = OpenAI(api_key="YOUR_API_KEY")  # TODO: Set your API key or switch client above
-LLM_ID = "gpt-4o"                            # TODO: Change to your chosen model (name it as per the LLM provider's documentation)
 LLM_TEMPERATURE = 0.5  # Controls the randomness of the output (the lower, the more deterministic)
 LLM_SEED = 42  # Seed for reproducibility (optional, but recommended)

 #######################################################################
 # === CHOOSE LLM CLIENT AND MODEL CONFIGURATION ===
+# TODO: Uncomment and configure the LLM client you want to use.
 # Example 1: OpenAI (e.g., GPT-4o)
 # LLM_CLIENT = OpenAI(api_key="YOUR_API_KEY")
 # LLM_ID = "deepseek-chat"
 # Example 3: Together.ai
+LLM_CLIENT = Together(api_key="TOGETHER_API_KEY")
+LLM_ID = "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
 LLM_TEMPERATURE = 0.5  # Controls the randomness of the output (the lower, the more deterministic)
 LLM_SEED = 42  # Seed for reproducibility (optional, but recommended)

template_submission.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff