{ "cells": [ { "metadata": { "ExecuteTime": { "end_time": "2025-02-12T14:32:10.766770Z", "start_time": "2025-02-12T14:32:07.793979Z" } }, "cell_type": "code", "source": [ "from src.dataset.dataset import EventDataset\n", "from src.utils.paths import get_path" ], "id": "7b407a8095806d09", "outputs": [], "execution_count": 1 }, { "metadata": { "ExecuteTime": { "end_time": "2025-02-12T14:32:12.034102Z", "start_time": "2025-02-12T14:32:10.864499Z" } }, "cell_type": "code", "source": [ "# Inputs handed to EventDataset.from_directory below.\n", "model_clusters_file = None\n", "model_output_file = \"/work/gkrzmanc/jetclustering/results/train/Eval_objectness_score_2025_02_12_11_50_03/eval_9.pkl\"\n", "\n", "# Resolve the dataset directory through the project's path helper.\n", "path = get_path(\n", "    \"/pnfs/psi.ch/cms/trivcat/store/user/gkrzmanc/jetclustering/preprocessed_data/scouting_PFNano_signals2/SVJ_hadronic_std/s-channel_mMed-900_mDark-20_rinv-0.3\",\n", "    \"preprocessed_data\",\n", ")\n", "\n", "dataset = EventDataset.from_directory(\n", "    path,\n", "    model_output_file=model_output_file,\n", "    model_clusters_file=model_clusters_file,\n", "    include_model_jets_unfiltered=True,\n", ")" ], "id": "8d275fbf162ad4fc", "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/work/gkrzmanc/jetclustering/code/src/utils/utils.py:91: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. 
Please open an issue on GitHub for any issues related to this experimental feature.\n", " return lambda b: torch.load(io.BytesIO(b), map_location='cpu')\n" ] } ], "execution_count": 2 }, { "metadata": { "ExecuteTime": { "end_time": "2025-02-12T14:32:12.482044Z", "start_time": "2025-02-12T14:32:12.246175Z" } }, "cell_type": "code", "source": [ "import torch\n", "\n", "max_events = 1000  # maximum number of events to scan\n", "all_obj_scores = []\n", "target_obj_scores = []\n", "# Bound the range itself: the former `if i > max_events: break` guard still let\n", "# index max_events through, so max_events + 1 events were processed.\n", "for i in range(min(len(dataset), max_events)):\n", "    event = dataset[i]\n", "    jets = event.model_jets\n", "    # Check consistency before accumulating anything from this event.\n", "    assert len(jets) == len(jets.obj_score), f\"{len(jets)} {len(jets.obj_score)}\"\n", "    all_obj_scores += torch.sigmoid(jets.obj_score).tolist()\n", "    target_obj_scores += jets.target_obj_score.tolist()\n" ], "id": "b4822c0dc7f98f51", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([ 2, 0, 0, 2, 2, 0, 2, 0, 0, 0, 2, 2, 0, 0, 0, 0, 2, 2,\n", " 2, 0, 0, 2, 2, 0, 0, 2, 0, 2, 2, 2, 2, 2, -1, 0, 0, 2,\n", " 0, 0, 2, 0, 0, 0, 2, 2, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2,\n", " 0, 2, 2, 1, 0, 0, 0, 0, 0, 0, 2, 0, 1, 1, 0, 0, 1, 0,\n", " 2, 2, 0, 2, 1, 0, 0, 1, -1, 2, 2, 0, 2, 0, 1, 0, -1, 2,\n", " 2, 0, 2, 0, 2, 2, 1, 2, 0, 0, 2, 1, 2, 2, 2, 2, 0, 2,\n", " 2, 2, 2, 2, -1, 0, 2, 2, 0, 2, 2, 2, -1, 1, 0, 2, -1, 2,\n", " 2, -1, 2, -1, 0, 2, 0, 1, -1, 2, 0, 2, -1, 1, 2, 0, 2, -1,\n", " 2, -1, 1, 0, 0, 0, 2, 0, 1, 0, -1, 1, 0, 2, 2, 2, 2, 2,\n", " 2, 2, -1, 2, 1, 0, 0, 1, 1, 2, 0, 2, 2, 0, 2, 0, 0, 1,\n", " 2, 2, 0, 2, 1, -1, -1, 2, -1, -1, -1, 0, 0, 2, 1, 1, 2, 2,\n", " -1, 1, 0, 2, 2, -1, 0, -1, 0, 0, 2, 2, 2, 2, 0, 1, 0, 2,\n", " -1, 2, 2, -1, 2, 2, 2, -1, 1, 0, 0])\n", "Jets pt tensor([315.0177, 24.9858, 306.9994]) obj score tensor([ 2.4366, -4.1073, -1.8993])\n", "tensor([ 2, 0, 0, 2, 2, 0, 2, 0, 0, 0, 2, 2, 0, 0, 0, 0, 2, 2,\n", " 2, 0, 0, 2, 2, 0, 0, 2, 0, 2, 2, 2, 2, 2, -1, 0, 0, 2,\n", " 0, 0, 2, 0, 0, 0, 2, 2, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2,\n", " 0, 2, 
2, 1, 0, 0, 0, 0, 0, 0, 2, 0, 1, 1, 0, 0, 1, 0,\n", " 2, 2, 0, 2, 1, 0, 0, 1, -1, 2, 2, 0, 2, 0, 1, 0, -1, 2,\n", " 2, 0, 2, 0, 2, 2, 1, 2, 0, 0, 2, 1, 2, 2, 2, 2, 0, 2,\n", " 2, 2, 2, 2, -1, 0, 2, 2, 0, 2, 2, 2, -1, 1, 0, 2, -1, 2,\n", " 2, -1, 2, -1, 0, 2, 0, 1, -1, 2, 0, 2, -1, 1, 2, 0, 2, -1,\n", " 2, -1, 1, 0, 0, 0, 2, 0, 1, 0, -1, 1, 0, 2, 2, 2, 2, 2,\n", " 2, 2, -1, 2, 1, 0, 0, 1, 1, 2, 0, 2, 2, 0, 2, 0, 0, 1,\n", " 2, 2, 0, 2, 1, -1, -1, 2, -1, -1, -1, 0, 0, 2, 1, 1, 2, 2,\n", " -1, 1, 0, 2, 2, -1, 0, -1, 0, 0, 2, 2, 2, 2, 0, 1, 0, 2,\n", " -1, 2, 2, -1, 2, 2, 2, -1, 1, 0, 0])\n", "tensor([4, 4, 4, 4, 4, 5, 5, 3, 5, 5, 5, 4, 5, 5, 3, 5, 5, 5, 3, 5, 4, 5, 4, 5,\n", " 5, 3, 5, 4, 5, 5, 5, 3, 3, 5, 5, 5, 3, 3, 5, 5, 3, 5, 5, 2, 5, 5, 5, 5,\n", " 4, 3, 3, 3, 2, 5, 5, 5, 5, 4, 4, 4, 5, 5, 5, 5, 3, 3, 5, 4, 5, 4, 5, 3,\n", " 4, 5, 5, 3, 5, 5, 5, 4, 5, 2, 5, 5, 3, 5, 5, 4, 3, 4, 5, 2, 2, 5, 5, 3,\n", " 5, 2, 2, 4, 5, 5, 2, 5, 3, 5, 4, 2, 5, 5, 2, 4, 5, 3, 3, 5, 5, 5, 2, 3,\n", " 4, 3, 5, 5, 5, 3, 5, 5, 5, 4, 4, 5, 5, 5, 4, 5, 3, 4, 4, 4, 5, 5, 4, 5,\n", " 5, 3, 4, 3, 5, 3, 3, 3, 2, 3, 4, 4, 2, 5, 5, 3, 4, 3, 5, 2, 3, 3, 4, 4,\n", " 3, 4, 4, 4, 5, 2, 3, 3, 2, 3, 2, 5, 3, 3, 4, 5, 5, 3, 2, 3, 3, 4, 5, 5,\n", " 3, 3, 5, 5, 2, 4, 4, 5, 3, 5, 3, 3, 5, 3, 5, 2, 5, 4, 5, 3, 4, 4, 5, 4,\n", " 3, 3, 2, 5, 5, 4, 4, 5, 5, 3, 3, 5, 5, 5, 4, 3, 4, 5, 3, 2, 3, 5, 3, 4,\n", " 3, 5, 5, 5, 3, 2, 5, 3, 3, 3, 4, 5, 5, 5, 3, 5, 5, 2, 5, 4, 4, 5, 5, 5,\n", " 4, 3, 5, 3, 5, 5, 4, 3, 5, 4, 3, 5, 4, 4, 3, 5, 3, 5, 3, 2, 5, 5, 4, 5,\n", " 5, 5, 5, 5, 5, 3, 5, 3, 2, 4, 3, 4, 5, 3, 4, 4, 5, 3, 4, 4, 5, 5, 4, 5,\n", " 5, 4, 4, 3, 5, 3, 2, 3, 3, 5, 3, 4, 3, 4, 5, 4, 3, 4, 3, 3, 3, 5, 5])\n", "Jets pt tensor([ 0.0000, 0.0000, 12.2053, 60.9335, 447.7638, 361.8791]) obj score tensor([-4.4648, -1.0421, 0.6614])\n" ] }, { "ename": "AssertionError", "evalue": "Error! 
len(obj_score)=3, len(jets_pt)=6", "output_type": "error", "traceback": [ "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", "\u001B[0;31mAssertionError\u001B[0m Traceback (most recent call last)", "Cell \u001B[0;32mIn[3], line 8\u001B[0m\n\u001B[1;32m 6\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m i \u001B[38;5;241m>\u001B[39m max_events:\n\u001B[1;32m 7\u001B[0m \u001B[38;5;28;01mbreak\u001B[39;00m\n\u001B[0;32m----> 8\u001B[0m event \u001B[38;5;241m=\u001B[39m \u001B[43mdataset\u001B[49m\u001B[43m[\u001B[49m\u001B[43mi\u001B[49m\u001B[43m]\u001B[49m\n\u001B[1;32m 9\u001B[0m all_obj_scores \u001B[38;5;241m+\u001B[39m\u001B[38;5;241m=\u001B[39m torch\u001B[38;5;241m.\u001B[39msigmoid(event\u001B[38;5;241m.\u001B[39mmodel_jets\u001B[38;5;241m.\u001B[39mobj_score)\u001B[38;5;241m.\u001B[39mtolist()\n\u001B[1;32m 10\u001B[0m \u001B[38;5;28;01massert\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(event\u001B[38;5;241m.\u001B[39mmodel_jets) \u001B[38;5;241m==\u001B[39m \u001B[38;5;28mlen\u001B[39m(event\u001B[38;5;241m.\u001B[39mmodel_jets\u001B[38;5;241m.\u001B[39mobj_score), \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mlen\u001B[39m(event\u001B[38;5;241m.\u001B[39mmodel_jets)\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m \u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mlen\u001B[39m(event\u001B[38;5;241m.\u001B[39mmodel_jets\u001B[38;5;241m.\u001B[39mobj_score)\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m\n", "File \u001B[0;32m/work/gkrzmanc/jetclustering/code/src/dataset/dataset.py:558\u001B[0m, in \u001B[0;36mEventDataset.__getitem__\u001B[0;34m(self, i)\u001B[0m\n\u001B[1;32m 556\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;21m__getitem__\u001B[39m(\u001B[38;5;28mself\u001B[39m, i):\n\u001B[1;32m 557\u001B[0m \u001B[38;5;28;01massert\u001B[39;00m i \u001B[38;5;241m<\u001B[39m 
\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mn_events, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mIndex out of bounds: \u001B[39m\u001B[38;5;132;01m%d\u001B[39;00m\u001B[38;5;124m >= \u001B[39m\u001B[38;5;132;01m%d\u001B[39;00m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;241m%\u001B[39m (i, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mn_events)\n\u001B[0;32m--> 558\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_idx\u001B[49m\u001B[43m(\u001B[49m\u001B[43mi\u001B[49m\u001B[43m)\u001B[49m\n", "File \u001B[0;32m/work/gkrzmanc/jetclustering/code/src/dataset/dataset.py:419\u001B[0m, in \u001B[0;36mEventDataset.get_idx\u001B[0;34m(self, i)\u001B[0m\n\u001B[1;32m 416\u001B[0m result \u001B[38;5;241m=\u001B[39m {key: EventCollection\u001B[38;5;241m.\u001B[39mdeserialize(result[key], batch_number\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m, \u001B[38;5;28mcls\u001B[39m\u001B[38;5;241m=\u001B[39mEvent\u001B[38;5;241m.\u001B[39mevt_collections[key]) \u001B[38;5;28;01mfor\u001B[39;00m\n\u001B[1;32m 417\u001B[0m key \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mattrs}\n\u001B[1;32m 418\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mmodel_output \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m--> 419\u001B[0m result[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mmodel_jets\u001B[39m\u001B[38;5;124m\"\u001B[39m], bc_scores_pfcands, bc_labels_pfcands \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_model_jets\u001B[49m\u001B[43m(\u001B[49m\u001B[43mi\u001B[49m\u001B[43m,\u001B[49m\u001B[43m 
\u001B[49m\u001B[43mpfcands\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mresult\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mpfcands\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m]\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43minclude_target\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;241;43m1\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdq\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mresult\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mmatrix_element_gen_particles\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m]\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 420\u001B[0m result[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mpfcands\u001B[39m\u001B[38;5;124m\"\u001B[39m]\u001B[38;5;241m.\u001B[39mbc_scores_pfcands \u001B[38;5;241m=\u001B[39m bc_scores_pfcands\n\u001B[1;32m 421\u001B[0m result[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mpfcands\u001B[39m\u001B[38;5;124m\"\u001B[39m]\u001B[38;5;241m.\u001B[39mbc_labels_pfcands \u001B[38;5;241m=\u001B[39m bc_labels_pfcands\n", "File \u001B[0;32m/work/gkrzmanc/jetclustering/code/src/dataset/dataset.py:536\u001B[0m, in \u001B[0;36mEventDataset.get_model_jets\u001B[0;34m(self, i, pfcands, filter, dq, include_target)\u001B[0m\n\u001B[1;32m 534\u001B[0m obj_score \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mmodel_output[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mobj_score_pred\u001B[39m\u001B[38;5;124m\"\u001B[39m][(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mmodel_output[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mevent_clusters_idx\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;241m==\u001B[39m i)]\n\u001B[1;32m 535\u001B[0m \u001B[38;5;28mprint\u001B[39m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mJets pt\u001B[39m\u001B[38;5;124m\"\u001B[39m, jets_pt, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mobj 
score\u001B[39m\u001B[38;5;124m\"\u001B[39m, obj_score)\n\u001B[0;32m--> 536\u001B[0m \u001B[38;5;28;01massert\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(obj_score) \u001B[38;5;241m==\u001B[39m \u001B[38;5;28mlen\u001B[39m(jets_pt), \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mError! len(obj_score)=\u001B[39m\u001B[38;5;132;01m%d\u001B[39;00m\u001B[38;5;124m, len(jets_pt)=\u001B[39m\u001B[38;5;132;01m%d\u001B[39;00m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;241m%\u001B[39m (\n\u001B[1;32m 537\u001B[0m \u001B[38;5;28mlen\u001B[39m(obj_score), \u001B[38;5;28mlen\u001B[39m(jets_pt))\n\u001B[1;32m 538\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m include_target:\n\u001B[1;32m 539\u001B[0m target_obj_score \u001B[38;5;241m=\u001B[39m EventDataset\u001B[38;5;241m.\u001B[39mget_target_obj_score(jets_eta, jets_phi, jets_pt, torch\u001B[38;5;241m.\u001B[39mzeros(jets_pt\u001B[38;5;241m.\u001B[39msize(\u001B[38;5;241m0\u001B[39m)), dq\u001B[38;5;241m.\u001B[39meta, dq\u001B[38;5;241m.\u001B[39mphi, torch\u001B[38;5;241m.\u001B[39mzeros(dq\u001B[38;5;241m.\u001B[39meta\u001B[38;5;241m.\u001B[39msize(\u001B[38;5;241m0\u001B[39m)))\n", "\u001B[0;31mAssertionError\u001B[0m: Error! 
len(obj_score)=3, len(jets_pt)=6" ] } ], "execution_count": 3 }, { "metadata": { "ExecuteTime": { "end_time": "2025-02-12T14:30:56.370301947Z", "start_time": "2025-02-11T12:51:26.851410Z" } }, "cell_type": "code", "source": "dir(event.model_jets)", "id": "6512f5fdd101ad0a", "outputs": [ { "data": { "text/plain": [ "['E',\n", " '__class__',\n", " '__delattr__',\n", " '__dict__',\n", " '__dir__',\n", " '__doc__',\n", " '__eq__',\n", " '__format__',\n", " '__ge__',\n", " '__getattribute__',\n", " '__getitem__',\n", " '__gt__',\n", " '__hash__',\n", " '__init__',\n", " '__init_subclass__',\n", " '__le__',\n", " '__len__',\n", " '__lt__',\n", " '__module__',\n", " '__ne__',\n", " '__new__',\n", " '__reduce__',\n", " '__reduce_ex__',\n", " '__repr__',\n", " '__setattr__',\n", " '__sizeof__',\n", " '__str__',\n", " '__subclasshook__',\n", " '__weakref__',\n", " 'area',\n", " 'copy',\n", " 'deserialize',\n", " 'eta',\n", " 'init_attrs',\n", " 'mask',\n", " 'mass',\n", " 'obj_score',\n", " 'p',\n", " 'phi',\n", " 'pt',\n", " 'pxyz',\n", " 'serialize',\n", " 'target_obj_score',\n", " 'theta']" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 4 }, { "metadata": { "ExecuteTime": { "end_time": "2025-02-12T14:30:56.389354987Z", "start_time": "2025-02-11T12:51:26.920277Z" } }, "cell_type": "code", "source": [ "# torch.as_tensor accepts the Python lists built by the collection loop as well as\n", "# tensors left over from an earlier run of this cell, so re-running it is safe.\n", "target_obj_scores = torch.as_tensor(target_obj_scores).int()\n", "all_obj_scores = torch.as_tensor(all_obj_scores)\n", "# The boolean masks used for plotting require one target per score.\n", "assert len(all_obj_scores) == len(target_obj_scores), f\"{len(all_obj_scores)} {len(target_obj_scores)}\"\n", "print(len(target_obj_scores))\n", "print(len(all_obj_scores))" ], "id": "75269b8834cec48e", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1679\n", "1679\n" ] } ], "execution_count": 5 }, { "metadata": { "ExecuteTime": { "end_time": "2025-02-12T14:30:56.390925882Z", "start_time": "2025-02-11T12:51:26.979844Z" } }, "cell_type": "code", "source": [ "import matplotlib.pyplot as plt\n", "\n", "# The scores were passed through torch.sigmoid when collected, so they lie in [0, 1].\n", "# A fixed range gives all three histograms identical bin edges; bins=100 alone\n", "# derives the edges from each subset's own min/max.\n", "hist_kwargs = dict(histtype=\"step\", bins=100, range=(0.0, 1.0))\n", "\n", "fig, ax = plt.subplots()\n", "ax.hist(all_obj_scores, label=\"all\", **hist_kwargs)\n", "ax.hist(all_obj_scores[target_obj_scores == 1], color=\"green\", label=\"target_obj_score == 1\", **hist_kwargs)\n", "ax.hist(all_obj_scores[target_obj_scores == 0], color=\"gray\", label=\"target_obj_score == 0\", **hist_kwargs)\n", "ax.set_yscale(\"log\")\n", "ax.set_xlabel(\"sigmoid(obj_score)\")\n", "ax.set_ylabel(\"number of model jets\")\n", "ax.legend()\n",
 "plt.show()" ], "id": "5ca859cb55f86fe7", "outputs": [ { "data": { "text/plain": [ "
" ], "image/png": "" }, "metadata": {}, "output_type": "display_data" } ], "execution_count": 6 }, { "metadata": { "ExecuteTime": { "end_time": "2025-02-12T14:30:56.404842685Z", "start_time": "2025-02-11T12:51:28.121209Z" } }, "cell_type": "code", "source": "", "id": "b1d52ebccd4befee", "outputs": [], "execution_count": null }, { "metadata": { "ExecuteTime": { "end_time": "2025-02-12T14:30:56.415139614Z", "start_time": "2025-02-11T12:51:28.174332Z" } }, "cell_type": "code", "source": "", "id": "5052a19722e9e60b", "outputs": [], "execution_count": null } ], "metadata": { "kernelspec": { "name": "python3", "language": "python", "display_name": "Python 3 (ipykernel)" } }, "nbformat": 5, "nbformat_minor": 9 }