libokj committed on
Commit 764e1f1 · 1 Parent(s): 69bec57

Upload 347 files

configs/experiment/dti_experiment.yaml CHANGED
@@ -9,7 +9,7 @@ seed: 12345
 trainer:
   min_epochs: 1
   max_epochs: 500
-  precision: bf16
+  precision: 16-mixed
 
 callbacks:
   early_stopping:
configs/trainer/ddp.yaml CHANGED
@@ -7,4 +7,4 @@ accelerator: gpu
 devices: 4
 num_nodes: 1
 sync_batchnorm: True
-precision: bf16
+precision: 16-mixed
configs/trainer/default.yaml CHANGED
@@ -5,7 +5,7 @@ default_root_dir: ${paths.output_dir}
 min_epochs: 1
 max_epochs: 50
 
-precision: bf16
+precision: 32
 
 gradient_clip_val: 0.5
 gradient_clip_algorithm: norm
configs/trainer/gpu.yaml CHANGED
@@ -3,4 +3,4 @@ defaults:
 
 accelerator: gpu
 devices: 1
-precision: bf16
+precision: 16-mixed
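For context, a minimal sketch of how these trainer configs map onto the Lightning API, assuming PyTorch Lightning 2.x (which accepts precision strings such as "16-mixed"); Hydra passes the YAML values straight into the Trainer constructor.

```python
# Minimal sketch, assuming PyTorch Lightning 2.x. "16-mixed" enables fp16 automatic
# mixed precision, while plain 32 keeps full fp32 (the new default trainer setting).
from lightning.pytorch import Trainer

# Roughly equivalent to configs/trainer/default.yaml after this commit (CPU-safe):
trainer = Trainer(accelerator="cpu", min_epochs=1, max_epochs=50, precision=32)

# configs/trainer/gpu.yaml and ddp.yaml would translate to something like the
# following (requires CUDA hardware, shown as a comment only):
# trainer = Trainer(accelerator="gpu", devices=4, num_nodes=1,
#                   sync_batchnorm=True, precision="16-mixed")
```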
configs/webserver_inference.yaml ADDED
@@ -0,0 +1,27 @@
+# @package _global_
+defaults:
+  - model: dti_model # fixed for web server version
+  - task: null
+  - data: dti_data # fixed for web server version
+  - callbacks: null
+  - trainer: default
+  - paths: default
+  - extras: null
+  - hydra: default
+  - _self_
+  - preset: null
+  - experiment: null
+  - sweep: null
+  - debug: null
+  - optional local: default
+
+job_name: "webserver_inference"
+
+tags: null
+
+# passing checkpoint path is necessary for prediction
+ckpt_path: ???
+
+paths:
+  output_dir: null
+  work_dir: null
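Hypothetical usage sketch (not part of the commit): resolving this new config with Hydra's Compose API, e.g. from a web-server process; the relative config_path and the checkpoint path are placeholder assumptions.

```python
# Assumes this runs from a module inside the repository so that "../configs"
# resolves to the config tree above; the checkpoint path is made up.
from hydra import compose, initialize

with initialize(version_base="1.3", config_path="../configs"):
    cfg = compose(
        config_name="webserver_inference",
        overrides=["ckpt_path=/path/to/model.ckpt"],  # required: defaults to ???
    )
print(cfg.job_name)  # -> "webserver_inference"
```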
deepscreen/__pycache__/__init__.cpython-311.pyc CHANGED
Binary files a/deepscreen/__pycache__/__init__.cpython-311.pyc and b/deepscreen/__pycache__/__init__.cpython-311.pyc differ
 
deepscreen/__pycache__/predict.cpython-311.pyc CHANGED
Binary files a/deepscreen/__pycache__/predict.cpython-311.pyc and b/deepscreen/__pycache__/predict.cpython-311.pyc differ
 
deepscreen/__pycache__/test.cpython-311.pyc CHANGED
Binary files a/deepscreen/__pycache__/test.cpython-311.pyc and b/deepscreen/__pycache__/test.cpython-311.pyc differ
 
deepscreen/data/__pycache__/dti.cpython-311.pyc CHANGED
Binary files a/deepscreen/data/__pycache__/dti.cpython-311.pyc and b/deepscreen/data/__pycache__/dti.cpython-311.pyc differ
 
deepscreen/data/dti.py CHANGED
@@ -150,9 +150,11 @@ class DTIDataset(Dataset):
         sample = self.df.loc[i]
         return {
             'N': i,
-            'X1': self.drug_featurizer(sample['X1']),
+            'X1': sample['X1'],
+            'X1^': self.drug_featurizer(sample['X1']),
             'ID1': sample.get('ID1', sample['X1']),
-            'X2': self.protein_featurizer(sample['X2']),
+            'X2': sample['X2'],
+            'X2^': self.protein_featurizer(sample['X2']),
             'ID2': sample.get('ID2', sample['X2']),
             'Y': sample.get('Y'),
             'IDX': sample['IDX'],
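Illustrative sketch (not from the repo): each sample now keeps the raw drug/protein input under 'X1'/'X2' and the featurized form under 'X1^'/'X2^', so downstream code can report human-readable inputs while the model consumes the encoded ones. `dummy_featurizer` is a stand-in for the real featurizers.

```python
# Stand-in featurizer for illustration only; the real drug/protein featurizers
# are configured elsewhere in the repo.
def dummy_featurizer(s: str) -> list[int]:
    return [ord(c) for c in s]

sample = {'X1': 'CCO', 'X2': 'MKVL', 'IDX': 0}
item = {
    'N': 0,
    'X1': sample['X1'],                     # raw drug input, carried through for reporting
    'X1^': dummy_featurizer(sample['X1']),  # featurized drug input, consumed by the model
    'X2': sample['X2'],                     # raw protein input
    'X2^': dummy_featurizer(sample['X2']),  # featurized protein input
    'IDX': sample['IDX'],
}
```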
deepscreen/data/featurizers/__pycache__/__init__.cpython-311.pyc CHANGED
Binary files a/deepscreen/data/featurizers/__pycache__/__init__.cpython-311.pyc and b/deepscreen/data/featurizers/__pycache__/__init__.cpython-311.pyc differ
 
deepscreen/data/featurizers/__pycache__/categorical.cpython-311.pyc CHANGED
Binary files a/deepscreen/data/featurizers/__pycache__/categorical.cpython-311.pyc and b/deepscreen/data/featurizers/__pycache__/categorical.cpython-311.pyc differ
 
deepscreen/data/utils/__pycache__/collator.cpython-311.pyc CHANGED
Binary files a/deepscreen/data/utils/__pycache__/collator.cpython-311.pyc and b/deepscreen/data/utils/__pycache__/collator.cpython-311.pyc differ
 
deepscreen/data/utils/__pycache__/label.cpython-311.pyc CHANGED
Binary files a/deepscreen/data/utils/__pycache__/label.cpython-311.pyc and b/deepscreen/data/utils/__pycache__/label.cpython-311.pyc differ
 
deepscreen/data/utils/__pycache__/sampler.cpython-311.pyc CHANGED
Binary files a/deepscreen/data/utils/__pycache__/sampler.cpython-311.pyc and b/deepscreen/data/utils/__pycache__/sampler.cpython-311.pyc differ
 
deepscreen/gui/app.py CHANGED
@@ -16,16 +16,6 @@ root = Path.cwd()
 task_list = [f.stem for f in root.parent.joinpath("configs/task").iterdir() if f.suffix == ".yaml"]
 preset_list = [f.stem for f in root.parent.joinpath("configs/preset").iterdir() if f.suffix == ".yaml"]
 predictor_list = [f.stem for f in root.parent.joinpath("configs/model/predictor").iterdir() if f.suffix == ".yaml"]
-drug_encoder_list = [f.stem for f in root.parent.joinpath("configs/model/predictor/drug_encoder").iterdir() if
-                     f.suffix == ".yaml"]
-drug_featurizer_list = [f.stem for f in root.parent.joinpath("configs/data/drug_featurizer").iterdir() if
-                        f.suffix == ".yaml"]
-protein_encoder_list = [f.stem for f in root.parent.joinpath("configs/model/predictor/protein_encoder").iterdir() if
-                        f.suffix == ".yaml"]
-protein_featurizer_list = [f.stem for f in root.parent.joinpath("configs/data/protein_featurizer").iterdir() if
-                           f.suffix == ".yaml"]
-classifier_list = [f.stem for f in root.parent.joinpath("configs/model/predictor/decoder").iterdir() if
-                   f.suffix == ".yaml"]
 
 
 def load_csv(file):
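Optional refactor sketch, not part of the commit: the removed per-component scans all followed the same pattern, so if they are ever needed again a small helper keeps them in one place. `list_config_stems` is a hypothetical name introduced here for illustration.

```python
from pathlib import Path

def list_config_stems(config_dir: Path) -> list[str]:
    """Return the stem of every YAML file in a Hydra config group directory."""
    return [f.stem for f in config_dir.iterdir() if f.suffix == ".yaml"]

# e.g. task_list = list_config_stems(root.parent / "configs/task")
```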
deepscreen/models/__pycache__/dti.cpython-311.pyc CHANGED
Binary files a/deepscreen/models/__pycache__/dti.cpython-311.pyc and b/deepscreen/models/__pycache__/dti.cpython-311.pyc differ
 
deepscreen/models/dti.py CHANGED
@@ -64,7 +64,7 @@ class DTILightningModule(LightningModule):
         self.forward(dummy_batch)
 
     def forward(self, batch):
-        output = self.predictor(batch['X1'], batch['X2'])
+        output = self.predictor(batch['X1^'], batch['X2^'])
         target = batch.get('Y')
         indexes = batch.get('IDX')
         preds = None
@@ -92,7 +92,12 @@
         self.train_metrics(preds=preds, target=target, indexes=indexes.long())
         self.log_dict(self.train_metrics, on_step=False, on_epoch=True, prog_bar=True, sync_dist=True)
 
-        return {'loss': loss, 'N': batch['N'], 'ID1': batch['ID1'], 'ID2': batch['ID2'], 'Y^': preds, 'Y': target}
+        return {
+            'N': batch['N'],
+            'ID1': batch['ID1'], 'X1': batch['X1'],
+            'ID2': batch['ID2'], 'X2': batch['X2'],
+            'Y^': preds, 'Y': target, 'loss': loss
+        }
 
     def on_train_epoch_end(self):
         pass
@@ -104,6 +109,13 @@
         self.val_metrics(preds=preds, target=target, indexes=indexes.long())
         self.log_dict(self.val_metrics, on_step=False, on_epoch=True, prog_bar=True, sync_dist=True)
 
+        return {
+            'N': batch['N'],
+            'ID1': batch['ID1'], 'X1': batch['X1'],
+            'ID2': batch['ID2'], 'X2': batch['X2'],
+            'Y^': preds, 'Y': target, 'loss': loss
+        }
+
     def on_validation_epoch_end(self):
         pass
 
@@ -115,15 +127,25 @@
         self.log_dict(self.test_metrics, on_step=False, on_epoch=True, prog_bar=True, sync_dist=True)
 
         # return a dictionary for callbacks like BasePredictionWriter
-        return {'N': batch['N'], 'ID1': batch['ID1'], 'ID2': batch['ID2'], 'Y^': preds, 'Y': target}
+        return {
+            'N': batch['N'],
+            'ID1': batch['ID1'], 'X1': batch['X1'],
+            'ID2': batch['ID2'], 'X2': batch['X2'],
+            'Y^': preds, 'Y': target, 'loss': loss
+        }
 
     def on_test_epoch_end(self):
         pass
 
     def predict_step(self, batch, batch_idx, dataloader_idx=0):
-        preds, target, indexes = self.forward(batch)
+        preds, _, _, _ = self.forward(batch)
         # return a dictionary for callbacks like BasePredictionWriter
-        return {'N': batch['N'], 'ID1': batch['ID1'], 'ID2': batch['ID2'], 'Y^': preds}
+        return {
+            'N': batch['N'],
+            'ID1': batch['ID1'], 'X1': batch['X1'],
+            'ID2': batch['ID2'], 'X2': batch['X2'],
+            'Y^': preds
+        }
 
     def configure_optimizers(self):
         optimizers_config = {'optimizer': self.hparams.optimizer(params=self.parameters())}
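Rough sketch of what the richer step outputs enable (data below is illustrative): because every step now returns the raw 'X1'/'X2' strings alongside IDs and predictions, a downstream writer can build a self-describing row per sample without touching the dataset again.

```python
import pandas as pd
import torch

# Mirrors the dict shape returned by predict_step above; values are made up.
outputs = {
    'N': torch.tensor([0, 1]),
    'ID1': ['drug_a', 'drug_b'], 'X1': ['CCO', 'c1ccccc1'],
    'ID2': ['prot_a', 'prot_b'], 'X2': ['MKVL', 'MTEY'],
    'Y^': torch.tensor([0.91, 0.13]),
}
df = pd.DataFrame({k: (v.tolist() if torch.is_tensor(v) else v) for k, v in outputs.items()})
print(df)  # one row per sample: N, ID1, X1, ID2, X2, Y^
```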
deepscreen/models/metrics/__pycache__/sensitivity.cpython-311.pyc CHANGED
Binary files a/deepscreen/models/metrics/__pycache__/sensitivity.cpython-311.pyc and b/deepscreen/models/metrics/__pycache__/sensitivity.cpython-311.pyc differ
 
deepscreen/predict.py CHANGED
@@ -34,9 +34,6 @@ def predict(cfg: DictConfig) -> Tuple[list, dict]:
     Returns:
         Tuple[dict, dict]: Dict with metrics and dict with all instantiated objects.
     """
-    assert cfg.ckpt_path, "Checkpoint path (`ckpt_path`) must be specified for predicting."
-    cfg = checkpoint_rerun_config(cfg)
-
     log.info(f"Instantiating data <{cfg.data._target_}>")
     datamodule: LightningDataModule = hydra.utils.instantiate(cfg.data)
 
@@ -65,6 +62,8 @@ def predict(cfg: DictConfig) -> Tuple[list, dict]:
 
 @hydra.main(version_base="1.3", config_path="../configs", config_name="predict.yaml")
 def main(cfg: DictConfig):
+    assert cfg.ckpt_path, "Checkpoint path (`ckpt_path`) must be specified for predicting."
+    cfg = checkpoint_rerun_config(cfg)
     predictions, _ = predict(cfg)
     return predictions
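Sketch of the resulting call pattern (the caller below is an assumption; `predict` and `checkpoint_rerun_config` are the names from the diff): the ckpt_path assertion and config re-resolution now live in the Hydra CLI entry point, so a programmatic caller can invoke predict() directly with a config it has already prepared.

```python
# Hypothetical programmatic caller, e.g. the web server; cfg is assumed to be a
# fully resolved DictConfig that already contains a valid ckpt_path.
from deepscreen.predict import predict

def run_inference_job(cfg):
    predictions, objects = predict(cfg)  # no Hydra-specific preamble needed here
    return predictions
```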
 
deepscreen/utils/__pycache__/__init__.cpython-311.pyc CHANGED
Binary files a/deepscreen/utils/__pycache__/__init__.cpython-311.pyc and b/deepscreen/utils/__pycache__/__init__.cpython-311.pyc differ
 
deepscreen/utils/__pycache__/hydra.cpython-311.pyc CHANGED
Binary files a/deepscreen/utils/__pycache__/hydra.cpython-311.pyc and b/deepscreen/utils/__pycache__/hydra.cpython-311.pyc differ
 
deepscreen/utils/__pycache__/instantiators.cpython-311.pyc CHANGED
Binary files a/deepscreen/utils/__pycache__/instantiators.cpython-311.pyc and b/deepscreen/utils/__pycache__/instantiators.cpython-311.pyc differ
 
deepscreen/utils/__pycache__/lightning.cpython-311.pyc ADDED
Binary file (4.24 kB).
 
deepscreen/utils/__pycache__/logging.cpython-311.pyc CHANGED
Binary files a/deepscreen/utils/__pycache__/logging.cpython-311.pyc and b/deepscreen/utils/__pycache__/logging.cpython-311.pyc differ
 
deepscreen/utils/__pycache__/rich.cpython-311.pyc CHANGED
Binary files a/deepscreen/utils/__pycache__/rich.cpython-311.pyc and b/deepscreen/utils/__pycache__/rich.cpython-311.pyc differ
 
deepscreen/utils/__pycache__/utils.cpython-311.pyc CHANGED
Binary files a/deepscreen/utils/__pycache__/utils.cpython-311.pyc and b/deepscreen/utils/__pycache__/utils.cpython-311.pyc differ
 
deepscreen/utils/hydra.py CHANGED
@@ -73,28 +73,32 @@ class CSVExperimentSummary(Callback):
             override_dict['epoch'] = int(re.search(r'epoch_(\d+)', override_dict['ckpt_path']).group(1))
 
             # Add metrics info
-            output_dir = Path(config.hydra.runtime.output_dir).resolve()
-            csv_metrics_path = output_dir / config.logger.csv.name / "metrics.csv"
-            if csv_metrics_path.is_file():
-                log.info(f"Summarizing metrics with prefix `{self.prefix}` from {csv_metrics_path}")
-                # Use only columns that start with the specified prefix
-                metrics_df = pd.read_csv(csv_metrics_path)
-                # Find rows where any 'test/' column is not null and reset its epoch to the best model epoch
-                test_columns = [col for col in metrics_df.columns if col.startswith('test/')]
-                mask = metrics_df[test_columns].notna().any(axis=1)
-                metrics_df.loc[mask, 'epoch'] = override_dict['epoch']
-                # Group and filter by best epoch
-                metrics_df = metrics_df.groupby('epoch').first()
-                metrics_df = metrics_df[metrics_df.index == override_dict['epoch']]
+            metrics_df = pd.DataFrame()
+            if config.get('logger'):
+                output_dir = Path(config.hydra.runtime.output_dir).resolve()
+                csv_metrics_path = output_dir / config.logger.csv.name / "metrics.csv"
+                if csv_metrics_path.is_file():
+                    log.info(f"Summarizing metrics with prefix `{self.prefix}` from {csv_metrics_path}")
+                    # Use only columns that start with the specified prefix
+                    metrics_df = pd.read_csv(csv_metrics_path)
+                    # Find rows where any 'test/' column is not null and reset its epoch to the best model epoch
+                    test_columns = [col for col in metrics_df.columns if col.startswith('test/')]
+                    mask = metrics_df[test_columns].notna().any(axis=1)
+                    metrics_df.loc[mask, 'epoch'] = override_dict['epoch']
+                    # Group and filter by best epoch
+                    metrics_df = metrics_df.groupby('epoch').first()
+                    metrics_df = metrics_df[metrics_df.index == override_dict['epoch']]
+                else:
+                    log.info(f"No metrics.csv found in {output_dir}")
+
+            if metrics_df.empty:
+                metrics_df = pd.DataFrame(data=override_dict, index=[0])
             else:
-                log.info(f"No metrics.csv found in {output_dir}")
-                metrics_df = pd.DataFrame()
-
-            metrics_df = metrics_df.assign(**override_dict)
-            metrics_df.index = [0]
+                metrics_df = metrics_df.assign(**override_dict)
+                metrics_df.index = [0]
 
             # Add extra info from the input batch experiment summary
-            if self.input_experiment_summary is not None:
+            if self.input_experiment_summary is not None and 'ckpt_path' in metrics_df.columns:
                 orig_meta = self.input_experiment_summary[
                     self.input_experiment_summary['ckpt_path'] == metrics_df['ckpt_path'][0]
                 ].head(1)
@@ -102,6 +106,7 @@ class CSVExperimentSummary(Callback):
                 metrics_df = metrics_df.combine_first(orig_meta)
 
             summary_df = pd.concat([summary_df, metrics_df])
+
             # Drop empty columns
             summary_df.dropna(inplace=True, axis=1, how='all')
             summary_df.to_csv(summary_file_path, index=False, mode='w')
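Small pandas sketch of the new fallback path (values are made up): when no logger is configured or no metrics.csv exists, the summary row is now built from the Hydra override dict alone instead of operating on an empty DataFrame.

```python
import pandas as pd

override_dict = {'ckpt_path': 'ckpts/epoch_041.ckpt', 'epoch': 41}
metrics_df = pd.DataFrame()  # e.g. metrics.csv was never written

if metrics_df.empty:
    metrics_df = pd.DataFrame(data=override_dict, index=[0])
else:
    metrics_df = metrics_df.assign(**override_dict)
    metrics_df.index = [0]

print(metrics_df)  # a single summary row containing only the run's overrides
```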
deepscreen/utils/lightning.py CHANGED
@@ -22,14 +22,14 @@ class CSVPredictionWriter(BasePredictionWriter):
         output_df = self.outputs_to_dataframe(outputs)
         output_df.to_csv(self.output_file,
                          mode='a',
-                         index_label='N',
+                         index=False,
                          header=not self.output_file.is_file())
 
     def write_on_epoch_end(self, trainer, pl_module, predictions, batch_indices):
         output_df = pd.concat([self.outputs_to_dataframe(outputs) for outputs in predictions])
         output_df.to_csv(self.output_file,
                          mode='w',
-                         index_label='N',
+                         index=False,
                          header=True)
 
     def on_test_batch_end(self, trainer, pl_module, outputs, batch, batch_idx: int, dataloader_idx: int = 0):
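Minimal sketch of why `index=False` now suffices (illustrative data): the step outputs already carry 'N' as an ordinary column, so labelling the DataFrame index 'N' would write a duplicate column.

```python
import pandas as pd

df = pd.DataFrame({'N': [0, 1], 'ID1': ['drug_a', 'drug_b'], 'Y^': [0.91, 0.13]})
df.to_csv('predictions.csv', mode='w', index=False, header=True)
# predictions.csv columns: N, ID1, Y^  (no extra unnamed index column)
```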