torchdrug

Sleeping

App Files Files Community

jannisborn committed on Jan 8, 2023

Commit

188d00f

unverified ·

1 Parent(s): 7d76d6f

update

Browse files

Files changed (5) hide show

app.py +21 -28
model_cards/article.md +25 -25
model_cards/description.md +5 -1
model_cards/examples.csv +3 -4
utils.py +2 -6

app.py CHANGED Viewed

@@ -3,7 +3,11 @@ import pathlib
 import gradio as gr
 import pandas as pd
-from gt4sd.algorithms.generation.moler import MoLeR, MoLeRDefaultGenerator
 from gt4sd.algorithms.registry import ApplicationsRegistry
 from utils import draw_grid_generate
@@ -14,26 +18,19 @@ logger.addHandler(logging.NullHandler())
 TITLE = "MoLeR"
-def run_inference(
-    algorithm_version: str,
-    scaffolds: str,
-    beam_size: int,
-    number_of_samples: int,
-    seed: int,
-):
-    config = MoLeRDefaultGenerator(
-        algorithm_version=algorithm_version,
-        scaffolds=scaffolds,
-        beam_size=beam_size,
-        num_samples=4,
-        seed=seed,
-        num_workers=1,
-    )
-    model = MoLeR(configuration=config)
     samples = list(model.sample(number_of_samples))
-    seed_mols = [] if scaffolds == "" else scaffolds.split(".")
-    return draw_grid_generate(seed_mols, samples)
 if __name__ == "__main__":
@@ -42,7 +39,7 @@ if __name__ == "__main__":
     all_algos = ApplicationsRegistry.list_available()
     algos = [
         x["algorithm_version"]
-        for x in list(filter(lambda x: TITLE in x["algorithm_name"], all_algos))
     ]
     # Load metadata
@@ -59,19 +56,15 @@ if __name__ == "__main__":
     demo = gr.Interface(
         fn=run_inference,
-        title="MoLeR (MOlecule-LEvel Representation)",
         inputs=[
-            gr.Dropdown(algos, label="Algorithm version", value="v0"),
-            gr.Textbox(
-                label="Scaffolds",
-                placeholder="CC(C#C)N(C)C(=O)NC1=CC=C(Cl)C=C1",
-                lines=1,
             ),
-            gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Beam_size"),
             gr.Slider(
                 minimum=1, maximum=50, value=10, label="Number of samples", step=1
             ),
-            gr.Number(value=42, label="Seed", precision=0),
         ],
         outputs=gr.HTML(label="Output"),
         article=article,

 import gradio as gr
 import pandas as pd
+from gt4sd.algorithms.generation.torchdrug import (
+    TorchDrugGenerator,
+    TorchDrugGCPN,
+    TorchDrugGraphAF,
+)
 from gt4sd.algorithms.registry import ApplicationsRegistry
 from utils import draw_grid_generate
 TITLE = "MoLeR"
def run_inference(algorithm: str, algorithm_version: str, number_of_samples: int):
    """Generate molecules with a TorchDrug model and render them as a grid.

    Args:
        algorithm: Name of the generative model; either "GCPN" or "GraphAF".
        algorithm_version: Version identifier forwarded to the selected
            configuration class.
        number_of_samples: Number of items requested from the generator.

    Returns:
        The value produced by `draw_grid_generate` for the sampled items,
        laid out with five columns.

    Raises:
        ValueError: If `algorithm` is neither "GCPN" nor "GraphAF".
    """
    # Reject unknown model names up front, before any configuration is built.
    if algorithm not in ("GCPN", "GraphAF"):
        raise ValueError(f"Unsupported model {algorithm}.")

    configuration_cls = TorchDrugGCPN if algorithm == "GCPN" else TorchDrugGraphAF
    generator = TorchDrugGenerator(
        configuration=configuration_cls(algorithm_version=algorithm_version)
    )

    # Materialize the sampler output so the grid helper receives a list.
    molecules = list(generator.sample(number_of_samples))
    return draw_grid_generate(samples=molecules, n_cols=5)
 if __name__ == "__main__":
     all_algos = ApplicationsRegistry.list_available()
     algos = [
         x["algorithm_version"]
+        for x in list(filter(lambda x: "TorchDrug" in x["algorithm_name"], all_algos))
     ]
     # Load metadata
     demo = gr.Interface(
         fn=run_inference,
+        title="TorchDrug (GCPN and GraphAF)",
         inputs=[
+            gr.Dropdown(["GCPN", "GraphAF"], label="Algorithm", value="GCPN"),
+            gr.Dropdown(
+                list(set(algos)), label="Algorithm version", value="zinc250k_v0"
             ),
             gr.Slider(
                 minimum=1, maximum=50, value=10, label="Number of samples", step=1
             ),
         ],
         outputs=gr.HTML(label="Output"),
         article=article,

model_cards/article.md CHANGED Viewed

@@ -1,37 +1,37 @@
 # Model documentation & parameters
-**Algorithm Version**: Which model checkpoint to use (trained on different datasets).
-**Scaffolds**: One or multiple scaffolds (or seed molecules), provided as '.'-separated SMILES. If empty, no scaffolds are used.
 **Number of samples**: How many samples should be generated (between 1 and 50).
-**Beam size**: Beam size used in beam search decoding (the higher the slower but better).
-**Seed**: The random seed used for initialization.
-# Model card
-**Model Details**: MoLeR is a graph-based molecular generative model that can be conditioned (primed) on scaffolds. The model decorates scaffolds with realistic structural motifs.
-**Developers**: Krzysztof Maziarz and co-authors from Microsoft Research and Novartis (full reference at bottom).
-**Distributors**: Developer's code wrapped and distributed by GT4SD Team (2023) from IBM Research.
-**Model date**: Released around March 2022.
-**Model version**: Model provided by original authors, see [their GitHub repo](https://github.com/microsoft/molecule-generation).
-**Model type**: An encoder-decoder-based GNN for molecular generation.
-**Information about training algorithms, parameters, fairness constraints or other applied approaches, and features**: Trained by the original authors with the default parameters provided [on GitHub](https://github.com/microsoft/molecule-generation).
-**Paper or other resource for more information**: Learning to Extend Molecular Scaffolds with Structural Motifs (ICLR 2022).
-**License**: MIT
-**Where to send questions or comments about the model**: Open an issue on original author's [GitHub repository](https://github.com/microsoft/molecule-generation).
 **Intended Use. Use cases that were envisioned during development**: Chemical research, in particular drug discovery.
@@ -41,9 +41,9 @@
 **Factors**: Not applicable.
-**Metrics**: Validation loss on decoding correct molecules. Evaluated on several downstream tasks.
-**Datasets**: 1.5M drug-like molecules from GuacaMol benchmark. Finetuning on 20 molecular optimization tasks from GuacaMol.
 **Ethical Considerations**: Unclear, please consult with original authors in case of questions.
@@ -54,12 +54,12 @@ Model card prototype inspired by [Mitchell et al. (2019)](https://dl.acm.org/doi
 ## Citation
 ```bib
-@inproceedings{maziarz2021learning,
-  author={Krzysztof Maziarz and Henry Richard Jackson{-}Flux and Pashmina Cameron and
-    Finton Sirockin and Nadine Schneider and Nikolaus Stiefl and Marwin H. S. Segler and Marc Brockschmidt},
-  title     = {Learning to Extend Molecular Scaffolds with Structural Motifs},
-  booktitle = {The Tenth International Conference on Learning Representations, {ICLR}},
-  year      = {2022}
 }
 ```

 # Model documentation & parameters
+**Algorithm**: Which model to use (GCPN or GraphAF).
+**Algorithm Version**: Which model checkpoint to use (trained on different datasets).
 **Number of samples**: How many samples should be generated (between 1 and 50).
+# Model card -- GCPN
+**Model Details**: GCPN is a graph-based molecular generative model that can be optimized with RL for goal-directed graph generation.
+**Developers**: Jiaxuan You and co-authors from Stanford.
+**Distributors**: Code provided by TorchDrug developers, wrapped and distributed by GT4SD Team (2023) from IBM Research.
+**Model date**: Published in 2018.
+**Model version**: Models trained by the GT4SD team on the tasks provided in the TorchDrug repository ([see their tutorial](https://torchdrug.ai/docs/tutorials/generation.html)).
+- **ZINC_250k**: 250,000 drug-like molecules with a maximum atom number of 38, taken from [ZINC](https://zinc.docking.org).
+- **QED**: ZINC dataset, but the model was optimized with Proximal Policy Optimization (PPO) to generate molecules with high QED scores.
+- **pLogP**: ZINC dataset, but the model was optimized with Proximal Policy Optimization (PPO) to generate molecules with high pLogP scores.
+**Model type**: A graph-based molecular generative model that can be optimized with RL for goal-directed graph generation.
+**Information about training algorithms, parameters, fairness constraints or other applied approaches, and features**: Default parameters as provided in the [TorchDrug tutorial](https://torchdrug.ai/docs/tutorials/generation.html).
+**Paper or other resource for more information**: [Graph Convolutional Policy Network for Goal-Directed Molecular Graph Generation (NeurIPS 2018)](https://proceedings.neurips.cc/paper/2018/file/d60678e8f2ba9c540798ebbde31177e8-Paper.pdf).
+**License**: TorchDrug: Apache-2.0 license.
+**Where to send questions or comments about the model**: Open an issue on [TorchDrug repository](https://github.com/DeepGraphLearning/torchdrug).
 **Intended Use. Use cases that were envisioned during development**: Chemical research, in particular drug discovery.
 **Factors**: Not applicable.
+**Metrics**: Validation loss on decoding correct molecules.
+**Datasets**: 250,000 drug-like molecules from [ZINC](https://zinc.docking.org) (with a maximum atom number of 38).
 **Ethical Considerations**: Unclear, please consult with original authors in case of questions.
 ## Citation
 ```bib
+@article{you2018graph,
+  title={Graph convolutional policy network for goal-directed molecular graph generation},
+  author={You, Jiaxuan and Liu, Bowen and Ying, Zhitao and Pande, Vijay and Leskovec, Jure},
+  journal={Advances in neural information processing systems},
+  volume={31},
+  year={2018}
 }
 ```

model_cards/description.md CHANGED Viewed

@@ -1,6 +1,10 @@
 <img align="right" src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="120" >
-MoLeR (Maziarz et al., (2022), *ICLR*) is a graph-based molecular generative model that can be conditioned (primed) on scaffolds. This model r is provided and distributed by the **GT4SD** (Generative Toolkit for Scientific Discovery).
 For **examples** and **documentation** of the model parameters, please see below.
 Moreover, we provide a **model card** ([Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)) at the bottom of this page.

 <img align="right" src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="120" >
+[TorchDrug](https://github.com/DeepGraphLearning/torchdrug) is a PyTorch toolbox on graph models for drug discovery.
+We, the developers of **GT4SD** (Generative Toolkit for Scientific Discovery), provide access to two graph-based molecular generative models distributed by TorchDrug:
+- **GCPN**: Graph Convolutional Policy Network ([You et al. (2018), *NeurIPS*](https://proceedings.neurips.cc/paper/2018/hash/d60678e8f2ba9c540798ebbde31177e8-Abstract.html))
+- **GraphAF**: GraphAF: a Flow-based Autoregressive Model for Molecular Graph Generation ([Shi et al. (2020), *ICLR*](https://openreview.net/forum?id=S1esMkHYPr))
 For **examples** and **documentation** of the model parameters, please see below.
 Moreover, we provide a **model card** ([Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)) at the bottom of this page.

model_cards/examples.csv CHANGED Viewed

@@ -1,5 +1,4 @@
-v0,,1,4,0
-v0,CC(=O)NC1=NC2=CC(OCC3=CC=CN(CC4=CC=C(Cl)C=C4)C3=O)=CC=C2N1,1,10,0
-v0,C12C=CC=NN1C(C#CC1=C(C)C=CC3C(NC4=CC(C(F)(F)F)=CC=C4)=NOC1=3)=CN=2.CCO,3,5,5

+GCPN_zinc250k_v0,5
+GCPN_qed_v0,10
+GraphAF_plogp_v0,5

utils.py CHANGED Viewed

@@ -1,21 +1,17 @@
-import json
 import logging
-import os
 from collections import defaultdict
-from typing import Dict, List, Tuple
 import mols2grid
 import pandas as pd
-from rdkit import Chem
-from terminator.selfies import decoder
 logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())
 def draw_grid_generate(
-    seeds: List[str],
     samples: List[str],
     n_cols: int = 3,
     size=(140, 200),
 ) -> str:

 import logging
 from collections import defaultdict
+from typing import List
 import mols2grid
 import pandas as pd
 logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())
 def draw_grid_generate(
     samples: List[str],
+    seeds: List[str] = [],
     n_cols: int = 3,
     size=(140, 200),
 ) -> str: