update
Browse files- app.py +21 -28
- model_cards/article.md +25 -25
- model_cards/description.md +5 -1
- model_cards/examples.csv +3 -4
- utils.py +2 -6
app.py
CHANGED
|
@@ -3,7 +3,11 @@ import pathlib
|
|
| 3 |
|
| 4 |
import gradio as gr
|
| 5 |
import pandas as pd
|
| 6 |
-
from gt4sd.algorithms.generation.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
from gt4sd.algorithms.registry import ApplicationsRegistry
|
| 9 |
from utils import draw_grid_generate
|
|
@@ -14,26 +18,19 @@ logger.addHandler(logging.NullHandler())
|
|
| 14 |
TITLE = "MoLeR"
|
| 15 |
|
| 16 |
|
| 17 |
-
def run_inference(
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
beam_size=beam_size,
|
| 28 |
-
num_samples=4,
|
| 29 |
-
seed=seed,
|
| 30 |
-
num_workers=1,
|
| 31 |
-
)
|
| 32 |
-
model = MoLeR(configuration=config)
|
| 33 |
samples = list(model.sample(number_of_samples))
|
| 34 |
|
| 35 |
-
|
| 36 |
-
return draw_grid_generate(seed_mols, samples)
|
| 37 |
|
| 38 |
|
| 39 |
if __name__ == "__main__":
|
|
@@ -42,7 +39,7 @@ if __name__ == "__main__":
|
|
| 42 |
all_algos = ApplicationsRegistry.list_available()
|
| 43 |
algos = [
|
| 44 |
x["algorithm_version"]
|
| 45 |
-
for x in list(filter(lambda x:
|
| 46 |
]
|
| 47 |
|
| 48 |
# Load metadata
|
|
@@ -59,19 +56,15 @@ if __name__ == "__main__":
|
|
| 59 |
|
| 60 |
demo = gr.Interface(
|
| 61 |
fn=run_inference,
|
| 62 |
-
title="
|
| 63 |
inputs=[
|
| 64 |
-
gr.Dropdown(
|
| 65 |
-
gr.
|
| 66 |
-
label="
|
| 67 |
-
placeholder="CC(C#C)N(C)C(=O)NC1=CC=C(Cl)C=C1",
|
| 68 |
-
lines=1,
|
| 69 |
),
|
| 70 |
-
gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Beam_size"),
|
| 71 |
gr.Slider(
|
| 72 |
minimum=1, maximum=50, value=10, label="Number of samples", step=1
|
| 73 |
),
|
| 74 |
-
gr.Number(value=42, label="Seed", precision=0),
|
| 75 |
],
|
| 76 |
outputs=gr.HTML(label="Output"),
|
| 77 |
article=article,
|
|
|
|
| 3 |
|
| 4 |
import gradio as gr
|
| 5 |
import pandas as pd
|
| 6 |
+
from gt4sd.algorithms.generation.torchdrug import (
|
| 7 |
+
TorchDrugGenerator,
|
| 8 |
+
TorchDrugGCPN,
|
| 9 |
+
TorchDrugGraphAF,
|
| 10 |
+
)
|
| 11 |
|
| 12 |
from gt4sd.algorithms.registry import ApplicationsRegistry
|
| 13 |
from utils import draw_grid_generate
|
|
|
|
| 18 |
TITLE = "MoLeR"
|
| 19 |
|
| 20 |
|
| 21 |
+
def run_inference(algorithm: str, algorithm_version: str, number_of_samples: int):
|
| 22 |
+
|
| 23 |
+
if algorithm == "GCPN":
|
| 24 |
+
config = TorchDrugGCPN(algorithm_version=algorithm_version)
|
| 25 |
+
elif algorithm == "GraphAF":
|
| 26 |
+
config = TorchDrugGraphAF(algorithm_version=algorithm_version)
|
| 27 |
+
else:
|
| 28 |
+
raise ValueError(f"Unsupported model {algorithm}.")
|
| 29 |
+
|
| 30 |
+
model = TorchDrugGenerator(configuration=config)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
samples = list(model.sample(number_of_samples))
|
| 32 |
|
| 33 |
+
return draw_grid_generate(samples=samples, n_cols=5)
|
|
|
|
| 34 |
|
| 35 |
|
| 36 |
if __name__ == "__main__":
|
|
|
|
| 39 |
all_algos = ApplicationsRegistry.list_available()
|
| 40 |
algos = [
|
| 41 |
x["algorithm_version"]
|
| 42 |
+
for x in list(filter(lambda x: "TorchDrug" in x["algorithm_name"], all_algos))
|
| 43 |
]
|
| 44 |
|
| 45 |
# Load metadata
|
|
|
|
| 56 |
|
| 57 |
demo = gr.Interface(
|
| 58 |
fn=run_inference,
|
| 59 |
+
title="TorchDrug (GCPN and GraphAF)",
|
| 60 |
inputs=[
|
| 61 |
+
gr.Dropdown(["GCPN", "GraphAF"], label="Algorithm", value="GCPN"),
|
| 62 |
+
gr.Dropdown(
|
| 63 |
+
list(set(algos)), label="Algorithm version", value="zinc250k_v0"
|
|
|
|
|
|
|
| 64 |
),
|
|
|
|
| 65 |
gr.Slider(
|
| 66 |
minimum=1, maximum=50, value=10, label="Number of samples", step=1
|
| 67 |
),
|
|
|
|
| 68 |
],
|
| 69 |
outputs=gr.HTML(label="Output"),
|
| 70 |
article=article,
|
model_cards/article.md
CHANGED
|
@@ -1,37 +1,37 @@
|
|
| 1 |
# Model documentation & parameters
|
| 2 |
|
| 3 |
-
**Algorithm
|
| 4 |
|
| 5 |
-
**
|
| 6 |
|
| 7 |
**Number of samples**: How many samples should be generated (between 1 and 50).
|
| 8 |
|
| 9 |
-
**Beam size**: Beam size used in beam search decoding (the higher the slower but better).
|
| 10 |
-
|
| 11 |
-
**Seed**: The random seed used for initialization.
|
| 12 |
-
|
| 13 |
|
| 14 |
-
# Model card
|
| 15 |
|
| 16 |
-
**Model Details**:
|
| 17 |
|
| 18 |
-
**Developers**:
|
| 19 |
|
| 20 |
-
**Distributors**:
|
| 21 |
|
| 22 |
-
**Model date**:
|
| 23 |
|
| 24 |
-
**Model version**:
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
**Model type**:
|
| 27 |
|
| 28 |
-
**Information about training algorithms, parameters, fairness constraints or other applied approaches, and features**:
|
| 29 |
|
| 30 |
-
**Paper or other resource for more information**:
|
|
|
|
| 31 |
|
| 32 |
-
**License**:
|
| 33 |
|
| 34 |
-
**Where to send questions or comments about the model**: Open an issue on
|
| 35 |
|
| 36 |
**Intended Use. Use cases that were envisioned during development**: Chemical research, in particular drug discovery.
|
| 37 |
|
|
@@ -41,9 +41,9 @@
|
|
| 41 |
|
| 42 |
**Factors**: Not applicable.
|
| 43 |
|
| 44 |
-
**Metrics**: Validation loss on decoding correct molecules.
|
| 45 |
|
| 46 |
-
**Datasets**:
|
| 47 |
|
| 48 |
**Ethical Considerations**: Unclear, please consult with original authors in case of questions.
|
| 49 |
|
|
@@ -54,12 +54,12 @@ Model card prototype inspired by [Mitchell et al. (2019)](https://dl.acm.org/doi
|
|
| 54 |
## Citation
|
| 55 |
|
| 56 |
```bib
|
| 57 |
-
@
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
year
|
| 63 |
}
|
| 64 |
```
|
| 65 |
|
|
|
|
| 1 |
# Model documentation & parameters
|
| 2 |
|
| 3 |
+
**Algorithm**: Which model to use (GCPN or GraphAF).
|
| 4 |
|
| 5 |
+
**Algorithm Version**: Which model checkpoint to use (trained on different datasets).
|
| 6 |
|
| 7 |
**Number of samples**: How many samples should be generated (between 1 and 50).
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
+
# Model card -- GCPN
|
| 11 |
|
| 12 |
+
**Model Details**: GCPN is a graph-based molecular generative model that can be optimized with RL for goal-directed graph generation.
|
| 13 |
|
| 14 |
+
**Developers**: Jiaxuan You and co-authors from Stanford.
|
| 15 |
|
| 16 |
+
**Distributors**: Code provided by TorchDrug developers, wrapped and distributed by GT4SD Team (2023) from IBM Research.
|
| 17 |
|
| 18 |
+
**Model date**: Published in 2018.
|
| 19 |
|
| 20 |
+
**Model version**: Models trained by GT4SD team on the tasks provided by TorchDrug repo [(see their tutorial)](https://torchdrug.ai/docs/tutorials/generation.html).
|
| 21 |
+
- **ZINC_250k**: 250,000 drug-like molecules with a maximum atom number of 38, taken from [ZINC](https://zinc.docking.org).
|
| 22 |
+
- **QED**: ZINC dataset, but the model was optimized with Proximal Policy Optimization (PPO) to generate molecules with high QED scores.
|
| 23 |
+
- **pLogP**: ZINC dataset, but the model was optimized with Proximal Policy Optimization (PPO) to generate molecules with high pLogP scores.
|
| 24 |
|
| 25 |
+
**Model type**: A graph-based molecular generative model that can be optimized with RL for goal-directed graph generation.
|
| 26 |
|
| 27 |
+
**Information about training algorithms, parameters, fairness constraints or other applied approaches, and features**: Default parameters as provided in [(TorchDrug tutorial)](https://torchdrug.ai/docs/tutorials/generation.html).
|
| 28 |
|
| 29 |
+
**Paper or other resource for more information**: [Graph Convolutional Policy Network for
|
| 30 |
+
Goal-Directed Molecular Graph Generation (NeurIPS 2018)](https://proceedings.neurips.cc/paper/2018/file/d60678e8f2ba9c540798ebbde31177e8-Paper.pdf).
|
| 31 |
|
| 32 |
+
**License**: TorchDrug: Apache-2.0 license.
|
| 33 |
|
| 34 |
+
**Where to send questions or comments about the model**: Open an issue on [TorchDrug repository](https://github.com/DeepGraphLearning/torchdrug).
|
| 35 |
|
| 36 |
**Intended Use. Use cases that were envisioned during development**: Chemical research, in particular drug discovery.
|
| 37 |
|
|
|
|
| 41 |
|
| 42 |
**Factors**: Not applicable.
|
| 43 |
|
| 44 |
+
**Metrics**: Validation loss on decoding correct molecules.
|
| 45 |
|
| 46 |
+
**Datasets**: 250,000 drug-like molecules from [ZINC](https://zinc.docking.org) (with a maximum atom number of 38).
|
| 47 |
|
| 48 |
**Ethical Considerations**: Unclear, please consult with original authors in case of questions.
|
| 49 |
|
|
|
|
| 54 |
## Citation
|
| 55 |
|
| 56 |
```bib
|
| 57 |
+
@article{you2018graph,
|
| 58 |
+
title={Graph convolutional policy network for goal-directed molecular graph generation},
|
| 59 |
+
author={You, Jiaxuan and Liu, Bowen and Ying, Zhitao and Pande, Vijay and Leskovec, Jure},
|
| 60 |
+
journal={Advances in neural information processing systems},
|
| 61 |
+
volume={31},
|
| 62 |
+
year={2018}
|
| 63 |
}
|
| 64 |
```
|
| 65 |
|
model_cards/description.md
CHANGED
|
@@ -1,6 +1,10 @@
|
|
| 1 |
<img align="right" src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="120" >
|
| 2 |
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
For **examples** and **documentation** of the model parameters, please see below.
|
| 6 |
Moreover, we provide a **model card** ([Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)) at the bottom of this page.
|
|
|
|
| 1 |
<img align="right" src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="120" >
|
| 2 |
|
| 3 |
+
|
| 4 |
+
[TorchDrug](https://github.com/DeepGraphLearning/torchdrug) is a PyTorch toolbox on graph models for drug discovery.
|
| 5 |
+
We, the developers of **GT4SD** (Generative Toolkit for Scientific Discovery), provide access to two graph-based molecular generative models distributed by TorchDrug:
|
| 6 |
+
- **GCPN**: Graph Convolutional Policy Network ([You et al., (2018), *NeurIPS*](https://proceedings.neurips.cc/paper/2018/hash/d60678e8f2ba9c540798ebbde31177e8-Abstract.html))
|
| 7 |
+
- **GraphAF**: GraphAF: a Flow-based Autoregressive Model for Molecular Graph Generation ([Shi et al., (2020), *ICLR*](https://openreview.net/forum?id=S1esMkHYPr))
|
| 8 |
|
| 9 |
For **examples** and **documentation** of the model parameters, please see below.
|
| 10 |
Moreover, we provide a **model card** ([Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)) at the bottom of this page.
|
model_cards/examples.csv
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
|
|
|
|
| 1 |
+
GCPN_zinc250k_v0,5
|
| 2 |
+
GCPN_qed_v0,10
|
| 3 |
+
GraphAF_plogp_v0,5
|
|
|
|
| 4 |
|
utils.py
CHANGED
|
@@ -1,21 +1,17 @@
|
|
| 1 |
-
import json
|
| 2 |
import logging
|
| 3 |
-
import os
|
| 4 |
from collections import defaultdict
|
| 5 |
-
from typing import
|
| 6 |
|
| 7 |
import mols2grid
|
| 8 |
import pandas as pd
|
| 9 |
-
from rdkit import Chem
|
| 10 |
-
from terminator.selfies import decoder
|
| 11 |
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
logger.addHandler(logging.NullHandler())
|
| 14 |
|
| 15 |
|
| 16 |
def draw_grid_generate(
|
| 17 |
-
seeds: List[str],
|
| 18 |
samples: List[str],
|
|
|
|
| 19 |
n_cols: int = 3,
|
| 20 |
size=(140, 200),
|
| 21 |
) -> str:
|
|
|
|
|
|
|
| 1 |
import logging
|
|
|
|
| 2 |
from collections import defaultdict
|
| 3 |
+
from typing import List
|
| 4 |
|
| 5 |
import mols2grid
|
| 6 |
import pandas as pd
|
|
|
|
|
|
|
| 7 |
|
| 8 |
logger = logging.getLogger(__name__)
|
| 9 |
logger.addHandler(logging.NullHandler())
|
| 10 |
|
| 11 |
|
| 12 |
def draw_grid_generate(
|
|
|
|
| 13 |
samples: List[str],
|
| 14 |
+
seeds: List[str] = [],
|
| 15 |
n_cols: int = 3,
|
| 16 |
size=(140, 200),
|
| 17 |
) -> str:
|