File size: 4,507 Bytes
52af9bb
5fc90f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52af9bb
5fc90f2
 
 
 
 
52af9bb
5fc90f2
 
 
 
 
 
 
52af9bb
5fc90f2
 
 
 
 
 
52af9bb
5fc90f2
 
52af9bb
5fc90f2
52af9bb
5fc90f2
52af9bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5fc90f2
 
 
52af9bb
 
 
 
5fc90f2
e0d93b8
52af9bb
 
e0d93b8
52af9bb
e0d93b8
5fc90f2
 
 
 
 
 
 
52af9bb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# app.py
from Bio import PDB
from transformers import AutoTokenizer, AutoModelForCausalLM
from rdkit import Chem
import py3Dmol
import re
import io
import selfies as sf
import torch
import time
import gradio as gr

# Randomness setup: seed PyTorch's RNG from the current wall-clock time
# (truncated to whole seconds).
torch.manual_seed(int(time.time()))
if torch.cuda.is_available():
    # time.time() is read a second time here, so the CUDA seed can differ from
    # the CPU seed if the clock ticks over between the two calls.
    torch.cuda.manual_seed_all(int(time.time()))

# Load the model: the tokenizer and causal LM are created once, at import
# time, via from_pretrained(model_name) and reused by the generation code.
model_name = "ncfrey/ChemGPT-1.2B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

def load_pdb(file_obj):
    """Parse a PDB source with Biopython and return the resulting structure.

    Args:
        file_obj: Whatever ``PDBParser.get_structure`` accepts as its file
            argument; the caller in this file passes an ``io.StringIO``.

    Returns:
        The structure object produced by the parser, created under the
        structure id ``'protein'``.
    """
    # QUIET=True is forwarded to the parser exactly as before.
    return PDB.PDBParser(QUIET=True).get_structure('protein', file_obj)

def get_protein_3d_html(pdb_str):
    """Build an HTML snippet showing the given PDB text in a py3Dmol viewer.

    Args:
        pdb_str: PDB file content as text.

    Returns:
        The HTML produced by the viewer for a 600x400 cartoon rendering
        coloured with the "spectrum" scheme.
    """
    cartoon_style = {"cartoon": {"color": "spectrum"}}

    viewer = py3Dmol.view(width=600, height=400)
    viewer.addModel(pdb_str, "pdb")
    viewer.setStyle(cartoon_style)
    viewer.zoomTo()

    # NOTE: _make_html() is a private py3Dmol method; it is used here to get
    # the markup as a string instead of displaying the viewer.
    html = viewer._make_html()
    return html

def clean_and_decode_selfies(raw_output):
    """Turn raw model text into a canonical SMILES string, if possible.

    The text is scanned for bracketed SELFIES tokens (``[...]``). Tokens whose
    text contains ``Branch``, ``Ring`` or ``expl`` are discarded, the rest are
    concatenated, decoded to SMILES with ``selfies`` and canonicalised with
    RDKit.

    Args:
        raw_output: Decoded text produced by the language model. ``None`` and
            empty strings are accepted.

    Returns:
        The canonical SMILES string, or ``None`` when no usable token remains,
        decoding fails, RDKit rejects the molecule, or the result is empty.
    """
    if not raw_output:
        return None

    excluded_markers = ('Branch', 'Ring', 'expl')
    tokens = re.findall(r'\[[^\[\]]+\]', raw_output)
    valid_tokens = [
        token for token in tokens
        if not any(marker in token for marker in excluded_markers)
    ]
    if not valid_tokens:
        # Nothing to decode; avoids handing an empty string to the decoder.
        return None

    try:
        smiles = sf.decoder(''.join(valid_tokens))
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return None
        canonical = Chem.MolToSmiles(mol)
    except Exception:
        # Best-effort: any decoding/sanitisation failure means "no molecule".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        return None

    # An empty SMILES is not a usable candidate; report it like a failure.
    return canonical or None

def generate_multiple_valid_smiles(prompt, n=10, max_length=100):
    """Sample the language model until ``n`` distinct valid SMILES are found.

    Each attempt samples one sequence from the module-level ``model``
    (``do_sample=True``, ``temperature=1.0``, ``top_k=100``), decodes it with
    the module-level ``tokenizer`` and converts it with
    ``clean_and_decode_selfies``. Sampling stops after ``n * 5`` attempts even
    if fewer than ``n`` molecules were found.

    Args:
        prompt: Text prompt given to the model.
        n: Number of distinct SMILES strings wanted. Values <= 0 yield ``[]``
            without touching the model.
        max_length: ``max_length`` forwarded to ``model.generate``.

    Returns:
        A list of at most ``n`` unique SMILES strings, in the order they were
        first generated.
    """
    if n <= 0:
        return []

    # The prompt never changes between attempts, so tokenize it only once.
    inputs = tokenizer(prompt, return_tensors="pt")

    # A dict is used as an insertion-ordered set so the output order is stable.
    unique_smiles = {}
    for _ in range(n * 5):
        if len(unique_smiles) >= n:
            break
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            do_sample=True,
            temperature=1.0,
            top_k=100,
            pad_token_id=tokenizer.eos_token_id,
        )
        selfies_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
        smiles = clean_and_decode_selfies(selfies_output)
        if smiles:
            unique_smiles[smiles] = None
    return list(unique_smiles)

def _read_pdb_text(pdb_file):
    """Return the PDB text carried by ``pdb_file``, or ``""`` if there is none."""
    import os  # local import keeps this block self-contained

    if pdb_file is None:
        return ""
    if isinstance(pdb_file, (bytes, bytearray)):
        return bytes(pdb_file).decode('utf-8', errors='ignore')
    if isinstance(pdb_file, str):
        # A single-line string naming an existing file is an upload path;
        # anything else is treated as raw PDB content, as before.
        if "\n" not in pdb_file and os.path.isfile(pdb_file):
            with open(pdb_file, "rb") as handle:
                return handle.read().decode('utf-8', errors='ignore')
        return pdb_file

    # File-like object: accept both binary and text mode handles.
    data = pdb_file.read()
    if isinstance(data, (bytes, bytearray)):
        return bytes(data).decode('utf-8', errors='ignore')
    return data


def generate_from_pdb(pdb_file):
    """Validate an uploaded PDB file and generate candidate molecules.

    The PDB text is parsed with ``load_pdb`` as a validity check and rendered
    with ``get_protein_3d_html``. Molecules are then sampled with a fixed
    KRAS prompt; the PDB content itself is not fed to the model.

    Args:
        pdb_file: The upload. May be ``None`` (nothing uploaded), a path to a
            file on disk, raw PDB content as ``str``/``bytes``, or a file-like
            object with ``read()``.

    Returns:
        A ``(status, html, smiles_path)`` tuple. ``html`` is the 3D viewer
        markup or ``None``; ``smiles_path`` is the path of a text file with
        one SMILES per line, or ``None`` when nothing was generated. Errors
        are reported through ``status`` rather than raised.
    """
    import tempfile  # local import keeps this block self-contained

    try:
        pdb_str = _read_pdb_text(pdb_file)

        if not pdb_str.strip():
            return "โŒ The file is empty or invalid.", None, None

        try:
            load_pdb(io.StringIO(pdb_str))
        except Exception as e:
            return f"โŒ Error parsing the PDB file:\n{str(e)}", None, None

        html_3d = get_protein_3d_html(pdb_str)

        prompt = "Generate a molecule in SELFIES that binds to the mutated KRAS protein"
        smiles_list = generate_multiple_valid_smiles(prompt, n=10)

        if not smiles_list:
            return "โŒ No valid SMILES generated.", html_3d, None

        # A unique temp file per call: concurrent requests no longer overwrite
        # each other's results, and no '/tmp' directory is assumed.
        with tempfile.NamedTemporaryFile(
            "w",
            encoding="utf-8",
            prefix="generated_smiles_",
            suffix=".txt",
            delete=False,
        ) as f:
            f.write("\n".join(smiles_list))
            smiles_file_path = f.name

        return "โœ… Molecules generated successfully.", html_3d, smiles_file_path

    except Exception as e:
        # Top-level UI boundary: surface the failure in the status box.
        return f"โŒ An unexpected error occurred:\n{str(e)}", None, None

# Custom CSS handed to gr.Blocks(css=...) below. It restyles the page
# background and font, the <h1> heading, boxed panels (.gr-box) and buttons.
# NOTE(review): the .gr-box selector depends on the class names Gradio emits;
# confirm it still matches the installed Gradio version.
css = """
body {
    background-color: #f0f9ff;
    font-family: 'Segoe UI', sans-serif;
}
h1 {
    color: #003d66;
    text-align: center;
    font-size: 32px;
}
.gr-box {
    border: 1px solid #cce7ff;
    background-color: #ffffff;
    border-radius: 15px;
    padding: 20px;
    box-shadow: 0 2px 8px rgba(0, 128, 255, 0.1);
}
button {
    background-color: #007acc !important;
    color: white !important;
    font-weight: bold;
    border-radius: 10px !important;
}
"""

# Build the Gradio interface: a header, an upload/run row, and three outputs.
with gr.Blocks(css=css) as demo:
    # Page title and short usage instructions, written as inline HTML.
    gr.Markdown("""
    <h1>๐Ÿ”ฌ Drug-like Molecule Generation from PDB using ChemGPT</h1>
    <p>๐Ÿงช Upload a PDB file containing mutations in the KRAS protein. The system will generate suitable SMILES drug candidates.</p>
    """)
    # Input row: the PDB upload next to the button that starts generation.
    with gr.Row():
        pdb_input = gr.File(label="๐Ÿ“ Upload PDB File")
        run_btn = gr.Button("๐Ÿš€ Generate Molecules")

    # Outputs, in the same order as the tuple returned by generate_from_pdb:
    # status message, 3D viewer HTML, downloadable SMILES file.
    status = gr.Textbox(label="๐Ÿ“ข Status")
    view3d = gr.HTML(label="๐Ÿงฌ 3D Structure View")
    file_output = gr.File(label="๐Ÿ“„ Download SMILES File")

    # Clicking the button passes the uploaded file to generate_from_pdb and
    # routes its three return values to the components above.
    run_btn.click(
        fn=generate_from_pdb,
        inputs=pdb_input,
        outputs=[status, view3d, file_output]
    )

# Runs unconditionally when this module is executed or imported (there is no
# __main__ guard). share=True is forwarded to Gradio's launch(), which per its
# documentation requests a public share link.
demo.launch(share=True)