File size: 3,834 Bytes
5fc90f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
addc067
5fc90f2
15917cf
5fc90f2
 
 
 
 
15917cf
5fc90f2
 
 
 
 
 
15917cf
5fc90f2
 
 
 
 
 
15917cf
5fc90f2
15917cf
5fc90f2
 
15917cf
 
5fc90f2
 
 
15917cf
 
5fc90f2
e0d93b8
15917cf
e0d93b8
15917cf
e0d93b8
addc067
5fc90f2
addc067
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import io
import re
import tempfile
import time

import gradio as gr
import py3Dmol
import selfies as sf
import torch
from Bio import PDB
from rdkit import Chem
from transformers import AutoTokenizer, AutoModelForCausalLM

# Randomness setup: seed PyTorch from the current wall-clock time (in whole
# seconds) so that sampling differs between launches of the app.
torch.manual_seed(int(time.time()))
if torch.cuda.is_available():
    # time.time() is evaluated a second time here, so this seed can differ
    # from the one above if the clock ticks over between the two calls.
    torch.cuda.manual_seed_all(int(time.time()))

# Tokenizer and causal language model are loaded once, at import time, and
# are used as module-level globals by the generation code in this file.
model_name = "ncfrey/ChemGPT-1.2B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

def load_pdb(file_obj):
    """Parse PDB data from *file_obj* and return the parsed structure.

    A fresh ``PDB.PDBParser`` is created with ``QUIET=True`` for every call,
    and the structure is registered under the id ``'protein'``.
    """
    return PDB.PDBParser(QUIET=True).get_structure('protein', file_obj)

def get_protein_3d_html(pdb_str):
    """Build the HTML for a 600x400 py3Dmol viewer showing *pdb_str*.

    The PDB text is added as a model, styled as a cartoon coloured with the
    "spectrum" scheme, and the view is zoomed to fit before the HTML is
    produced.
    """
    cartoon_style = {"cartoon": {"color": "spectrum"}}

    viewer = py3Dmol.view(width=600, height=400)
    viewer.addModel(pdb_str, "pdb")
    viewer.setStyle(cartoon_style)
    viewer.zoomTo()

    # _make_html() is a private py3Dmol method; it is what the original code
    # relies on to obtain the embeddable markup.
    html = viewer._make_html()
    return html

# Matches one bracketed symbol such as "[C]" (no nested brackets).
_SELFIES_TOKEN_RE = re.compile(r'\[[^\[\]]+\]')
# Bracketed symbols containing any of these substrings are discarded
# before decoding.
_EXCLUDED_TOKEN_MARKERS = ('Branch', 'Ring', 'expl')


def clean_and_decode_selfies(raw_output):
    """Extract SELFIES symbols from model output and decode them to SMILES.

    Args:
        raw_output: Text produced by the language model. May be empty or
            ``None``.

    Returns:
        The SMILES string produced by ``Chem.MolToSmiles`` for the decoded
        molecule, or ``None`` when the text contains no usable symbols, the
        decoder or RDKit fails, or the resulting SMILES is empty.
    """
    if not raw_output:
        return None

    kept_tokens = [
        token
        for token in _SELFIES_TOKEN_RE.findall(raw_output)
        if not any(marker in token for marker in _EXCLUDED_TOKEN_MARKERS)
    ]
    if not kept_tokens:
        # Nothing left to decode; skip the decoder entirely.
        return None

    try:
        smiles = sf.decoder(''.join(kept_tokens))
        mol = Chem.MolFromSmiles(smiles) if smiles else None
        if mol is None:
            return None
        # Normalise an empty SMILES to None so callers get one "invalid" value.
        return Chem.MolToSmiles(mol) or None
    except Exception:
        # Best-effort filter: any decoding/parsing failure just means this
        # sample is unusable. Unlike a bare ``except``, this does not swallow
        # KeyboardInterrupt or SystemExit.
        return None

def generate_multiple_valid_smiles(prompt, n=10, max_length=100, max_tries=None):
    """Sample from the language model until *n* distinct valid SMILES are found.

    Args:
        prompt: Text prompt passed to the tokenizer.
        n: Number of distinct SMILES strings wanted.
        max_length: ``max_length`` forwarded to ``model.generate``.
        max_tries: Upper bound on the number of sampling attempts. Defaults
            to ``n * 5``, the limit the function has always used.

    Returns:
        A list of at most *n* unique SMILES strings, in the order they were
        first generated. The list may be shorter than *n* (or empty) if the
        attempt budget runs out first.
    """
    if n <= 0:
        return []
    if max_tries is None:
        max_tries = n * 5

    # The prompt is the same for every attempt, so tokenize it only once.
    inputs = tokenizer(prompt, return_tensors="pt")

    # A dict is used as an insertion-ordered set so the result order is
    # the generation order rather than arbitrary set iteration order.
    unique_smiles = {}
    for _ in range(max_tries):
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            do_sample=True,
            temperature=1.0,
            top_k=100,
            pad_token_id=tokenizer.eos_token_id,
        )
        selfies_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
        smiles = clean_and_decode_selfies(selfies_output)
        if smiles:
            unique_smiles[smiles] = None
            if len(unique_smiles) >= n:
                break
    return list(unique_smiles)

def _read_pdb_text(pdb_file):
    """Return the text content of an uploaded PDB file ('' if nothing uploaded).

    Accepts a filesystem path (``str``), raw ``bytes``, or a file-like object
    exposing ``read()``. Bytes are decoded as UTF-8 with undecodable bytes
    ignored. Any other type raises ``TypeError``.
    """
    if pdb_file is None:
        return ""
    if isinstance(pdb_file, str):
        with open(pdb_file, "rb") as handle:
            data = handle.read()
    elif isinstance(pdb_file, (bytes, bytearray)):
        data = pdb_file
    elif hasattr(pdb_file, "read"):
        data = pdb_file.read()
    else:
        raise TypeError(f"unsupported upload type: {type(pdb_file).__name__}")

    if isinstance(data, (bytes, bytearray)):
        return bytes(data).decode('utf-8', errors='ignore')
    return data


def generate_from_pdb(pdb_file):
    """Gradio callback: validate an uploaded PDB file and generate SMILES.

    Args:
        pdb_file: The uploaded file as delivered by the UI: a path string,
            raw bytes, a file-like object, or ``None`` when nothing was
            uploaded.

    Returns:
        A ``(status_message, viewer_html, smiles_file_path)`` tuple. The
        last two items are ``None`` when the corresponding step was not
        reached. On success the path points to a newly created text file
        with one SMILES string per line.

    Note:
        The uploaded structure is only parsed for validation and shown in
        the 3D viewer; the generation prompt is a fixed string and does not
        depend on the file's content.
    """
    try:
        pdb_str = _read_pdb_text(pdb_file)
        if not pdb_str.strip():
            return "❌ الملف فارغ أو غير صالح", None, None

        try:
            # Parsed only to check the file is readable as PDB.
            load_pdb(io.StringIO(pdb_str))
        except Exception as e:
            return f"❌ خطأ أثناء تحليل ملف PDB:\n{str(e)}", None, None

        html_3d = get_protein_3d_html(pdb_str)
        prompt = "Generate a molecule in SELFIES that binds to the mutated KRAS protein"
        smiles_list = generate_multiple_valid_smiles(prompt, n=10)

        if not smiles_list:
            return "❌ لم يتم توليد أي SMILES صالحة", html_3d, None

        # A unique temp file per request: a fixed path would be overwritten
        # by concurrent requests and assumes a POSIX-style /tmp directory.
        with tempfile.NamedTemporaryFile(
            "w",
            suffix=".txt",
            prefix="generated_smiles_",
            delete=False,
            encoding="utf-8",
        ) as f:
            f.write("\n".join(smiles_list))
            smiles_file_path = f.name

        return "✅ تم توليد المركبات بنجاح", html_3d, smiles_file_path
    except Exception as e:
        # UI boundary: report any unexpected failure as a status message
        # instead of letting the callback raise.
        return f"❌ حدث خطأ:\n{str(e)}", None, None

# Custom stylesheet handed to gr.Blocks: page background colour and a
# centred, coloured <h1> heading.
css = """
body {background-color: #f0f9ff;}
h1 {color: #004d66; text-align: center;}
"""

# UI definition. Components appear in the order they are created below.
with gr.Blocks(css=css) as demo:
    gr.Markdown("<h1>🔬 Drug-like Molecule Generation from PDB using ChemGPT</h1>")
    gr.Markdown("🧪 Upload a PDB file containing mutations in the KRAS protein. The system will generate suitable SMILES drug candidates.")
    # Input row: the file upload next to the button that starts generation.
    with gr.Row():
        pdb_input = gr.File(label="📁 Upload PDB File")
        run_btn = gr.Button("🚀 Generate Compounds")
    # Output components, matching the three values returned by
    # generate_from_pdb: status text, viewer HTML, downloadable file.
    status = gr.Textbox(label="📢 Status")
    view3d = gr.HTML(label="🧬 3D Structure Viewer")
    file_output = gr.File(label="📄 Download SMILES File")
    # Clicking the button calls generate_from_pdb with the uploaded file and
    # routes its return tuple to the three outputs in order.
    run_btn.click(fn=generate_from_pdb, inputs=pdb_input, outputs=[status, view3d, file_output])

# Starts the app when this module is executed (runs at import time as well,
# since there is no __main__ guard).
demo.launch()