mohamed20003 committed on
Commit
5fc90f2
·
verified ·
1 Parent(s): 8afb1be

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -0
app.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from Bio import PDB
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+ from rdkit import Chem
4
+ import py3Dmol
5
+ import re
6
+ import io
7
+ import selfies as sf
8
+ import torch
9
+ import time
10
+ import gradio as gr
11
+
12
# Randomness setup: derive ONE time-based seed and reuse it for every generator.
# Calling time.time() separately for the CPU and CUDA seeds can yield two
# different values when the clock ticks over between the calls.
RANDOM_SEED = int(time.time())
torch.manual_seed(RANDOM_SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(RANDOM_SEED)
16
+
17
# Model loading: the tokenizer and the causal language model are both fetched
# once, at import time, from the same checkpoint name and then shared by the
# generation code further down in this file.
model_name = "ncfrey/ChemGPT-1.2B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
21
+
22
def load_pdb(file_obj):
    """Parse PDB data with Biopython and return the resulting structure.

    Args:
        file_obj: Handed unchanged to ``PDBParser.get_structure``; the caller
            in this file passes an ``io.StringIO`` holding the PDB text.

    Returns:
        The structure object produced by the parser, created under the
        id ``'protein'``.
    """
    # QUIET=True is passed to the parser constructor exactly as before.
    return PDB.PDBParser(QUIET=True).get_structure('protein', file_obj)
26
+
27
def get_protein_3d_html(pdb_str):
    """Build a py3Dmol viewer for the given PDB text and return its HTML.

    Args:
        pdb_str: PDB file content as a string.

    Returns:
        Whatever ``view._make_html()`` produces for a 600x400 viewer showing
        the model in cartoon style with the "spectrum" colour setting.
    """
    viewer = py3Dmol.view(width=600, height=400)
    viewer.addModel(pdb_str, "pdb")

    cartoon_style = {"cartoon": {"color": "spectrum"}}
    viewer.setStyle(cartoon_style)
    viewer.zoomTo()

    # NOTE(review): _make_html is an underscore-prefixed (non-public) method;
    # it may change between py3Dmol releases.
    html = viewer._make_html()
    return html
33
+
34
def clean_and_decode_selfies(raw_output):
    """Convert raw model text into a SMILES string, or None if that fails.

    Bracketed tokens (``[...]``) are pulled out of ``raw_output``. Every token
    whose text contains 'Branch', 'Ring' or 'expl' is dropped; the remaining
    tokens are concatenated, decoded with ``selfies`` and round-tripped
    through RDKit.

    Args:
        raw_output: Text produced by the language model.

    Returns:
        The SMILES string written by RDKit for the decoded molecule, or None
        when no usable token remains, decoding raises, RDKit cannot parse the
        decoded string, or the resulting SMILES is empty.
    """
    excluded_markers = ('Branch', 'Ring', 'expl')
    tokens = re.findall(r'\[[^\[\]]+\]', raw_output)
    kept = [
        token for token in tokens
        if not any(marker in token for marker in excluded_markers)
    ]
    if not kept:
        # Nothing to decode; skip the decoder/RDKit round trip entirely.
        return None

    try:
        smiles = sf.decoder(''.join(kept))
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return None
        # An empty SMILES is not a usable molecule; report it as None.
        return Chem.MolToSmiles(mol) or None
    except Exception:
        # Best-effort decoding: any library error means "no valid molecule".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt/SystemExit
        # propagate.
        return None
45
+
46
def generate_multiple_valid_smiles(prompt, n=10, max_length=100):
    """Sample the model repeatedly until ``n`` distinct valid SMILES are found.

    Each attempt samples one sequence from the module-level ``model``, decodes
    it with the module-level ``tokenizer`` and passes the text to
    ``clean_and_decode_selfies``. Sampling stops once ``n`` distinct results
    exist or after ``n * 5`` attempts, whichever comes first.

    Args:
        prompt: Text used as the generation prompt.
        n: Number of distinct SMILES strings wanted.
        max_length: ``max_length`` forwarded to ``model.generate``.

    Returns:
        A list of at most ``n`` distinct SMILES strings, in the order they
        were first generated. Empty when ``n <= 0`` or nothing valid was
        produced.
    """
    if n <= 0:
        return []

    # The prompt never changes between attempts, so tokenize it once.
    inputs = tokenizer(prompt, return_tensors="pt")

    # A dict de-duplicates like a set but keeps first-seen order, which makes
    # the returned list stable for a given sequence of samples.
    found = {}
    max_tries = n * 5
    tries = 0
    while len(found) < n and tries < max_tries:
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            do_sample=True,
            temperature=1.0,
            top_k=100,
            pad_token_id=tokenizer.eos_token_id,
        )
        selfies_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
        smiles = clean_and_decode_selfies(selfies_output)
        if smiles:
            found[smiles] = None
        tries += 1
    return list(found)
65
+
66
def _read_pdb_text(pdb_file):
    """Return the PDB text behind whatever object the upload widget delivered."""
    import os

    if pdb_file is None:
        # Nothing uploaded; the caller reports this as an empty file.
        return ""
    if isinstance(pdb_file, bytes):
        return pdb_file.decode('utf-8', errors='ignore')
    if isinstance(pdb_file, str):
        # A str is either a path to the uploaded file or the PDB text itself.
        if not os.path.isfile(pdb_file):
            return pdb_file
        path = pdb_file
    elif hasattr(pdb_file, "read"):
        data = pdb_file.read()
        # read() yields str for text-mode handles and bytes for binary ones.
        return data if isinstance(data, str) else data.decode('utf-8', errors='ignore')
    else:
        # Last resort: wrapper objects that only expose the path as ``.name``.
        path = pdb_file.name
    with open(path, "rb") as handle:
        return handle.read().decode('utf-8', errors='ignore')


def generate_from_pdb(pdb_file):
    """Validate an uploaded PDB file, render it, and generate SMILES for it.

    Args:
        pdb_file: The upload as delivered by the UI: a file path, raw PDB
            text (str or bytes), a readable file-like object, or None when
            nothing was uploaded.

    Returns:
        A ``(status_message, viewer_html, smiles_file_path)`` tuple. The last
        two items are None when the step that produces them was not reached.
        All failures are reported through the status message; nothing is
        raised.
    """
    import tempfile

    try:
        pdb_str = _read_pdb_text(pdb_file)

        if not pdb_str.strip():
            return "❌ الملف فارغ أو غير صالح", None, None

        # Parse only to validate the upload; the structure itself is unused.
        try:
            load_pdb(io.StringIO(pdb_str))
        except Exception as e:
            return f"❌ خطأ أثناء تحليل ملف PDB:\n{str(e)}", None, None

        html_3d = get_protein_3d_html(pdb_str)

        prompt = "Generate a molecule in SELFIES that binds to the mutated KRAS protein"
        smiles_list = generate_multiple_valid_smiles(prompt, n=10)

        if not smiles_list:
            return "❌ لم يتم توليد أي SMILES صالحة", html_3d, None

        # Use a unique temp file per request so concurrent users cannot
        # overwrite each other's results (a fixed /tmp path is shared).
        with tempfile.NamedTemporaryFile(
            "w",
            encoding="utf-8",
            prefix="generated_smiles_",
            suffix=".txt",
            delete=False,
        ) as f:
            f.write("\n".join(smiles_list))
            smiles_file_path = f.name

        return "✅ تم توليد المركبات بنجاح", html_3d, smiles_file_path

    except Exception as e:
        # Top-level UI boundary: surface the error text instead of crashing.
        return f"❌ حدث خطأ:\n{str(e)}", None, None
100
+
101
# Page-level CSS handed to gr.Blocks below.
css = """
body {background-color: #f0f9ff;}
h1 {color: #004d66; text-align: center;}
"""

# UI layout: a heading and description, a row with the upload widget and the
# run button, then the three outputs that generate_from_pdb fills in.
with gr.Blocks(css=css) as demo:
    gr.Markdown("<h1>🔬 توليد مركبات دوائية باستخدام ChemGPT من ملف PDB</h1>")
    gr.Markdown("🧪 ارفع ملف PDB يحتوي على طفرات في بروتين KRAS وسيتم توليد مركبات SMILES مناسبة.")

    with gr.Row():
        pdb_input = gr.File(label="📁 ارفع ملف PDB")
        run_btn = gr.Button("🚀 توليد المركبات")

    status = gr.Textbox(label="📢 الحالة")
    view3d = gr.HTML(label="🧬 عرض ثلاثي الأبعاد")
    file_output = gr.File(label="📄 تحميل ملف SMILES")

    # One click runs the whole pipeline; the returned 3-tuple maps onto the
    # outputs in this order: status text, viewer HTML, downloadable file.
    result_components = [status, view3d, file_output]
    run_btn.click(
        fn=generate_from_pdb,
        inputs=pdb_input,
        outputs=result_components,
    )

demo.launch()