Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,12 +1,11 @@
|
|
1 |
from Bio import PDB
|
2 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
3 |
from rdkit import Chem
|
4 |
-
import py3Dmol
|
5 |
-
import re
|
6 |
-
import io
|
7 |
import selfies as sf
|
8 |
import torch
|
9 |
import time
|
|
|
|
|
10 |
import gradio as gr
|
11 |
|
12 |
# إعداد العشوائية
|
@@ -14,7 +13,7 @@ torch.manual_seed(int(time.time()))
|
|
14 |
if torch.cuda.is_available():
|
15 |
torch.cuda.manual_seed_all(int(time.time()))
|
16 |
|
17 |
-
# تحميل
|
18 |
model_name = "ncfrey/ChemGPT-1.2B"
|
19 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
20 |
model = AutoModelForCausalLM.from_pretrained(model_name)
|
@@ -24,13 +23,6 @@ def load_pdb(file_obj):
|
|
24 |
structure = parser.get_structure('protein', file_obj)
|
25 |
return structure
|
26 |
|
27 |
-
def get_protein_3d_html(pdb_str):
|
28 |
-
view = py3Dmol.view(width=600, height=400)
|
29 |
-
view.addModel(pdb_str, "pdb")
|
30 |
-
view.setStyle({"cartoon": {"color": "spectrum"}})
|
31 |
-
view.zoomTo()
|
32 |
-
return view._make_html()
|
33 |
-
|
34 |
def clean_and_decode_selfies(raw_output):
|
35 |
tokens = re.findall(r'\[[^\[\]]+\]', raw_output)
|
36 |
valid_tokens = [t for t in tokens if all(x not in t for x in ['Branch', 'Ring', 'expl'])]
|
@@ -63,58 +55,33 @@ def generate_multiple_valid_smiles(prompt, n=10, max_length=100):
|
|
63 |
tries += 1
|
64 |
return list(valid_smiles)
|
65 |
|
66 |
-
def
|
67 |
try:
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
return "❌ الملف فارغ أو غير صالح", None, None
|
73 |
-
|
74 |
-
pdb_file_io = io.StringIO(pdb_str)
|
75 |
-
try:
|
76 |
-
load_pdb(pdb_file_io)
|
77 |
-
except Exception as e:
|
78 |
-
return f"❌ خطأ أثناء تحليل ملف PDB:\n{str(e)}", None, None
|
79 |
-
|
80 |
-
html_3d = get_protein_3d_html(pdb_str)
|
81 |
|
82 |
prompt = "Generate a molecule in SELFIES that binds to the mutated KRAS protein"
|
83 |
smiles_list = generate_multiple_valid_smiles(prompt, n=10)
|
84 |
|
85 |
if not smiles_list:
|
86 |
-
return "❌ لم يتم توليد أي SMILES صالحة",
|
87 |
-
|
88 |
-
smiles_txt = "\n".join(smiles_list)
|
89 |
-
smiles_file_path = "/tmp/generated_smiles.txt"
|
90 |
-
with open(smiles_file_path, "w") as f:
|
91 |
-
f.write(smiles_txt)
|
92 |
|
93 |
-
|
|
|
94 |
|
95 |
except Exception as e:
|
96 |
-
return f"❌
|
97 |
-
|
98 |
-
css = """
|
99 |
-
body {background-color: #f0f9ff;}
|
100 |
-
h1 {color: #004d66; text-align: center;}
|
101 |
-
"""
|
102 |
-
|
103 |
-
with gr.Blocks(css=css) as demo:
|
104 |
-
gr.Markdown("<h1>🔬 Drug-like Molecule Generation from PDB using ChemGPT</h1>")
|
105 |
-
gr.Markdown("🧪 Upload a PDB file containing mutations in the KRAS protein. The system will generate suitable SMILES drug candidates.")
|
106 |
|
|
|
|
|
|
|
107 |
with gr.Row():
|
108 |
-
pdb_input = gr.File(label="📁
|
109 |
-
run_btn = gr.Button("🚀
|
110 |
-
status = gr.Textbox(label="📢
|
111 |
-
|
112 |
-
|
113 |
-
run_btn.click(
|
114 |
-
fn=generate_from_pdb,
|
115 |
-
inputs=pdb_input,
|
116 |
-
outputs=[status, view3d, file_output]
|
117 |
-
)
|
118 |
|
119 |
-
|
120 |
-
demo.launch()
|
|
|
1 |
from Bio import PDB
|
2 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
3 |
from rdkit import Chem
|
|
|
|
|
|
|
4 |
import selfies as sf
|
5 |
import torch
|
6 |
import time
|
7 |
+
import re
|
8 |
+
import io
|
9 |
import gradio as gr
|
10 |
|
11 |
# إعداد العشوائية
|
|
|
13 |
if torch.cuda.is_available():
|
14 |
torch.cuda.manual_seed_all(int(time.time()))
|
15 |
|
16 |
+
# تحميل نموذج ChemGPT
|
17 |
model_name = "ncfrey/ChemGPT-1.2B"
|
18 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
19 |
model = AutoModelForCausalLM.from_pretrained(model_name)
|
|
|
23 |
structure = parser.get_structure('protein', file_obj)
|
24 |
return structure
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
def clean_and_decode_selfies(raw_output):
|
27 |
tokens = re.findall(r'\[[^\[\]]+\]', raw_output)
|
28 |
valid_tokens = [t for t in tokens if all(x not in t for x in ['Branch', 'Ring', 'expl'])]
|
|
|
55 |
tries += 1
|
56 |
return list(valid_smiles)
|
57 |
|
58 |
+
def _read_pdb_text(pdb_file):
    """Return the uploaded PDB content as text, whatever form the upload takes.

    Accepts a filesystem path (``str``), raw ``bytes``/``bytearray``, or any
    object exposing ``read()``. Bytes are decoded as UTF-8 and undecodable
    bytes are dropped, matching the original decoding behaviour.
    """
    if isinstance(pdb_file, str):
        # The upload widget may hand over a path to a temporary file.
        with open(pdb_file, "rb") as handle:
            raw = handle.read()
    elif isinstance(pdb_file, (bytes, bytearray)):
        raw = bytes(pdb_file)
    else:
        raw = pdb_file.read()

    if isinstance(raw, str):
        # Text-mode file objects already yield str; nothing to decode.
        return raw
    return raw.decode('utf-8', errors='ignore')


def generate_drugs_from_pdb(pdb_file):
    """Validate an uploaded PDB file and generate candidate SMILES strings.

    Args:
        pdb_file: The uploaded file as delivered by the UI: a file-like
            object with ``read()``, a path string, raw bytes, or ``None``
            when nothing was uploaded.

    Returns:
        A ``(status_message, smiles_text)`` tuple. ``smiles_text`` holds one
        SMILES string per line on success and is ``""`` on any failure.
    """
    # Clicking the button without an upload passes None; report it clearly
    # instead of surfacing an AttributeError to the user.
    if pdb_file is None:
        return "❌ الملف فارغ أو غير صالح", ""

    try:
        pdb_str = _read_pdb_text(pdb_file)
        if not pdb_str.strip():
            # Nothing to parse: skip the parser and the costly generation.
            return "❌ الملف فارغ أو غير صالح", ""

        # Parsing is done only for validation; a malformed file raises here
        # and is reported through the handler below.
        load_pdb(io.StringIO(pdb_str))

        prompt = "Generate a molecule in SELFIES that binds to the mutated KRAS protein"
        smiles_list = generate_multiple_valid_smiles(prompt, n=10)

        if not smiles_list:
            return "❌ لم يتم توليد أي SMILES صالحة", ""

        smiles_text = "\n".join(smiles_list)
        return "✅ تم توليد المركبات بنجاح", smiles_text

    except Exception as e:
        # UI boundary: turn any failure into a status message rather than
        # letting the callback crash.
        return f"❌ خطأ: {e}", ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
+
# واجهة Gradio
|
78 |
+
with gr.Blocks() as demo:
|
79 |
+
gr.Markdown("# 🧬 توليد مركبات دوائية من ملف PDB باستخدام ChemGPT")
|
80 |
with gr.Row():
|
81 |
+
pdb_input = gr.File(label="📁 ارفع ملف PDB")
|
82 |
+
run_btn = gr.Button("🚀 توليد SMILES")
|
83 |
+
status = gr.Textbox(label="📢 الحالة")
|
84 |
+
smiles_output = gr.Textbox(label="📄 المركبات (SMILES)", lines=10)
|
85 |
+
run_btn.click(fn=generate_drugs_from_pdb, inputs=pdb_input, outputs=[status, smiles_output])
|
|
|
|
|
|
|
|
|
|
|
86 |
|
87 |
+
demo.launch()
|
|