Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,7 @@
|
|
1 |
-
#
|
|
|
|
|
|
|
2 |
from Bio import PDB
|
3 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
4 |
from rdkit import Chem
|
@@ -20,11 +23,13 @@ model_name = "ncfrey/ChemGPT-1.2B"
|
|
20 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
21 |
model = AutoModelForCausalLM.from_pretrained(model_name)
|
22 |
|
|
|
23 |
def load_pdb(file_obj):
|
24 |
parser = PDB.PDBParser(QUIET=True)
|
25 |
structure = parser.get_structure('protein', file_obj)
|
26 |
return structure
|
27 |
|
|
|
28 |
def get_protein_3d_html(pdb_str):
|
29 |
view = py3Dmol.view(width=600, height=400)
|
30 |
view.addModel(pdb_str, "pdb")
|
@@ -32,6 +37,7 @@ def get_protein_3d_html(pdb_str):
|
|
32 |
view.zoomTo()
|
33 |
return view._make_html()
|
34 |
|
|
|
35 |
def clean_and_decode_selfies(raw_output):
|
36 |
tokens = re.findall(r'\[[^\[\]]+\]', raw_output)
|
37 |
valid_tokens = [t for t in tokens if all(x not in t for x in ['Branch', 'Ring', 'expl'])]
|
@@ -44,6 +50,7 @@ def clean_and_decode_selfies(raw_output):
|
|
44 |
except:
|
45 |
return None
|
46 |
|
|
|
47 |
def generate_multiple_valid_smiles(prompt, n=10, max_length=100):
|
48 |
valid_smiles = set()
|
49 |
tries = 0
|
@@ -64,6 +71,7 @@ def generate_multiple_valid_smiles(prompt, n=10, max_length=100):
|
|
64 |
tries += 1
|
65 |
return list(valid_smiles)
|
66 |
|
|
|
67 |
def generate_from_pdb(pdb_file):
|
68 |
try:
|
69 |
if isinstance(pdb_file, (str, bytes)):
|
@@ -73,13 +81,13 @@ def generate_from_pdb(pdb_file):
|
|
73 |
pdb_str = pdb_bytes.decode('utf-8', errors='ignore')
|
74 |
|
75 |
if len(pdb_str.strip()) == 0:
|
76 |
-
return "โ
|
77 |
|
78 |
pdb_file_io = io.StringIO(pdb_str)
|
79 |
try:
|
80 |
load_pdb(pdb_file_io)
|
81 |
except Exception as e:
|
82 |
-
return f"โ
|
83 |
|
84 |
html_3d = get_protein_3d_html(pdb_str)
|
85 |
|
@@ -87,61 +95,38 @@ def generate_from_pdb(pdb_file):
|
|
87 |
smiles_list = generate_multiple_valid_smiles(prompt, n=10)
|
88 |
|
89 |
if not smiles_list:
|
90 |
-
return "โ
|
91 |
|
92 |
smiles_txt = "\n".join(smiles_list)
|
93 |
smiles_file_path = "/tmp/generated_smiles.txt"
|
94 |
with open(smiles_file_path, "w") as f:
|
95 |
f.write(smiles_txt)
|
96 |
|
97 |
-
return "โ
|
98 |
|
99 |
except Exception as e:
|
100 |
-
return f"โ
|
101 |
|
102 |
-
# CSS
|
103 |
css = """
|
104 |
-
body {
|
105 |
-
|
106 |
-
font-family: 'Segoe UI', sans-serif;
|
107 |
-
}
|
108 |
-
h1 {
|
109 |
-
color: #003d66;
|
110 |
-
text-align: center;
|
111 |
-
font-size: 32px;
|
112 |
-
}
|
113 |
-
.gr-box {
|
114 |
-
border: 1px solid #cce7ff;
|
115 |
-
background-color: #ffffff;
|
116 |
-
border-radius: 15px;
|
117 |
-
padding: 20px;
|
118 |
-
box-shadow: 0 2px 8px rgba(0, 128, 255, 0.1);
|
119 |
-
}
|
120 |
-
button {
|
121 |
-
background-color: #007acc !important;
|
122 |
-
color: white !important;
|
123 |
-
font-weight: bold;
|
124 |
-
border-radius: 10px !important;
|
125 |
-
}
|
126 |
"""
|
127 |
|
|
|
128 |
with gr.Blocks(css=css) as demo:
|
129 |
-
gr.Markdown(""
|
130 |
-
|
131 |
-
<p>๐งช Upload a PDB file containing mutations in the KRAS protein. The system will generate suitable SMILES drug candidates.</p>
|
132 |
-
""")
|
133 |
with gr.Row():
|
134 |
pdb_input = gr.File(label="๐ Upload PDB File")
|
135 |
-
run_btn = gr.Button("๐ Generate
|
136 |
-
|
137 |
status = gr.Textbox(label="๐ข Status")
|
138 |
-
view3d = gr.HTML(label="๐งฌ 3D Structure
|
139 |
file_output = gr.File(label="๐ Download SMILES File")
|
140 |
-
|
141 |
run_btn.click(
|
142 |
fn=generate_from_pdb,
|
143 |
inputs=pdb_input,
|
144 |
outputs=[status, view3d, file_output]
|
145 |
)
|
146 |
|
147 |
-
demo.launch(share=True)
|
|
|
1 |
+
# التثبيت (لو مش مثبت)
|
2 |
+
!pip install rdkit-pypi py3Dmol transformers selfies biopython gradio -q
|
3 |
+
|
4 |
+
# الاستيراد
|
5 |
from Bio import PDB
|
6 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
7 |
from rdkit import Chem
|
|
|
23 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
24 |
model = AutoModelForCausalLM.from_pretrained(model_name)
|
25 |
|
26 |
+
# تحميل ملف PDB
|
27 |
def load_pdb(file_obj):
|
28 |
parser = PDB.PDBParser(QUIET=True)
|
29 |
structure = parser.get_structure('protein', file_obj)
|
30 |
return structure
|
31 |
|
32 |
+
# عرض البروتين ثلاثي الأبعاد
|
33 |
def get_protein_3d_html(pdb_str):
|
34 |
view = py3Dmol.view(width=600, height=400)
|
35 |
view.addModel(pdb_str, "pdb")
|
|
|
37 |
view.zoomTo()
|
38 |
return view._make_html()
|
39 |
|
40 |
+
# تنظيف وتحويل SELFIES إلى SMILES
|
41 |
def clean_and_decode_selfies(raw_output):
|
42 |
tokens = re.findall(r'\[[^\[\]]+\]', raw_output)
|
43 |
valid_tokens = [t for t in tokens if all(x not in t for x in ['Branch', 'Ring', 'expl'])]
|
|
|
50 |
except:
|
51 |
return None
|
52 |
|
53 |
+
# توليد SMILES
|
54 |
def generate_multiple_valid_smiles(prompt, n=10, max_length=100):
|
55 |
valid_smiles = set()
|
56 |
tries = 0
|
|
|
71 |
tries += 1
|
72 |
return list(valid_smiles)
|
73 |
|
74 |
+
# الوظيفة الرئيسية
|
75 |
def generate_from_pdb(pdb_file):
|
76 |
try:
|
77 |
if isinstance(pdb_file, (str, bytes)):
|
|
|
81 |
pdb_str = pdb_bytes.decode('utf-8', errors='ignore')
|
82 |
|
83 |
if len(pdb_str.strip()) == 0:
|
84 |
+
return "❌ الملف فارغ أو غير صالح", None, None
|
85 |
|
86 |
pdb_file_io = io.StringIO(pdb_str)
|
87 |
try:
|
88 |
load_pdb(pdb_file_io)
|
89 |
except Exception as e:
|
90 |
+
return f"❌ خطأ أثناء تحليل ملف PDB:\n{str(e)}", None, None
|
91 |
|
92 |
html_3d = get_protein_3d_html(pdb_str)
|
93 |
|
|
|
95 |
smiles_list = generate_multiple_valid_smiles(prompt, n=10)
|
96 |
|
97 |
if not smiles_list:
|
98 |
+
return "❌ لم يتم توليد أي SMILES صالحة", html_3d, None
|
99 |
|
100 |
smiles_txt = "\n".join(smiles_list)
|
101 |
smiles_file_path = "/tmp/generated_smiles.txt"
|
102 |
with open(smiles_file_path, "w") as f:
|
103 |
f.write(smiles_txt)
|
104 |
|
105 |
+
return "✅ تم توليد المركبات بنجاح", html_3d, smiles_file_path
|
106 |
|
107 |
except Exception as e:
|
108 |
+
return f"❌ حدث خطأ:\n{str(e)}", None, None
|
109 |
|
110 |
+
# CSS لتجميل الواجهة
|
111 |
css = """
|
112 |
+
body {background-color: #f0f9ff;}
|
113 |
+
h1 {color: #004d66; text-align: center;}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
"""
|
115 |
|
116 |
+
# واجهة Gradio
|
117 |
with gr.Blocks(css=css) as demo:
|
118 |
+
gr.Markdown("<h1>๐ฌ Drug-like Molecule Generation from PDB using ChemGPT</h1>")
|
119 |
+
gr.Markdown("🧪 Upload a PDB file containing mutations in the KRAS protein. The system will generate suitable SMILES drug candidates.")
|
|
|
|
|
120 |
with gr.Row():
|
121 |
pdb_input = gr.File(label="๐ Upload PDB File")
|
122 |
+
run_btn = gr.Button("๐ Generate Compounds")
|
|
|
123 |
status = gr.Textbox(label="๐ข Status")
|
124 |
+
view3d = gr.HTML(label="🧬 3D Structure Viewer")
|
125 |
file_output = gr.File(label="๐ Download SMILES File")
|
|
|
126 |
run_btn.click(
|
127 |
fn=generate_from_pdb,
|
128 |
inputs=pdb_input,
|
129 |
outputs=[status, view3d, file_output]
|
130 |
)
|
131 |
|
132 |
+
demo.launch(share=True) # خليها True لو عايز لينك عام
|