# app.py
# Captured from a Hugging Face Space page (Space status at capture: Sleeping).
import io
import os
import re
import tempfile
import time

import gradio as gr
import py3Dmol
import selfies as sf
import torch
from Bio import PDB
from rdkit import Chem
from transformers import AutoTokenizer, AutoModelForCausalLM
# Randomness setup: seed PyTorch from the wall clock so every process start
# samples a different sequence of molecules. The seed is computed once so the
# CPU and CUDA generators are guaranteed to receive the same value (two
# separate time.time() calls could straddle a second boundary).
_seed = int(time.time())
torch.manual_seed(_seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(_seed)

# Model loading: the tokenizer and causal-LM weights are loaded once at import
# time and reused as module-level globals by the generation code below.
model_name = "ncfrey/ChemGPT-1.2B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
def load_pdb(file_obj):
    """Parse a PDB source into a Biopython structure.

    Args:
        file_obj: Value handed straight to ``PDBParser.get_structure``; the
            caller in this file supplies an ``io.StringIO`` of the PDB text.

    Returns:
        The structure built by the parser, registered under the id
        ``'protein'``.
    """
    # QUIET=True keeps the parser from emitting construction warnings.
    pdb_parser = PDB.PDBParser(QUIET=True)
    return pdb_parser.get_structure('protein', file_obj)
def get_protein_3d_html(pdb_str):
    """Build an HTML snippet showing the protein in a py3Dmol viewer.

    Args:
        pdb_str: PDB file contents as text.

    Returns:
        The HTML produced by the viewer for a 600x400 cartoon rendering
        colored with the "spectrum" scheme.
    """
    cartoon_style = {"cartoon": {"color": "spectrum"}}

    viewer = py3Dmol.view(width=600, height=400)
    viewer.addModel(pdb_str, "pdb")
    viewer.setStyle(cartoon_style)
    viewer.zoomTo()

    # _make_html() is a private py3Dmol method; it is what yields the markup
    # that the UI places into its HTML component.
    html = viewer._make_html()
    return html
def clean_and_decode_selfies(raw_output):
    """Extract SELFIES tokens from model output and decode them to SMILES.

    Every ``[...]`` token is pulled out of ``raw_output``; tokens whose text
    contains ``Branch``, ``Ring`` or ``expl`` are discarded, and the remaining
    tokens are concatenated, decoded with ``selfies`` and round-tripped
    through RDKit.

    Args:
        raw_output: Decoded text produced by the language model. ``None`` or
            an empty string is accepted and yields ``None``.

    Returns:
        The SMILES string written by ``Chem.MolToSmiles``, or ``None`` when no
        usable token remains, decoding fails, or RDKit cannot build a
        non-empty molecule.
    """
    if not raw_output:
        return None

    tokens = re.findall(r'\[[^\[\]]+\]', raw_output)
    # NOTE(review): dropping these tokens alters the molecule the model
    # actually emitted -- confirm this filtering is intended.
    valid_tokens = [
        t for t in tokens
        if not any(marker in t for marker in ('Branch', 'Ring', 'expl'))
    ]
    if not valid_tokens:
        # Nothing left to decode; skip the selfies/RDKit round trip.
        return None

    cleaned_selfies = ''.join(valid_tokens)
    try:
        smiles = sf.decoder(cleaned_selfies)
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return None
        canonical = Chem.MolToSmiles(mol)
    except Exception:
        # Best-effort decoding: a malformed sample is simply rejected.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt/SystemExit
        # propagate.
        return None
    # An empty SMILES is not a usable molecule; report it like any failure.
    return canonical or None
def generate_multiple_valid_smiles(prompt, n=10, max_length=100):
    """Sample the language model until ``n`` distinct valid SMILES are found.

    Each attempt samples one sequence (``do_sample=True``, temperature 1.0,
    ``top_k=100``), decodes it to text and passes it through
    ``clean_and_decode_selfies``. Sampling stops as soon as ``n`` unique
    molecules have been collected or after ``n * 5`` attempts, whichever
    comes first.

    Args:
        prompt: Text used to condition generation.
        n: Number of distinct SMILES wanted. Values <= 0 return ``[]``
            without touching the model.
        max_length: ``max_length`` forwarded to ``model.generate``.

    Returns:
        A list of at most ``n`` unique SMILES strings, in the order they were
        first generated.
    """
    if n <= 0:
        return []

    # The prompt never changes between attempts, so tokenize it only once.
    inputs = tokenizer(prompt, return_tensors="pt")

    # dict keys de-duplicate like a set but keep first-seen order, so the
    # returned list does not depend on hash ordering.
    unique_smiles = {}
    for _ in range(n * 5):
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            do_sample=True,
            temperature=1.0,
            top_k=100,
            pad_token_id=tokenizer.eos_token_id,
        )
        selfies_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
        smiles = clean_and_decode_selfies(selfies_output)
        if smiles:
            unique_smiles[smiles] = None
            if len(unique_smiles) >= n:
                break
    return list(unique_smiles)
def _read_pdb_text(pdb_file):
    """Return the PDB text carried by an upload value, or "" when there is none."""
    if pdb_file is None:
        return ""
    if isinstance(pdb_file, bytes):
        return pdb_file.decode('utf-8', errors='ignore')
    if isinstance(pdb_file, str):
        # A single-line string naming an existing file is a path (what a
        # file-upload component hands over in filepath mode); any other
        # string is treated as the PDB text itself.
        if "\n" not in pdb_file and os.path.isfile(pdb_file):
            with open(pdb_file, "rb") as fh:
                return fh.read().decode('utf-8', errors='ignore')
        return pdb_file
    # File-like object: binary handles return bytes, text handles return str.
    data = pdb_file.read()
    if isinstance(data, bytes):
        return data.decode('utf-8', errors='ignore')
    return data


def generate_from_pdb(pdb_file):
    """Validate an uploaded PDB file and generate candidate molecules.

    Args:
        pdb_file: The upload value: a file path, raw PDB text, bytes, a
            file-like object with ``read()``, or ``None`` when nothing was
            uploaded.

    Returns:
        A ``(status_message, html_3d, smiles_file_path)`` tuple. ``html_3d``
        is ``None`` when the input is empty or cannot be parsed;
        ``smiles_file_path`` is ``None`` unless at least one molecule was
        generated. Errors are reported through ``status_message`` instead of
        being raised.
    """
    try:
        pdb_str = _read_pdb_text(pdb_file)
        if not pdb_str.strip():
            return "β The file is empty or invalid.", None, None

        try:
            load_pdb(io.StringIO(pdb_str))
        except Exception as e:
            return f"β Error parsing the PDB file:\n{str(e)}", None, None

        html_3d = get_protein_3d_html(pdb_str)
        prompt = "Generate a molecule in SELFIES that binds to the mutated KRAS protein"
        smiles_list = generate_multiple_valid_smiles(prompt, n=10)
        if not smiles_list:
            return "β No valid SMILES generated.", html_3d, None

        # A unique file per request keeps concurrent users from overwriting
        # each other's results. delete=False because the UI still has to
        # serve the file after this function returns.
        with tempfile.NamedTemporaryFile(
            "w",
            prefix="generated_smiles_",
            suffix=".txt",
            delete=False,
            encoding="utf-8",
        ) as f:
            f.write("\n".join(smiles_list))
            smiles_file_path = f.name
        return "β Molecules generated successfully.", html_3d, smiles_file_path
    except Exception as e:
        # Top-level boundary for the UI callback: show the failure in the
        # status box rather than letting the request crash.
        return f"β An unexpected error occurred:\n{str(e)}", None, None
# Custom stylesheet passed to gr.Blocks(css=...): light-blue page background
# with a sans-serif font, a centered dark-blue <h1>, card-like ".gr-box"
# containers (border, rounded corners, soft shadow) and bold blue buttons.
css = """
body {
background-color: #f0f9ff;
font-family: 'Segoe UI', sans-serif;
}
h1 {
color: #003d66;
text-align: center;
font-size: 32px;
}
.gr-box {
border: 1px solid #cce7ff;
background-color: #ffffff;
border-radius: 15px;
padding: 20px;
box-shadow: 0 2px 8px rgba(0, 128, 255, 0.1);
}
button {
background-color: #007acc !important;
color: white !important;
font-weight: bold;
border-radius: 10px !important;
}
"""
# Assemble the page: a heading, an upload/run row, and three result widgets
# (status text, 3D structure view, downloadable SMILES file).
with gr.Blocks(css=css) as demo:
    gr.Markdown("""
<h1>π¬ Drug-like Molecule Generation from PDB using ChemGPT</h1>
<p>π§ͺ Upload a PDB file containing mutations in the KRAS protein. The system will generate suitable SMILES drug candidates.</p>
""")
    # NOTE(review): indentation was lost in the captured source; the Row is
    # assumed to contain only the upload widget and the button -- confirm.
    with gr.Row():
        pdb_input = gr.File(label="π Upload PDB File")
        run_btn = gr.Button("π Generate Molecules")
    status = gr.Textbox(label="π’ Status")
    view3d = gr.HTML(label="𧬠3D Structure View")
    file_output = gr.File(label="π Download SMILES File")
    # The three values returned by generate_from_pdb map positionally onto
    # the three output components listed here.
    run_btn.click(
        fn=generate_from_pdb,
        inputs=pdb_input,
        outputs=[status, view3d, file_output]
    )
# Starts the app at import time; share=True additionally requests a public
# share link from Gradio.
demo.launch(share=True)