smiless / app.py
mohamed20003's picture
Update app.py
52af9bb verified
raw
history blame
4.51 kB
# app.py
from Bio import PDB
from transformers import AutoTokenizer, AutoModelForCausalLM
from rdkit import Chem
import py3Dmol
import re
import io
import selfies as sf
import torch
import time
import gradio as gr
# Randomness setup: seed PyTorch from the current wall-clock time (whole
# seconds), so the seed differs from one launch to the next.
torch.manual_seed(int(time.time()))
if torch.cuda.is_available():
    # Only touch the CUDA generators when a CUDA device is actually available.
    torch.cuda.manual_seed_all(int(time.time()))
# Load the model: tokenizer and causal language model for the checkpoint
# named below. Both objects are module-level and are used by the generation
# code further down in this file.
model_name = "ncfrey/ChemGPT-1.2B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
def load_pdb(file_obj):
    """Parse PDB data into a Biopython structure.

    Args:
        file_obj: PDB source handed unchanged to ``PDBParser.get_structure``
            (the caller in this file passes an in-memory text stream).

    Returns:
        The object returned by ``get_structure``, created under the id
        ``'protein'``.
    """
    # QUIET=True asks the parser not to emit warnings while building the structure.
    return PDB.PDBParser(QUIET=True).get_structure('protein', file_obj)
def get_protein_3d_html(pdb_str):
    """Build an HTML snippet showing the given PDB text in a py3Dmol viewer.

    Args:
        pdb_str: PDB-formatted text, added to the viewer as a ``"pdb"`` model.

    Returns:
        The HTML produced by the viewer for a 600x400 view styled as a
        spectrum-colored cartoon and zoomed via ``zoomTo()``.
    """
    viewer = py3Dmol.view(width=600, height=400)
    viewer.addModel(pdb_str, "pdb")

    cartoon_style = {"cartoon": {"color": "spectrum"}}
    viewer.setStyle(cartoon_style)
    viewer.zoomTo()

    # NOTE(review): _make_html has a leading underscore, so it is presumably
    # not public py3Dmol API — confirm it is stable across versions.
    return viewer._make_html()
def clean_and_decode_selfies(raw_output):
    """Extract SELFIES tokens from generated text and convert them to SMILES.

    Bracketed tokens (``[...]``) are pulled out of ``raw_output``. Every token
    whose text contains ``Branch``, ``Ring`` or ``expl`` is dropped; the
    remaining tokens are joined, decoded with ``selfies`` and round-tripped
    through RDKit (``MolFromSmiles`` then ``MolToSmiles``).

    Args:
        raw_output: Text produced by the generator. ``None`` or an empty
            string means there is nothing to decode.

    Returns:
        The SMILES string written by RDKit, or ``None`` when no usable token
        remains, decoding raises, or RDKit cannot build a molecule.
    """
    if not raw_output:
        return None

    tokens = re.findall(r'\[[^\[\]]+\]', raw_output)
    excluded_markers = ('Branch', 'Ring', 'expl')
    valid_tokens = [
        token for token in tokens
        if not any(marker in token for marker in excluded_markers)
    ]
    if not valid_tokens:
        # Nothing left to decode; report "no molecule" instead of an empty string.
        return None

    try:
        smiles = sf.decoder(''.join(valid_tokens))
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return None
        return Chem.MolToSmiles(mol)
    except Exception:
        # Deliberate best-effort: an undecodable sample is simply skipped by
        # the caller. KeyboardInterrupt/SystemExit are no longer swallowed.
        return None
def generate_multiple_valid_smiles(prompt, n=10, max_length=100):
    """Sample the language model until ``n`` distinct valid SMILES are found.

    Each attempt samples one sequence from the module-level ``model``, decodes
    it with the module-level ``tokenizer`` and passes the text through
    ``clean_and_decode_selfies``. Sampling stops after ``n`` unique results or
    after ``n * 5`` attempts, whichever comes first.

    Args:
        prompt: Text used to condition every sample.
        n: Number of distinct SMILES wanted. Values ``<= 0`` yield ``[]``.
        max_length: Forwarded unchanged as ``max_length`` to ``model.generate``.

    Returns:
        A list of unique SMILES strings in the order they were first
        generated; it may hold fewer than ``n`` entries.
    """
    if n <= 0:
        return []

    # The prompt is identical for every attempt, so tokenize it once.
    inputs = tokenizer(prompt, return_tensors="pt")

    # A dict is used as an insertion-ordered set so the output order is stable.
    unique_smiles = {}
    attempts = 0
    while len(unique_smiles) < n and attempts < n * 5:
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            do_sample=True,
            temperature=1.0,
            top_k=100,
            pad_token_id=tokenizer.eos_token_id,
        )
        selfies_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
        smiles = clean_and_decode_selfies(selfies_output)
        if smiles:
            unique_smiles[smiles] = None
        attempts += 1
    return list(unique_smiles)
def _read_pdb_text(pdb_file):
    """Return the PDB text carried by whatever object the upload widget supplied."""
    import os  # local import: this file has no file-level ``import os``

    if pdb_file is None:
        return ""
    if isinstance(pdb_file, bytes):
        return pdb_file.decode('utf-8', errors='ignore')
    if isinstance(pdb_file, str):
        # A single-line string naming an existing file is an upload path, not
        # PDB content; any other string is taken as the content itself.
        if "\n" not in pdb_file and os.path.isfile(pdb_file):
            with open(pdb_file, "rb") as handle:
                return handle.read().decode('utf-8', errors='ignore')
        return pdb_file

    data = None
    reader = getattr(pdb_file, "read", None)
    if callable(reader):
        try:
            data = reader()
        except (OSError, ValueError):
            # E.g. the handle is already closed; fall back to its path below.
            data = None
    if not data:
        path = getattr(pdb_file, "name", None)
        if isinstance(path, str) and os.path.isfile(path):
            with open(path, "rb") as handle:
                data = handle.read()
    if data is None:
        return ""
    if isinstance(data, bytes):
        return data.decode('utf-8', errors='ignore')
    return str(data)


def generate_from_pdb(pdb_file):
    """Validate an uploaded PDB file and generate candidate molecules for it.

    Args:
        pdb_file: The upload as delivered by the UI. Accepted forms are
            ``None`` (nothing uploaded), raw ``bytes``, a ``str`` holding
            either a path to the uploaded file or the PDB text itself, and a
            file-like object (read via ``.read()``, falling back to the path
            in its ``.name``). Which form arrives presumably depends on the
            Gradio version / ``gr.File`` ``type`` setting — verify against
            the deployed version.

    Returns:
        A ``(status_message, viewer_html, smiles_file_path)`` tuple. The last
        two items are ``None`` when the input is empty or cannot be parsed;
        only the path is ``None`` when no valid SMILES could be generated.
        Unexpected failures are reported in the status message, not raised.
    """
    import tempfile  # local import: this file has no file-level ``import tempfile``

    try:
        pdb_str = _read_pdb_text(pdb_file)
        if not pdb_str.strip():
            return "❌ The file is empty or invalid.", None, None

        # Parse once purely as validation; the structure itself is not used.
        try:
            load_pdb(io.StringIO(pdb_str))
        except Exception as e:
            return f"❌ Error parsing the PDB file:\n{e}", None, None

        html_3d = get_protein_3d_html(pdb_str)
        prompt = "Generate a molecule in SELFIES that binds to the mutated KRAS protein"
        smiles_list = generate_multiple_valid_smiles(prompt, n=10)
        if not smiles_list:
            return "❌ No valid SMILES generated.", html_3d, None

        # A unique file per request, so concurrent users cannot overwrite
        # each other's results. The file is left in place for download.
        with tempfile.NamedTemporaryFile(
            mode="w",
            encoding="utf-8",
            prefix="generated_smiles_",
            suffix=".txt",
            delete=False,
        ) as f:
            f.write("\n".join(smiles_list))
            smiles_file_path = f.name
        return "✅ Molecules generated successfully.", html_3d, smiles_file_path
    except Exception as e:
        # UI boundary: surface the failure as a status message instead of raising.
        return f"❌ An unexpected error occurred:\n{e}", None, None
# CSS to beautify the interface
# This stylesheet string is passed to gr.Blocks(css=...) where the UI is
# built below. Its rules cover the page body (background color, font),
# <h1> headings, ".gr-box" containers and buttons.
css = """
body {
background-color: #f0f9ff;
font-family: 'Segoe UI', sans-serif;
}
h1 {
color: #003d66;
text-align: center;
font-size: 32px;
}
.gr-box {
border: 1px solid #cce7ff;
background-color: #ffffff;
border-radius: 15px;
padding: 20px;
box-shadow: 0 2px 8px rgba(0, 128, 255, 0.1);
}
button {
background-color: #007acc !important;
color: white !important;
font-weight: bold;
border-radius: 10px !important;
}
"""
# Assemble the Gradio interface, styled with the `css` string defined above.
# NOTE(review): nesting was reconstructed from a copy whose indentation was
# lost — confirm that the button belongs inside the Row with the file input.
# NOTE(review): the emoji prefixes in the strings below look mis-encoded
# (mojibake) in this copy — confirm against the original file's encoding.
with gr.Blocks(css=css) as demo:
    # Page title and a short usage hint, written as inline HTML.
    gr.Markdown("""
<h1>πŸ”¬ Drug-like Molecule Generation from PDB using ChemGPT</h1>
<p>πŸ§ͺ Upload a PDB file containing mutations in the KRAS protein. The system will generate suitable SMILES drug candidates.</p>
""")
    # Input row: the PDB upload widget and the button that starts generation.
    with gr.Row():
        pdb_input = gr.File(label="πŸ“ Upload PDB File")
        run_btn = gr.Button("πŸš€ Generate Molecules")
    # Output widgets, in the same order as the three values returned by
    # generate_from_pdb: status text, 3D viewer HTML, downloadable file.
    status = gr.Textbox(label="πŸ“’ Status")
    view3d = gr.HTML(label="🧬 3D Structure View")
    file_output = gr.File(label="πŸ“„ Download SMILES File")
    # Clicking the button passes the upload to generate_from_pdb and routes
    # its return values to the output widgets listed here.
    run_btn.click(
        fn=generate_from_pdb,
        inputs=pdb_input,
        outputs=[status, view3d, file_output]
    )
# Start the app; share=True asks Gradio to also create a public share link.
demo.launch(share=True)