# Source: Hugging Face Space "smiless", file app.py (2.85 kB).
# Last commit: "Update app.py" by mohamed20003, revision 77b550a (verified).
from Bio import PDB
from transformers import AutoTokenizer, AutoModelForCausalLM
from rdkit import Chem
import selfies as sf
import torch
import time
import re
import io
import gradio as gr
# Seed PyTorch from the wall clock so that each process start samples a
# different sequence of molecules.
torch.manual_seed(int(time.time()))
if torch.cuda.is_available():
    # Seed the CUDA generators too when a GPU is present.
    torch.cuda.manual_seed_all(int(time.time()))

# Hugging Face Hub identifier of the checkpoint used for generation.
model_name = "ncfrey/ChemGPT-1.2B"

# Loaded once at import time; the generation function below reads these
# module-level objects directly.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
def load_pdb(file_obj):
    """Parse PDB data into a Biopython structure.

    Args:
        file_obj: The PDB source forwarded to ``PDBParser.get_structure``
            (the caller in this file passes an in-memory text stream).

    Returns:
        The parsed structure, created with the id ``'protein'``.
    """
    # QUIET=True suppresses the warnings the parser emits while building
    # the structure.
    return PDB.PDBParser(QUIET=True).get_structure('protein', file_obj)
def clean_and_decode_selfies(raw_output):
    """Extract SELFIES symbols from model text and decode them to SMILES.

    Every bracketed symbol (``[...]``) is pulled out of ``raw_output``;
    symbols containing ``Branch``, ``Ring`` or ``expl`` are discarded, and
    the remaining symbols are concatenated, decoded with ``selfies`` and
    round-tripped through RDKit to obtain a canonical SMILES string.

    Args:
        raw_output: Text produced by the language model.

    Returns:
        The canonical SMILES string, or ``None`` when no usable symbol is
        found, decoding fails, RDKit rejects the molecule, or the result
        is an empty string.
    """
    excluded_markers = ('Branch', 'Ring', 'expl')
    tokens = re.findall(r'\[[^\[\]]+\]', raw_output)
    kept = [
        tok for tok in tokens
        if not any(marker in tok for marker in excluded_markers)
    ]
    if not kept:
        # Nothing left to decode; skip the decoder entirely.
        return None

    try:
        smiles = sf.decoder(''.join(kept))
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return None
        # An empty canonical string carries no molecule; report it as None.
        return Chem.MolToSmiles(mol) or None
    except Exception:
        # Best-effort: any decoding/parsing failure just means "no molecule".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt/SystemExit
        # propagate.
        return None
def generate_multiple_valid_smiles(prompt, n=10, max_length=100):
    """Sample the model repeatedly until ``n`` distinct valid SMILES are found.

    Sampling stops as soon as ``n`` unique molecules have been collected or
    after ``n * 5`` attempts, whichever comes first, so fewer than ``n``
    results may be returned.

    Args:
        prompt: Text used to condition the model.
        n: Number of distinct SMILES strings wanted.
        max_length: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        A list of unique SMILES strings in the order they were generated.
    """
    if n <= 0:
        # Nothing requested; avoid tokenizing or running the model.
        return []

    # The prompt is identical for every attempt, so tokenize it only once.
    inputs = tokenizer(prompt, return_tensors="pt")

    # A dict is used as an insertion-ordered set so the returned order is
    # the generation order rather than hash order.
    found = {}
    attempts = 0
    max_attempts = n * 5
    while len(found) < n and attempts < max_attempts:
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            do_sample=True,
            temperature=1.0,
            top_k=100,
            pad_token_id=tokenizer.eos_token_id,
        )
        selfies_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
        smiles = clean_and_decode_selfies(selfies_output)
        if smiles:
            found[smiles] = None
        attempts += 1
    return list(found)
def generate_drugs_from_pdb(pdb_file):
    """Gradio callback: validate the uploaded PDB file and generate SMILES.

    The file is read and parsed with ``load_pdb``; the parsed structure is
    not used afterwards, so parsing only acts as a check that the upload is
    readable. Generation then runs with a fixed prompt.

    Args:
        pdb_file: The value delivered by the ``gr.File`` input. Either an
            object exposing the file path as ``.name`` or a plain path
            string; ``None`` when nothing was uploaded.

    Returns:
        A ``(status, smiles_text)`` tuple. ``smiles_text`` holds one SMILES
        per line on success and is empty on any failure.
    """
    if pdb_file is None:
        # Button pressed without an upload: give a clear instruction rather
        # than leaking an AttributeError message to the user.
        return "❌ يرجى رفع ملف PDB أولاً", ""

    try:
        # Accept both file-like wrappers (path in ``.name``) and path strings.
        pdb_path = getattr(pdb_file, "name", pdb_file)
        with open(pdb_path, 'r') as f:
            pdb_str = f.read()
        load_pdb(io.StringIO(pdb_str))

        prompt = "Generate a molecule in SELFIES that binds to the mutated KRAS protein"
        smiles_list = generate_multiple_valid_smiles(prompt, n=10)
        if not smiles_list:
            return "❌ لم يتم توليد أي SMILES صالحة", ""
        smiles_text = "\n".join(smiles_list)
        return "✅ تم توليد المركبات بنجاح", smiles_text
    except Exception as e:
        # Top-level UI boundary: report the failure in the status box
        # instead of raising into Gradio.
        return f"❌ خطأ: {str(e)}", ""
# Single-page UI: a PDB upload plus a trigger button, with a status box and
# a multi-line box for the generated SMILES.
# NOTE(review): the nesting below (upload + button inside the Row, text
# boxes directly under Blocks) is reconstructed — confirm the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# 🧬 توليد مركبات دوائية من ملف PDB باستخدام ChemGPT")

    with gr.Row():
        pdb_input = gr.File(label="📁 ارفع ملف PDB")
        run_btn = gr.Button("🚀 توليد SMILES")

    status = gr.Textbox(label="📢 الحالة")
    smiles_output = gr.Textbox(label="📄 المركبات (SMILES)", lines=10)

    # The callback returns (status, smiles_text), matching the two outputs.
    run_btn.click(
        fn=generate_drugs_from_pdb,
        inputs=pdb_input,
        outputs=[status, smiles_output],
    )

demo.launch()