Spaces:
Sleeping
Sleeping
File size: 2,852 Bytes
5fc90f2 0fcfaed 5fc90f2 0fcfaed 5fc90f2 77b550a 0fcfaed 161f144 5fc90f2 0fcfaed 5fc90f2 0fcfaed 161f144 5fc90f2 0fcfaed 161f144 0fcfaed 5fc90f2 0fcfaed 5fc90f2 0fcfaed |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
from Bio import PDB
from transformers import AutoTokenizer, AutoModelForCausalLM
from rdkit import Chem
import selfies as sf
import torch
import time
import re
import io
import gradio as gr
# Seed the RNGs from the wall clock so each launch samples different molecules.
# The timestamp is read once so the CPU and CUDA generators receive the same
# seed; two separate reads could straddle a one-second boundary and disagree.
_seed = int(time.time())
torch.manual_seed(_seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(_seed)

# Checkpoint identifier passed to both `from_pretrained` loaders below, so the
# tokenizer and the causal language model come from the same checkpoint.
model_name = "ncfrey/ChemGPT-1.2B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
def load_pdb(file_obj):
    """Parse PDB data into a Biopython structure.

    Args:
        file_obj: Source handed unchanged to ``PDBParser.get_structure``
            (the caller in this file supplies an in-memory text stream).

    Returns:
        The structure object built by the parser, created under the
        id ``'protein'``.
    """
    # QUIET=True asks the parser to suppress its construction warnings.
    return PDB.PDBParser(QUIET=True).get_structure('protein', file_obj)
# Matches one bracketed token, e.g. "[C]" or "[=O]"; compiled once at import.
_SELFIES_TOKEN_RE = re.compile(r'\[[^\[\]]+\]')
# Any token containing one of these substrings is discarded before decoding.
_DROPPED_TOKEN_MARKERS = ('Branch', 'Ring', 'expl')


def clean_and_decode_selfies(raw_output):
    """Extract bracketed tokens from model text and decode them to SMILES.

    Every ``[...]`` token is pulled out of ``raw_output``; tokens containing
    ``Branch``, ``Ring`` or ``expl`` are dropped, the rest are concatenated,
    decoded with ``selfies.decoder`` and round-tripped through RDKit.

    Args:
        raw_output: Text produced by the language model.

    Returns:
        The SMILES string produced by ``Chem.MolToSmiles``, or ``None`` when
        no usable token remains, decoding fails, or RDKit cannot parse the
        decoded string.
    """
    tokens = _SELFIES_TOKEN_RE.findall(raw_output)
    kept_tokens = [
        token for token in tokens
        if not any(marker in token for marker in _DROPPED_TOKEN_MARKERS)
    ]
    # Nothing left to decode: report "no molecule" explicitly instead of
    # pushing an empty string through the decoder and RDKit.
    if not kept_tokens:
        return None

    try:
        smiles = sf.decoder(''.join(kept_tokens))
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return None
        # Normalise an empty result to None so callers see a single
        # "nothing decoded" value.
        return Chem.MolToSmiles(mol) or None
    except Exception:
        # Best-effort decode: any library failure means "no valid molecule".
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        return None
def generate_multiple_valid_smiles(prompt, n=10, max_length=100):
    """Sample the language model until ``n`` distinct valid SMILES are found.

    Each attempt samples one sequence from the module-level ``model``,
    decodes it with the module-level ``tokenizer`` and passes the text to
    ``clean_and_decode_selfies``. Sampling stops once ``n`` unique results
    have been collected or after ``n * 5`` attempts, whichever comes first.

    Args:
        prompt: Text used as the generation prompt.
        n: Number of distinct SMILES strings requested.
        max_length: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        A list of at most ``n`` unique SMILES strings in the order they were
        first generated; empty when ``n`` is not positive or no attempt
        produced a valid molecule.
    """
    # Nothing requested: skip tokenisation and generation entirely.
    if n <= 0:
        return []

    # The prompt is the same for every attempt, so tokenise it once instead
    # of once per loop iteration.
    inputs = tokenizer(prompt, return_tensors="pt")

    # A dict deduplicates like a set but keeps first-seen order, so the
    # returned list does not depend on string hash randomisation.
    unique_smiles = {}
    max_attempts = n * 5
    attempts = 0
    while len(unique_smiles) < n and attempts < max_attempts:
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            do_sample=True,
            temperature=1.0,
            top_k=100,
            pad_token_id=tokenizer.eos_token_id,
        )
        selfies_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
        smiles = clean_and_decode_selfies(selfies_output)
        if smiles:
            unique_smiles[smiles] = None
        # Failed and duplicate samples still count towards the attempt cap.
        attempts += 1
    return list(unique_smiles)
def generate_drugs_from_pdb(pdb_file):
    """Gradio callback: read the uploaded PDB file and generate SMILES.

    The upload is read and run through ``load_pdb``; the parsed structure is
    not used afterwards, and generation always uses the same fixed prompt.

    Args:
        pdb_file: Value delivered by the ``gr.File`` input: ``None`` when
            nothing was uploaded, a path string, or an object whose ``.name``
            attribute is the path of the uploaded file.

    Returns:
        A ``(status_message, smiles_text)`` tuple. ``smiles_text`` holds one
        SMILES per line on success and is ``""`` on any failure.
    """
    # Button clicked without an upload: give a clear message instead of
    # surfacing a raw AttributeError about NoneType.
    if pdb_file is None:
        return "❌ خطأ: لم يتم رفع أي ملف", ""

    try:
        # Depending on the Gradio version/configuration the component passes
        # either a plain path string or a temp-file wrapper exposing `.name`.
        pdb_path = pdb_file if isinstance(pdb_file, str) else pdb_file.name
        with open(pdb_path, 'r') as f:
            pdb_str = f.read()

        # Parse the contents; the result is discarded, so this only surfaces
        # whatever errors the parser raises.
        load_pdb(io.StringIO(pdb_str))

        prompt = "Generate a molecule in SELFIES that binds to the mutated KRAS protein"
        smiles_list = generate_multiple_valid_smiles(prompt, n=10)
        if not smiles_list:
            return "❌ لم يتم توليد أي SMILES صالحة", ""

        smiles_text = "\n".join(smiles_list)
        return "✅ تم توليد المركبات بنجاح", smiles_text
    except Exception as e:
        # UI boundary: report the failure in the status box rather than
        # letting the callback raise.
        return f"❌ خطأ: {str(e)}", ""
# Build the Gradio page: a title, a PDB upload with a trigger button, and two
# text boxes that receive the callback's (status, SMILES) result.
with gr.Blocks() as demo:
    gr.Markdown("# 🧬 توليد مركبات دوائية من ملف PDB باستخدام ChemGPT")
    # NOTE(review): the nesting below is reconstructed — it assumes the upload
    # widget and the button share this Row and the text boxes sit beneath it.
    # Confirm against the intended layout.
    with gr.Row():
        pdb_input = gr.File(label="📁 ارفع ملف PDB")
        run_btn = gr.Button("🚀 توليد SMILES")
    status = gr.Textbox(label="📢 الحالة")
    smiles_output = gr.Textbox(label="📄 المركبات (SMILES)", lines=10)
    # The callback returns a 2-tuple; its items map, in order, to `status` and
    # `smiles_output`.
    run_btn.click(fn=generate_drugs_from_pdb, inputs=pdb_input, outputs=[status, smiles_output])
# Runs unconditionally at module level (no `__main__` guard in this file).
demo.launch()
|