Spaces:
Sleeping
Sleeping
File size: 4,144 Bytes
5fc90f2 7697b42 5fc90f2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
import io
import os
import re
import tempfile
import time

import gradio as gr
import py3Dmol
import selfies as sf
import torch
from Bio import PDB
from rdkit import Chem
from transformers import AutoTokenizer, AutoModelForCausalLM
# Randomness setup: seed from the wall clock so every launch samples a
# different set of molecules.
RANDOM_SEED = int(time.time())
torch.manual_seed(RANDOM_SEED)
if torch.cuda.is_available():
    # Reuse the value computed above; a second time.time() call could land in
    # the next second and leave the CPU and CUDA generators on different seeds.
    torch.cuda.manual_seed_all(RANDOM_SEED)
# Load the model.
# The tokenizer and the causal language model are created once at import time
# from the same checkpoint name; generate_multiple_valid_smiles reads both as
# module-level globals.
model_name = "ncfrey/ChemGPT-1.2B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
def load_pdb(file_obj):
    """Parse PDB data with Biopython and return the resulting structure.

    Args:
        file_obj: Source handed unchanged to ``PDBParser.get_structure``;
            the caller in this file passes an ``io.StringIO`` holding the
            PDB text.

    Returns:
        The structure object, registered under the id ``'protein'``.
    """
    # QUIET=True silences the parser's construction warnings.
    return PDB.PDBParser(QUIET=True).get_structure('protein', file_obj)
def get_protein_3d_html(pdb_str):
    """Build an embeddable HTML snippet showing the protein in 3D.

    Args:
        pdb_str: PDB file contents as text.

    Returns:
        The HTML produced by the py3Dmol viewer for a 600x400 view with the
        model drawn as a spectrum-coloured cartoon and zoomed to fit.
    """
    cartoon_style = {"cartoon": {"color": "spectrum"}}

    viewer = py3Dmol.view(width=600, height=400)
    viewer.addModel(pdb_str, "pdb")
    viewer.setStyle(cartoon_style)
    viewer.zoomTo()

    # NOTE: _make_html is a private py3Dmol method; it is what yields markup
    # that can be embedded in the page.
    html = viewer._make_html()
    return html
# Substrings identifying SELFIES tokens that are discarded before decoding.
_DROPPED_TOKEN_MARKERS = ('Branch', 'Ring', 'expl')


def clean_and_decode_selfies(raw_output):
    """Turn raw model text into a canonical SMILES string, if possible.

    Every bracketed token (``[...]``) is extracted from ``raw_output``; tokens
    containing ``Branch``, ``Ring`` or ``expl`` are dropped, the remainder is
    joined into one SELFIES string, decoded to SMILES and canonicalised
    through RDKit.

    Args:
        raw_output: Text decoded from the language model. ``None`` or an
            empty string is accepted.

    Returns:
        The canonical SMILES string, or ``None`` when the text holds no
        usable tokens, decoding fails, RDKit cannot parse the result, or the
        result is an empty molecule.
    """
    if not raw_output:
        return None

    tokens = re.findall(r'\[[^\[\]]+\]', raw_output)
    kept_tokens = [
        token
        for token in tokens
        if not any(marker in token for marker in _DROPPED_TOKEN_MARKERS)
    ]
    if not kept_tokens:
        # Nothing left to decode; skip the decoder instead of feeding it ''.
        return None

    try:
        smiles = sf.decoder(''.join(kept_tokens))
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return None
        # An empty canonical string is not a molecule; report it as None.
        return Chem.MolToSmiles(mol) or None
    except Exception:
        # Best effort: malformed model output is expected, so any decoding
        # error just means "no molecule". Unlike a bare ``except``, this
        # leaves KeyboardInterrupt/SystemExit alone.
        return None
def generate_multiple_valid_smiles(prompt, n=10, max_length=100):
    """Sample the language model until ``n`` distinct valid SMILES are found.

    Each attempt samples one sequence from the module-level ``model``
    (``do_sample=True``, ``temperature=1.0``, ``top_k=100``), decodes it with
    the module-level ``tokenizer`` and passes the text through
    ``clean_and_decode_selfies``. Sampling stops after ``n`` unique molecules
    or ``n * 5`` attempts, whichever comes first.

    Args:
        prompt: Text used to condition generation.
        n: Number of distinct SMILES wanted. Values <= 0 yield an empty list.
        max_length: ``max_length`` forwarded to ``model.generate``.

    Returns:
        A list of at most ``n`` unique SMILES strings, in no particular order
        (they are collected in a set). May be shorter than ``n``, or empty.
    """
    if n <= 0:
        return []

    # The prompt never changes between attempts, so tokenise it once instead
    # of on every iteration.
    inputs = tokenizer(prompt, return_tensors="pt")

    valid_smiles = set()
    max_tries = n * 5
    tries = 0
    while len(valid_smiles) < n and tries < max_tries:
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            do_sample=True,
            temperature=1.0,
            top_k=100,
            # Pad with the EOS token id during generation.
            pad_token_id=tokenizer.eos_token_id,
        )
        selfies_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
        smiles = clean_and_decode_selfies(selfies_output)
        if smiles:
            valid_smiles.add(smiles)
        tries += 1
    return list(valid_smiles)
def _read_pdb_text(pdb_file):
    """Return the PDB text behind whatever object the upload widget supplied.

    Accepts ``None`` (nothing uploaded), raw ``bytes``, a ``str`` that is
    either a path to an existing file or the PDB text itself, or a file-like
    object. Bytes are decoded as UTF-8 with undecodable bytes ignored.
    """
    if pdb_file is None:
        return ""
    if isinstance(pdb_file, bytes):
        return pdb_file.decode('utf-8', errors='ignore')
    if isinstance(pdb_file, str):
        # A string may be a path to the uploaded file rather than its
        # contents; only fall back to "this is the text" when no such file
        # exists.
        if os.path.isfile(pdb_file):
            with open(pdb_file, encoding='utf-8', errors='ignore') as handle:
                return handle.read()
        return pdb_file

    # File-like object. If it exposes the path of a real file, read from disk
    # so the result does not depend on the handle's current position or mode.
    path = getattr(pdb_file, "name", None)
    if isinstance(path, str) and os.path.isfile(path):
        with open(path, encoding='utf-8', errors='ignore') as handle:
            return handle.read()

    data = pdb_file.read()
    if isinstance(data, bytes):
        return data.decode('utf-8', errors='ignore')
    return data


def generate_from_pdb(pdb_file):
    """Gradio callback: validate an uploaded PDB and generate SMILES candidates.

    The upload is read as text, parsed with ``load_pdb`` purely as a validity
    check (the parsed structure is not used afterwards), rendered to HTML with
    ``get_protein_3d_html``, and then ``generate_multiple_valid_smiles`` is
    asked for up to 10 molecules using a fixed KRAS prompt. The molecules are
    written one per line to a fresh temporary ``.txt`` file.

    Args:
        pdb_file: Value delivered by the ``gr.File`` input. NOTE(review): the
            exact type (path string, temp-file wrapper, bytes) depends on the
            Gradio version/configuration; all of these are handled.

    Returns:
        A ``(status_message, html_or_None, smiles_file_path_or_None)`` tuple
        matching the three output components. Errors are reported through the
        status message rather than raised.
    """
    try:
        pdb_str = _read_pdb_text(pdb_file)
        if not pdb_str.strip():
            return "❌ الملف فارغ أو غير صالح", None, None

        try:
            load_pdb(io.StringIO(pdb_str))
        except Exception as e:
            return f"❌ خطأ أثناء تحليل ملف PDB:\n{e}", None, None

        html_3d = get_protein_3d_html(pdb_str)

        prompt = "Generate a molecule in SELFIES that binds to the mutated KRAS protein"
        smiles_list = generate_multiple_valid_smiles(prompt, n=10)
        if not smiles_list:
            return "❌ لم يتم توليد أي SMILES صالحة", html_3d, None

        # A unique temp file per request: a single fixed path would be
        # overwritten by concurrent requests and assumes a /tmp directory.
        # delete=False keeps the file on disk so it can be served afterwards.
        with tempfile.NamedTemporaryFile(
            "w",
            encoding="utf-8",
            prefix="generated_smiles_",
            suffix=".txt",
            delete=False,
        ) as f:
            f.write("\n".join(smiles_list))
            smiles_file_path = f.name

        return "✅ تم توليد المركبات بنجاح", html_3d, smiles_file_path
    except Exception as e:
        # Top-level UI boundary: surface any unexpected failure as a status
        # message instead of an unhandled callback error.
        return f"❌ حدث خطأ:\n{e}", None, None
# Page-level styling passed to the Gradio Blocks container.
css = """
body {background-color: #f0f9ff;}
h1 {color: #004d66; text-align: center;}
"""

# Build the UI: title and description, an upload + run row, then the three
# outputs that generate_from_pdb fills in (status text, 3D view, SMILES file).
# NOTE(review): the original indentation was lost; the Row is assumed to hold
# only the upload widget and the button — confirm the intended layout.
with gr.Blocks(css=css) as demo:
    gr.Markdown("<h1>🔬 Drug-like Molecule Generation from PDB using ChemGPT</h1>")
    # The description literal was split by stray quotes and a dangling '.")'
    # line, which is a syntax error; it is restored as a single string.
    gr.Markdown(
        "🧪 Upload a PDB file containing mutations in the KRAS protein. "
        "The system will generate suitable SMILES drug candidates."
    )
    with gr.Row():
        pdb_input = gr.File(label="📁 ارفع ملف PDB")
        run_btn = gr.Button("🚀 توليد المركبات")
    status = gr.Textbox(label="📢 الحالة")
    view3d = gr.HTML(label="🧬 عرض ثلاثي الأبعاد")
    file_output = gr.File(label="📄 تحميل ملف SMILES")
    # Output order matches the tuple returned by generate_from_pdb.
    run_btn.click(
        fn=generate_from_pdb,
        inputs=pdb_input,
        outputs=[status, view3d, file_output]
    )

demo.launch()
|