mohamed20003 committed on
Commit
0fcfaed
·
verified ·
1 Parent(s): 161f144

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -54
app.py CHANGED
@@ -1,12 +1,11 @@
1
  from Bio import PDB
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  from rdkit import Chem
4
- import py3Dmol
5
- import re
6
- import io
7
  import selfies as sf
8
  import torch
9
  import time
 
 
10
  import gradio as gr
11
 
12
  # إعداد العشوائية
@@ -14,7 +13,7 @@ torch.manual_seed(int(time.time()))
14
  if torch.cuda.is_available():
15
  torch.cuda.manual_seed_all(int(time.time()))
16
 
17
- # تحميل النموذج
18
  model_name = "ncfrey/ChemGPT-1.2B"
19
  tokenizer = AutoTokenizer.from_pretrained(model_name)
20
  model = AutoModelForCausalLM.from_pretrained(model_name)
@@ -24,13 +23,6 @@ def load_pdb(file_obj):
24
  structure = parser.get_structure('protein', file_obj)
25
  return structure
26
 
27
- def get_protein_3d_html(pdb_str):
28
- view = py3Dmol.view(width=600, height=400)
29
- view.addModel(pdb_str, "pdb")
30
- view.setStyle({"cartoon": {"color": "spectrum"}})
31
- view.zoomTo()
32
- return view._make_html()
33
-
34
  def clean_and_decode_selfies(raw_output):
35
  tokens = re.findall(r'\[[^\[\]]+\]', raw_output)
36
  valid_tokens = [t for t in tokens if all(x not in t for x in ['Branch', 'Ring', 'expl'])]
@@ -63,58 +55,33 @@ def generate_multiple_valid_smiles(prompt, n=10, max_length=100):
63
  tries += 1
64
  return list(valid_smiles)
65
 
66
- def generate_from_pdb(pdb_file):
67
  try:
68
- with open(pdb_file.name, 'r', encoding='utf-8', errors='ignore') as f:
69
- pdb_str = f.read()
70
-
71
- if len(pdb_str.strip()) == 0:
72
- return "❌ الملف فارغ أو غير صالح", None, None
73
-
74
- pdb_file_io = io.StringIO(pdb_str)
75
- try:
76
- load_pdb(pdb_file_io)
77
- except Exception as e:
78
- return f"❌ خطأ أثناء تحليل ملف PDB:\n{str(e)}", None, None
79
-
80
- html_3d = get_protein_3d_html(pdb_str)
81
 
82
  prompt = "Generate a molecule in SELFIES that binds to the mutated KRAS protein"
83
  smiles_list = generate_multiple_valid_smiles(prompt, n=10)
84
 
85
  if not smiles_list:
86
- return "❌ لم يتم توليد أي SMILES صالحة", html_3d, None
87
-
88
- smiles_txt = "\n".join(smiles_list)
89
- smiles_file_path = "/tmp/generated_smiles.txt"
90
- with open(smiles_file_path, "w") as f:
91
- f.write(smiles_txt)
92
 
93
- return "✅ تم توليد المركبات بنجاح", html_3d, smiles_file_path
 
94
 
95
  except Exception as e:
96
- return f"❌ حدث خطأ:\n{str(e)}", None, None
97
-
98
- css = """
99
- body {background-color: #f0f9ff;}
100
- h1 {color: #004d66; text-align: center;}
101
- """
102
-
103
- with gr.Blocks(css=css) as demo:
104
- gr.Markdown("<h1>🔬 Drug-like Molecule Generation from PDB using ChemGPT</h1>")
105
- gr.Markdown("🧪 Upload a PDB file containing mutations in the KRAS protein. The system will generate suitable SMILES drug candidates.")
106
 
 
 
 
107
  with gr.Row():
108
- pdb_input = gr.File(label="📁 Upload PDB File")
109
- run_btn = gr.Button("🚀 Generate Molecules")
110
- status = gr.Textbox(label="📢 Status")
111
- view3d = gr.HTML(label="🧬 3D Structure View")
112
- file_output = gr.File(label="📄 Download SMILES File")
113
- run_btn.click(
114
- fn=generate_from_pdb,
115
- inputs=pdb_input,
116
- outputs=[status, view3d, file_output]
117
- )
118
 
119
- if __name__ == "__main__":
120
- demo.launch()
 
1
  from Bio import PDB
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  from rdkit import Chem
 
 
 
4
  import selfies as sf
5
  import torch
6
  import time
7
+ import re
8
+ import io
9
  import gradio as gr
10
 
11
  # إعداد العشوائية
 
13
  if torch.cuda.is_available():
14
  torch.cuda.manual_seed_all(int(time.time()))
15
 
16
+ # تحميل نموذج ChemGPT
17
  model_name = "ncfrey/ChemGPT-1.2B"
18
  tokenizer = AutoTokenizer.from_pretrained(model_name)
19
  model = AutoModelForCausalLM.from_pretrained(model_name)
 
23
  structure = parser.get_structure('protein', file_obj)
24
  return structure
25
 
 
 
 
 
 
 
 
26
  def clean_and_decode_selfies(raw_output):
27
  tokens = re.findall(r'\[[^\[\]]+\]', raw_output)
28
  valid_tokens = [t for t in tokens if all(x not in t for x in ['Branch', 'Ring', 'expl'])]
 
55
  tries += 1
56
  return list(valid_smiles)
57
 
58
+ def generate_drugs_from_pdb(pdb_file):
59
  try:
60
+ pdb_bytes = pdb_file.read()
61
+ pdb_str = pdb_bytes.decode('utf-8', errors='ignore')
62
+ pdb_io = io.StringIO(pdb_str)
63
+ load_pdb(pdb_io)
 
 
 
 
 
 
 
 
 
64
 
65
  prompt = "Generate a molecule in SELFIES that binds to the mutated KRAS protein"
66
  smiles_list = generate_multiple_valid_smiles(prompt, n=10)
67
 
68
  if not smiles_list:
69
+ return "❌ لم يتم توليد أي SMILES صالحة", ""
 
 
 
 
 
70
 
71
+ smiles_text = "\n".join(smiles_list)
72
+ return "✅ تم توليد المركبات بنجاح", smiles_text
73
 
74
  except Exception as e:
75
+ return f"❌ خطأ: {str(e)}", ""
 
 
 
 
 
 
 
 
 
76
 
77
+ # واجهة Gradio
78
+ with gr.Blocks() as demo:
79
+ gr.Markdown("# 🧬 توليد مركبات دوائية من ملف PDB باستخدام ChemGPT")
80
  with gr.Row():
81
+ pdb_input = gr.File(label="📁 ارفع ملف PDB")
82
+ run_btn = gr.Button("🚀 توليد SMILES")
83
+ status = gr.Textbox(label="📢 الحالة")
84
+ smiles_output = gr.Textbox(label="📄 المركبات (SMILES)", lines=10)
85
+ run_btn.click(fn=generate_drugs_from_pdb, inputs=pdb_input, outputs=[status, smiles_output])
 
 
 
 
 
86
 
87
+ demo.launch()