Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -24,12 +24,10 @@ import gradio as gr
|
|
24 |
import hydra
|
25 |
import pandas as pd
|
26 |
import requests
|
27 |
-
from markdown import markdown
|
28 |
-
from rdkit.Chem.PandasTools import _MolPlusFingerprint
|
29 |
-
from rdkit.Chem.rdMolDescriptors import CalcNumRotatableBonds, CalcNumHeavyAtoms, CalcNumAtoms, CalcTPSA
|
30 |
from requests.adapters import HTTPAdapter, Retry
|
|
|
31 |
from rdkit import Chem
|
32 |
-
from rdkit.Chem import RDConfig, Descriptors,
|
33 |
from rdkit.Chem.Scaffolds import MurckoScaffold
|
34 |
import seaborn as sns
|
35 |
|
@@ -283,46 +281,6 @@ scheduler.add_job(check_expiry, 'interval', hours=1)
|
|
283 |
scheduler.start()
|
284 |
|
285 |
|
286 |
-
def sa_score(mol):
|
287 |
-
return sascorer.calculateScore(mol)
|
288 |
-
|
289 |
-
|
290 |
-
def mw(mol):
|
291 |
-
return Chem.Descriptors.MolWt(mol)
|
292 |
-
|
293 |
-
|
294 |
-
def mr(mol):
|
295 |
-
return Crippen.MolMR(mol)
|
296 |
-
|
297 |
-
|
298 |
-
def hbd(mol):
|
299 |
-
return Lipinski.NumHDonors(mol)
|
300 |
-
|
301 |
-
|
302 |
-
def hba(mol):
|
303 |
-
return Lipinski.NumHAcceptors(mol)
|
304 |
-
|
305 |
-
|
306 |
-
def logp(mol):
|
307 |
-
return Crippen.MolLogP(mol)
|
308 |
-
|
309 |
-
|
310 |
-
def atom(mol):
|
311 |
-
return CalcNumAtoms(mol)
|
312 |
-
|
313 |
-
|
314 |
-
def heavy_atom(mol):
|
315 |
-
return CalcNumHeavyAtoms(mol)
|
316 |
-
|
317 |
-
|
318 |
-
def rotatable_bond(mol):
|
319 |
-
return CalcNumRotatableBonds(mol)
|
320 |
-
|
321 |
-
|
322 |
-
def tpsa(mol):
|
323 |
-
return CalcTPSA(mol)
|
324 |
-
|
325 |
-
|
326 |
def lipinski(mol):
|
327 |
"""
|
328 |
Lipinski's rules:
|
@@ -331,16 +289,12 @@ def lipinski(mol):
|
|
331 |
Molecular weight <= 500 daltons
|
332 |
logP <= 5
|
333 |
"""
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
elif logp(mol) > 5:
|
341 |
-
return False
|
342 |
-
else:
|
343 |
-
return True
|
344 |
|
345 |
|
346 |
def reos(mol):
|
@@ -354,20 +308,15 @@ def reos(mol):
|
|
354 |
Rotatable bond count between 0 and 8
|
355 |
Heavy atom count between 15 and 50
|
356 |
"""
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
return False
|
367 |
-
elif not 15 < heavy_atom(mol) < 50:
|
368 |
-
return False
|
369 |
-
else:
|
370 |
-
return True
|
371 |
|
372 |
|
373 |
def ghose(mol):
|
@@ -378,16 +327,12 @@ def ghose(mol):
|
|
378 |
Atom count between 20 and 70
|
379 |
Molar refractivity between 40 and 130
|
380 |
"""
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
elif not 40 < mr(mol) < 130:
|
388 |
-
return False
|
389 |
-
else:
|
390 |
-
return True
|
391 |
|
392 |
|
393 |
def veber(mol):
|
@@ -397,12 +342,10 @@ def veber(mol):
|
|
397 |
Rotatable bonds <= 10
|
398 |
Topological polar surface area <= 140
|
399 |
"""
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
else:
|
405 |
-
return True
|
406 |
|
407 |
|
408 |
def rule_of_three(mol):
|
@@ -414,18 +357,13 @@ def rule_of_three(mol):
|
|
414 |
H-bond acceptor count <= 3
|
415 |
Rotatable bond count <= 3
|
416 |
"""
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
return False
|
425 |
-
elif not rotatable_bond(mol) <= 3:
|
426 |
-
return False
|
427 |
-
else:
|
428 |
-
return True
|
429 |
|
430 |
|
431 |
@cache
|
@@ -470,15 +408,16 @@ def bms(mol):
|
|
470 |
|
471 |
|
472 |
SCORE_MAP = {
|
473 |
-
'SAscore':
|
474 |
-
'LogP':
|
475 |
-
'Molecular Weight':
|
476 |
-
'Number of
|
477 |
-
'
|
478 |
-
'
|
479 |
-
'H-Bond
|
480 |
-
'
|
481 |
-
'
|
|
|
482 |
}
|
483 |
|
484 |
FILTER_MAP = {
|
@@ -618,7 +557,7 @@ def ts_to_str(timestamp, timezone):
|
|
618 |
localized_dt = dt.astimezone(target_timezone)
|
619 |
|
620 |
# Format the datetime object to the specified string format
|
621 |
-
return localized_dt.strftime('%Y-%m-%d %H:%M:%S (%Z)')
|
622 |
|
623 |
|
624 |
def lookup_job(job_id):
|
@@ -783,12 +722,12 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
|
|
783 |
desc=f"Calculating scaffold...").apply(MurckoScaffold.MurckoScaffoldSmilesFromSmiles)
|
784 |
df['Scaffold'] = df['Scaffold SMILES'].swifter.progress_bar(
|
785 |
desc='Generating scaffold graphs...').apply(
|
786 |
-
lambda smiles: _MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
|
787 |
# Add a new column with RDKit molecule objects
|
788 |
if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
|
789 |
df['Compound'] = df['X1'].swifter.progress_bar(
|
790 |
desc='Generating molecular graphs...').apply(
|
791 |
-
lambda smiles: _MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
|
792 |
|
793 |
# DF_FOR_REPORT = df.copy()
|
794 |
|
|
|
24 |
import hydra
|
25 |
import pandas as pd
|
26 |
import requests
|
|
|
|
|
|
|
27 |
from requests.adapters import HTTPAdapter, Retry
|
28 |
+
from markdown import markdown
|
29 |
from rdkit import Chem
|
30 |
+
from rdkit.Chem import Draw, RDConfig, PandasTools, Descriptors, rdMolDescriptors, rdmolops, Lipinski, Crippen
|
31 |
from rdkit.Chem.Scaffolds import MurckoScaffold
|
32 |
import seaborn as sns
|
33 |
|
|
|
281 |
scheduler.start()
|
282 |
|
283 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
284 |
def lipinski(mol):
|
285 |
"""
|
286 |
Lipinski's rules:
|
|
|
289 |
Molecular weight <= 500 daltons
|
290 |
logP <= 5
|
291 |
"""
|
292 |
+
return (
|
293 |
+
Lipinski.NumHDonors(mol) <= 5 and
|
294 |
+
Lipinski.NumHAcceptors(mol) <= 10 and
|
295 |
+
Descriptors.MolWt(mol) <= 500 and
|
296 |
+
Crippen.MolLogP(mol) <= 5
|
297 |
+
)
|
|
|
|
|
|
|
|
|
298 |
|
299 |
|
300 |
def reos(mol):
|
|
|
308 |
Rotatable bond count between 0 and 8
|
309 |
Heavy atom count between 15 and 50
|
310 |
"""
|
311 |
+
return (
|
312 |
+
200 <= Descriptors.MolWt(mol) <= 500 and
|
313 |
+
-5.0 <= Crippen.MolLogP(mol) <= 5.0 and
|
314 |
+
0 <= Lipinski.NumHDonors(mol) <= 5 and
|
315 |
+
0 <= Lipinski.NumHAcceptors(mol) <= 10 and
|
316 |
+
-2 <= rdmolops.GetFormalCharge(mol) <= 2 and
|
317 |
+
0 <= rdMolDescriptors.CalcNumRotatableBonds(mol) <= 8 and
|
318 |
+
15 <= rdMolDescriptors.CalcNumHeavyAtoms(mol) <= 50
|
319 |
+
)
|
|
|
|
|
|
|
|
|
|
|
320 |
|
321 |
|
322 |
def ghose(mol):
|
|
|
327 |
Atom count between 20 and 70
|
328 |
Molar refractivity between 40 and 130
|
329 |
"""
|
330 |
+
return (
|
331 |
+
160 <= Descriptors.MolWt(mol) <= 480 and
|
332 |
+
-0.4 <= Crippen.MolLogP(mol) <= 5.6 and
|
333 |
+
20 <= rdMolDescriptors.CalcNumAtoms(mol) <= 70 and
|
334 |
+
40 <= Crippen.MolMR(mol) <= 130
|
335 |
+
)
|
|
|
|
|
|
|
|
|
336 |
|
337 |
|
338 |
def veber(mol):
|
|
|
342 |
Rotatable bonds <= 10
|
343 |
Topological polar surface area <= 140
|
344 |
"""
|
345 |
+
return (
|
346 |
+
rdMolDescriptors.CalcNumRotatableBonds(mol) <= 10 and
|
347 |
+
rdMolDescriptors.CalcTPSA(mol) <= 140
|
348 |
+
)
|
|
|
|
|
349 |
|
350 |
|
351 |
def rule_of_three(mol):
|
|
|
357 |
H-bond acceptor count <= 3
|
358 |
Rotatable bond count <= 3
|
359 |
"""
|
360 |
+
return (
|
361 |
+
Descriptors.MolWt(mol) <= 300 and
|
362 |
+
Crippen.MolLogP(mol) <= 3 and
|
363 |
+
Lipinski.NumHDonors(mol) <= 3 and
|
364 |
+
Lipinski.NumHAcceptors(mol) <= 3 and
|
365 |
+
rdMolDescriptors.CalcNumRotatableBonds(mol) <= 3
|
366 |
+
)
|
|
|
|
|
|
|
|
|
|
|
367 |
|
368 |
|
369 |
@cache
|
|
|
408 |
|
409 |
|
410 |
SCORE_MAP = {
|
411 |
+
'SAscore': sascorer.calculateScore,
|
412 |
+
'LogP': Crippen.MolLogP,
|
413 |
+
'Molecular Weight': Descriptors.MolWt,
|
414 |
+
'Number of Atoms': rdMolDescriptors.CalcNumAtoms,
|
415 |
+
'Number of Heavy Atoms': rdMolDescriptors.CalcNumHeavyAtoms,
|
416 |
+
'Molar Refractivity': Crippen.MolMR,
|
417 |
+
'H-Bond Donor Count': Lipinski.NumHDonors,
|
418 |
+
'H-Bond Acceptor Count': Lipinski.NumHAcceptors,
|
419 |
+
'Rotatable Bond Count': rdMolDescriptors.CalcNumRotatableBonds,
|
420 |
+
'Topological Polar Surface Area': rdMolDescriptors.CalcTPSA,
|
421 |
}
|
422 |
|
423 |
FILTER_MAP = {
|
|
|
557 |
localized_dt = dt.astimezone(target_timezone)
|
558 |
|
559 |
# Format the datetime object to the specified string format
|
560 |
+
return localized_dt.strftime('%Y-%m-%d %H:%M:%S (%Z%z)')
|
561 |
|
562 |
|
563 |
def lookup_job(job_id):
|
|
|
722 |
desc=f"Calculating scaffold...").apply(MurckoScaffold.MurckoScaffoldSmilesFromSmiles)
|
723 |
df['Scaffold'] = df['Scaffold SMILES'].swifter.progress_bar(
|
724 |
desc='Generating scaffold graphs...').apply(
|
725 |
+
lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
|
726 |
# Add a new column with RDKit molecule objects
|
727 |
if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
|
728 |
df['Compound'] = df['X1'].swifter.progress_bar(
|
729 |
desc='Generating molecular graphs...').apply(
|
730 |
+
lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
|
731 |
|
732 |
# DF_FOR_REPORT = df.copy()
|
733 |
|