Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import concurrent.futures
|
| 2 |
import glob
|
| 3 |
import smtplib
|
| 4 |
-
from datetime import datetime
|
| 5 |
import itertools
|
| 6 |
import textwrap
|
| 7 |
from email.mime.multipart import MIMEMultipart
|
|
@@ -52,9 +52,9 @@ from deepscreen.predict import predict
|
|
| 52 |
sys.path.append(os.path.join(RDConfig.RDContribDir, 'SA_Score'))
|
| 53 |
import sascorer
|
| 54 |
|
| 55 |
-
UNIPROT_ENDPOINT = 'https://rest.uniprot.org/uniprotkb/{query}'
|
| 56 |
DATASET_MAX_LEN = 10_000
|
| 57 |
SERVER_DATA_DIR = os.getenv('DATA') # '/data'
|
|
|
|
| 58 |
|
| 59 |
CSS = """
|
| 60 |
.help-tip {
|
|
@@ -244,20 +244,32 @@ for job in jobs:
|
|
| 244 |
scheduler = BackgroundScheduler()
|
| 245 |
|
| 246 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
def check_expiry():
|
| 248 |
Job = Query()
|
| 249 |
jobs = db.all()
|
| 250 |
|
| 251 |
for job in jobs:
|
| 252 |
# Check if the job has expired
|
| 253 |
-
if job['
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
|
|
|
|
|
|
| 261 |
elif job['status'] == 'RUNNING' and time() - job['start_time'] > 4 * 60 * 60: # 4 hours
|
| 262 |
# Mark the job as failed
|
| 263 |
db.update({'status': 'FAILED',
|
|
@@ -699,7 +711,7 @@ def submit_predict(predict_filepath, task, preset, target_family, state):
|
|
| 699 |
job_query = (Job.id == job_id)
|
| 700 |
|
| 701 |
end_time = time()
|
| 702 |
-
expiry_time = end_time +
|
| 703 |
|
| 704 |
db.update({'end_time': end_time,
|
| 705 |
'expiry_time': expiry_time,
|
|
@@ -758,7 +770,7 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
|
|
| 758 |
raw_df: df,
|
| 759 |
report_df: df.copy(),
|
| 760 |
analyze_btn: gr.Button(interactive=True),
|
| 761 |
-
report_task:
|
| 762 |
else:
|
| 763 |
return {analyze_btn: gr.Button(interactive=False)}
|
| 764 |
|
|
@@ -1567,7 +1579,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
| 1567 |
with gr.Column():
|
| 1568 |
file_for_report = gr.File(interactive=True, type='filepath')
|
| 1569 |
report_task = gr.Dropdown(list(TASK_MAP.keys()), visible=False, value=None,
|
| 1570 |
-
label='Specify the Task
|
| 1571 |
raw_df = gr.State(value=pd.DataFrame())
|
| 1572 |
report_df = gr.State(value=pd.DataFrame())
|
| 1573 |
scores = gr.CheckboxGroup(list(SCORE_MAP.keys()), label='Scores')
|
|
@@ -1660,6 +1672,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
| 1660 |
|
| 1661 |
|
| 1662 |
def uniprot_query(input_type, uid, gene, organism='Human'):
|
|
|
|
| 1663 |
fasta_rec = ''
|
| 1664 |
|
| 1665 |
match input_type:
|
|
@@ -1670,11 +1683,11 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
| 1670 |
query = f'search?query=organism_name:{organism.strip()}+AND+gene:{gene.strip()}&format=fasta'
|
| 1671 |
|
| 1672 |
try:
|
| 1673 |
-
fasta = session.get(
|
| 1674 |
fasta.raise_for_status()
|
| 1675 |
if fasta.text:
|
| 1676 |
fasta_rec = next(SeqIO.parse(io.StringIO(fasta.text), format='fasta'))
|
| 1677 |
-
fasta_rec = f"{fasta_rec.description}\n{fasta_rec.seq}"
|
| 1678 |
|
| 1679 |
except Exception as e:
|
| 1680 |
raise gr.Warning(f"Failed to query FASTA from UniProt database due to {str(e)}")
|
|
@@ -1698,18 +1711,21 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
| 1698 |
|
| 1699 |
|
| 1700 |
def target_family_detect(fasta, progress=gr.Progress(track_tqdm=True)):
|
| 1701 |
-
|
| 1702 |
-
|
| 1703 |
-
|
| 1704 |
-
|
| 1705 |
-
|
| 1706 |
-
|
| 1707 |
-
|
| 1708 |
-
|
| 1709 |
-
|
| 1710 |
-
|
| 1711 |
-
|
| 1712 |
-
|
|
|
|
|
|
|
|
|
|
| 1713 |
|
| 1714 |
|
| 1715 |
target_family_detect_btn.click(fn=target_family_detect, inputs=target_fasta, outputs=drug_screen_target_family)
|
|
@@ -2243,11 +2259,11 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
| 2243 |
return None
|
| 2244 |
|
| 2245 |
|
| 2246 |
-
def create_html_report_file(df, file_report, progress=gr.Progress(track_tqdm=True)):
|
| 2247 |
try:
|
| 2248 |
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
| 2249 |
filename = f"/data/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.html"
|
| 2250 |
-
create_html_report(df, filename)
|
| 2251 |
return gr.File(filename, visible=True)
|
| 2252 |
except Exception as e:
|
| 2253 |
gr.Warning(f"Failed to generate HTML due to error: {str(e)}")
|
|
@@ -2261,7 +2277,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
| 2261 |
outputs=csv_download_file, show_progress='full')
|
| 2262 |
html_generate.click(
|
| 2263 |
lambda: [gr.Button(visible=False), gr.File(visible=True)], outputs=[html_generate, html_download_file],
|
| 2264 |
-
).then(fn=create_html_report_file, inputs=[report_df, file_for_report],
|
| 2265 |
outputs=html_download_file, show_progress='full')
|
| 2266 |
|
| 2267 |
if __name__ == "__main__":
|
|
|
|
| 1 |
import concurrent.futures
|
| 2 |
import glob
|
| 3 |
import smtplib
|
| 4 |
+
from datetime import datetime, timedelta
|
| 5 |
import itertools
|
| 6 |
import textwrap
|
| 7 |
from email.mime.multipart import MIMEMultipart
|
|
|
|
| 52 |
sys.path.append(os.path.join(RDConfig.RDContribDir, 'SA_Score'))
|
| 53 |
import sascorer
|
| 54 |
|
|
|
|
| 55 |
DATASET_MAX_LEN = 10_000
|
| 56 |
SERVER_DATA_DIR = os.getenv('DATA') # '/data'
|
| 57 |
+
DB_EXPIRY = timedelta(hours=48).total_seconds()
|
| 58 |
|
| 59 |
CSS = """
|
| 60 |
.help-tip {
|
|
|
|
| 244 |
scheduler = BackgroundScheduler()
|
| 245 |
|
| 246 |
|
| 247 |
+
def remove_job_record(job_id):
    """Delete a job's database record and the files stored for it under /data.

    Args:
        job_id: Identifier of the job. The matching record is removed from
            ``db`` and every file whose path starts with ``/data/<job_id>``
            is deleted.
    """
    # Build the query object locally, the same way check_expiry() does, so
    # this helper does not depend on a ``Job`` name existing at module level.
    Job = Query()
    db.remove(Job.id == job_id)

    # Escape the identifier so any glob metacharacters in it match literally.
    pattern = f"/data/{glob.escape(str(job_id))}*"
    for file_path in glob.glob(pattern):
        try:
            os.remove(file_path)
        except FileNotFoundError:
            # The file vanished between listing and removal; nothing to do.
            pass
|
| 255 |
+
|
| 256 |
+
|
| 257 |
def check_expiry():
|
| 258 |
Job = Query()
|
| 259 |
jobs = db.all()
|
| 260 |
|
| 261 |
for job in jobs:
|
| 262 |
# Check if the job has expired
|
| 263 |
+
if job['status'] != 'RUNNING':
|
| 264 |
+
expiry_time = job['expiry_time'] if job['expiry_time'] is not None else job['start_time'] + DB_EXPIRY
|
| 265 |
+
if expiry_time < time():
|
| 266 |
+
# Delete the job from the database
|
| 267 |
+
db.remove(Job.id == job['id'])
|
| 268 |
+
# Delete the corresponding file
|
| 269 |
+
files = glob.glob(f"/data/{job['id']}*")
|
| 270 |
+
for file_path in files:
|
| 271 |
+
if os.path.exists(file_path):
|
| 272 |
+
os.remove(file_path)
|
| 273 |
elif job['status'] == 'RUNNING' and time() - job['start_time'] > 4 * 60 * 60: # 4 hours
|
| 274 |
# Mark the job as failed
|
| 275 |
db.update({'status': 'FAILED',
|
|
|
|
| 711 |
job_query = (Job.id == job_id)
|
| 712 |
|
| 713 |
end_time = time()
|
| 714 |
+
expiry_time = end_time + DB_EXPIRY
|
| 715 |
|
| 716 |
db.update({'end_time': end_time,
|
| 717 |
'expiry_time': expiry_time,
|
|
|
|
| 770 |
raw_df: df,
|
| 771 |
report_df: df.copy(),
|
| 772 |
analyze_btn: gr.Button(interactive=True),
|
| 773 |
+
report_task: task} # pie_chart
|
| 774 |
else:
|
| 775 |
return {analyze_btn: gr.Button(interactive=False)}
|
| 776 |
|
|
|
|
| 1579 |
with gr.Column():
|
| 1580 |
file_for_report = gr.File(interactive=True, type='filepath')
|
| 1581 |
report_task = gr.Dropdown(list(TASK_MAP.keys()), visible=False, value=None,
|
| 1582 |
+
label='Specify the Task Labels in the Upload Dataset')
|
| 1583 |
raw_df = gr.State(value=pd.DataFrame())
|
| 1584 |
report_df = gr.State(value=pd.DataFrame())
|
| 1585 |
scores = gr.CheckboxGroup(list(SCORE_MAP.keys()), label='Scores')
|
|
|
|
| 1672 |
|
| 1673 |
|
| 1674 |
def uniprot_query(input_type, uid, gene, organism='Human'):
|
| 1675 |
+
uniprot_endpoint = 'https://rest.uniprot.org/uniprotkb/{query}'
|
| 1676 |
fasta_rec = ''
|
| 1677 |
|
| 1678 |
match input_type:
|
|
|
|
| 1683 |
query = f'search?query=organism_name:{organism.strip()}+AND+gene:{gene.strip()}&format=fasta'
|
| 1684 |
|
| 1685 |
try:
|
| 1686 |
+
fasta = session.get(uniprot_endpoint.format(query=query))
|
| 1687 |
fasta.raise_for_status()
|
| 1688 |
if fasta.text:
|
| 1689 |
fasta_rec = next(SeqIO.parse(io.StringIO(fasta.text), format='fasta'))
|
| 1690 |
+
fasta_rec = f">{fasta_rec.description}\n{fasta_rec.seq}"
|
| 1691 |
|
| 1692 |
except Exception as e:
|
| 1693 |
raise gr.Warning(f"Failed to query FASTA from UniProt database due to {str(e)}")
|
|
|
|
| 1711 |
|
| 1712 |
|
| 1713 |
def target_family_detect(fasta, progress=gr.Progress(track_tqdm=True)):
    """Suggest a protein family for the target by aligning it to a reference library.

    The target is aligned locally (``blastp`` scoring) against every entry in
    the ``X2`` column of the ChEMBL33 library CSV, and the family of the
    best-scoring row is used.
    NOTE(review): ``X2`` presumably holds protein sequences and ``ID2`` their
    identifiers -- confirm against the CSV schema.

    Args:
        fasta: Target input passed through ``process_target_fasta`` before
            alignment.
        progress: Gradio progress tracker; not referenced directly in the body.

    Returns:
        A ``gr.Dropdown`` update carrying the capitalized family name and an
        ``info`` string explaining the match. If any step raises, a
        ``gr.Warning`` is issued and ``None`` is returned implicitly.
    """
    try:
        aligner = PairwiseAligner(scoring='blastp', mode='local')
        alignment_df = pd.read_csv('data/target_libraries/ChEMBL33_all_spe_single_prot_info.csv')
        # Process the target once here rather than once per library row
        # inside the per-row callback below.
        target_seq = process_target_fasta(fasta)

        def align_score(query):
            return aligner.align(target_seq, query).score

        alignment_df['score'] = alignment_df['X2'].swifter.progress_bar(
            desc="Detecting protein family of the target...").apply(align_score)
        row = alignment_df.loc[alignment_df['score'].idxmax()]
        return gr.Dropdown(value=row['protein_family'].capitalize(),
                           info=f"Reason: Best BLASTP score ({row['score']}) "
                                f"with {row['ID2']} from family {row['protein_family']}")
    except Exception as e:
        gr.Warning("Failed to detect the protein family due to error: " + str(e))
|
| 1729 |
|
| 1730 |
|
| 1731 |
target_family_detect_btn.click(fn=target_family_detect, inputs=target_fasta, outputs=drug_screen_target_family)
|
|
|
|
| 2259 |
return None
|
| 2260 |
|
| 2261 |
|
| 2262 |
+
def create_html_report_file(df, file_report, task, progress=gr.Progress(track_tqdm=True)):
|
| 2263 |
try:
|
| 2264 |
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
| 2265 |
filename = f"/data/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.html"
|
| 2266 |
+
create_html_report(df, filename, task)
|
| 2267 |
return gr.File(filename, visible=True)
|
| 2268 |
except Exception as e:
|
| 2269 |
gr.Warning(f"Failed to generate HTML due to error: {str(e)}")
|
|
|
|
| 2277 |
outputs=csv_download_file, show_progress='full')
|
| 2278 |
html_generate.click(
|
| 2279 |
lambda: [gr.Button(visible=False), gr.File(visible=True)], outputs=[html_generate, html_download_file],
|
| 2280 |
+
).then(fn=create_html_report_file, inputs=[report_df, file_for_report, report_task],
|
| 2281 |
outputs=html_download_file, show_progress='full')
|
| 2282 |
|
| 2283 |
if __name__ == "__main__":
|