libokj committed on
Commit
47d08b9
·
1 Parent(s): 11c31b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +161 -100
app.py CHANGED
@@ -824,6 +824,93 @@ using the job id. You will also receive an email notification once the job is do
824
  raise gr.Error(f'Failed to retrieve job status due to error: {str(e)}')
825
 
826
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
827
  def submit_predict(predict_filepath, task, preset, target_family, opts, job_info):
828
  job_id = job_info['id']
829
  status = job_info['status']
@@ -968,88 +1055,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
968
  df_list = [prediction_df, annotated_df]
969
  prediction_df = pd.concat([df for df in df_list if not df.empty], ignore_index=True)
970
 
971
- # Advanced options for Drug Hit Screening
972
- if "Calculate Max. Sequence Identity between the Input Target and Targets in the Training Set" in opts:
973
- x2 = prediction_df['X2'].iloc[0]
974
-
975
- prediction_df[[
976
- 'Max. Sequence Identity to Training Targets',
977
- 'Max. Id. Training Target'
978
- ]] = pd.Series(max_sequence_identity(x2, df_training))
979
-
980
- if "Calculate Max. Tanimoto Similarity between the Hit Compound and Known Ligands of the Input Target" in opts:
981
- x2 = prediction_df['X2'].iloc[0]
982
- pos_compounds_df = df_training.loc[(df_training['X2'] == x2) & (df_training['Y'] == 1)].copy()
983
- pos_compounds_df['FP'] = pos_compounds_df['X1'].parallel_apply(smiles_to_ecfp)
984
-
985
- @cache
986
- def max_sim(smiles):
987
- return max_tanimoto_similarity(smiles, seen_smiles_with_fp=pos_compounds_df)
988
-
989
- prediction_df[[
990
- 'Max. Tanimoto Similarity to Known Ligands',
991
- 'Max. Sim. Ligand'
992
- ]] = prediction_df['X1'].parallel_apply(max_sim).apply(pd.Series)
993
-
994
- max_sim.cache_clear()
995
-
996
- if "Calculate Max. Sequence Identity between the Input Target and Known Targets of Hit Compound" in opts:
997
- x2 = prediction_df['X2'].iloc[0]
998
- prediction_df['X1^'] = prediction_df['X1'].parallel_apply(rdkit_canonicalize)
999
-
1000
- @cache
1001
- def max_id(compound):
1002
- pos_targets_df = df_training.loc[df_training['X1'] == compound]
1003
- return max_sequence_identity(x2, seen_fastas=pos_targets_df)
1004
-
1005
- prediction_df[['Max. Sequence Identity to Known Targets of Hit Compound',
1006
- 'Max. Id. Target']] = (
1007
- prediction_df['X1^'].parallel_apply(max_id).apply(pd.Series)
1008
- )
1009
- prediction_df.drop(['X1^'], axis=1, inplace=True)
1010
-
1011
- max_id.cache_clear()
1012
-
1013
- # Advanced options for Target Protein Identification
1014
- if "Calculate Max. Tanimoto Similarity between the Input Compound and Compounds in the Training Set" in opts:
1015
- x1 = rdkit_canonicalize(prediction_df['X1'].iloc[0])
1016
- prediction_df['FP'] = prediction_df['X1'].parallel_apply(smiles_to_ecfp)
1017
-
1018
- prediction_df[[
1019
- 'Max. Tanimoto Similarity to Training Compounds',
1020
- 'Max. Sim. Training Compound'
1021
- ]] = pd.Series(max_tanimoto_similarity(x1, df_training))
1022
-
1023
- if "Calculate Max. Sequence Identity between the Identified Target and Known Targets of the Input Compound" in opts:
1024
- x1 = rdkit_canonicalize(prediction_df['X1'].iloc[0])
1025
- pos_targets_df = df_training.loc[(df_training['X1'] == x1) & (df_training['Y'] == 1)].copy()
1026
-
1027
- @cache
1028
- def max_id(fasta):
1029
- return max_sequence_identity(fasta, seen_fastas=pos_targets_df)
1030
-
1031
- prediction_df[[
1032
- 'Max. Sequence Identity to Known Targets of Input Compound',
1033
- 'Max. Id. Target'
1034
- ]] = prediction_df['X2'].parallel_apply(max_id).apply(pd.Series)
1035
-
1036
- max_id.cache_clear()
1037
-
1038
- if "Calculate Max. Tanimoto Similarity between the Input Compound and Known Ligands of the Identified Target" in opts:
1039
- x1 = rdkit_canonicalize(prediction_df['X1'].iloc[0])
1040
-
1041
- @cache
1042
- def max_sim(fasta):
1043
- pos_targets_df = df_training.loc[(df_training['X2'] == fasta) & (df_training['Y'] == 1)].copy()
1044
- pos_targets_df['FP'] = pos_targets_df['X1'].apply(smiles_to_ecfp)
1045
- return max_tanimoto_similarity(x1, seen_smiles_with_fp=pos_targets_df)
1046
-
1047
- prediction_df[[
1048
- 'Max. Tanimoto Similarity to Known Ligands of Identified Target',
1049
- 'Max. Sim. Ligand'
1050
- ]] = prediction_df['X2'].parallel_apply(max_sim).apply(pd.Series)
1051
-
1052
- max_sim.cache_clear()
1053
 
1054
  prediction_df.drop(
1055
  [col for col in ['N', 'FP'] if col in prediction_df.columns], axis=1
@@ -1087,6 +1093,8 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
1087
  def update_df(file, progress=gr.Progress(track_tqdm=True)):
1088
  if file and Path(file).is_file():
1089
  task = None
 
 
1090
  if "_CPI_" in str(file):
1091
  task = 'Compound-Protein Interaction'
1092
  elif "_CPA_" in str(file):
@@ -1113,11 +1121,33 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
1113
  if 'Y^' in df.columns:
1114
  df['Y^'] = 10 ** (-df['Y^'])
1115
 
1116
- return {html_report: create_html_report(df, file=None, task=task),
1117
- raw_df: df,
1118
- report_df: df.copy(),
1119
- analyze_btn: gr.Button(interactive=True),
1120
- report_task: task} # pie_chart
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1121
  else:
1122
  return {analyze_btn: gr.Button(interactive=False)}
1123
 
@@ -1253,6 +1283,21 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
1253
  if unique_df is not None:
1254
  if 'Target FASTA' in unique_df.columns:
1255
  unique_df['Target FASTA'] = unique_df['Target FASTA'].str.replace('\n', '<br>')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1256
  if any(unique_df.columns.isin(bool_cols)):
1257
  unique_df = unique_df.style.applymap(
1258
  lambda val: f"background-color: {bool_col_colors[val]}", subset=bool_cols)
@@ -1268,11 +1313,11 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
1268
  uniprot_id_formatter = HTMLTemplateFormatter(
1269
  template='<% if (value == value) { ' # Check if value is not NaN
1270
  'if (/^[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}$/.test(value)) '
1271
- # Check if value is a valid UniProt ID
1272
  '{ %><a href="https://www.uniprot.org/uniprotkb/<%= value %>" target="_blank"><%= value %></a><% '
1273
- # Else treat it as a sequence or other plain-text string, line-wrapping every 60 characters
1274
- '} else { %><div style="white-space: pre-wrap;"><%= value.match(/.{1,60}/g).join("<br>") %></div><% } %>'
1275
- '<% } else { %><% } %>' # Output empty string if value is NaN
1276
  )
1277
  pubchem_id_formatter = HTMLTemplateFormatter(
1278
  template='<% if (value == value) { ' # Check if value is not NaN
@@ -1280,6 +1325,9 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
1280
  'target="_blank"><%= value %></a>'
1281
  '<% } else { %><% } %>' # Output empty string if value is NaN
1282
  )
 
 
 
1283
  bool_formatters = {col: BooleanFormatter() for col in bool_cols}
1284
  float_formatters = {col: NumberFormatter(format='0.000') for col in df_html.select_dtypes('floating').columns}
1285
  other_formatters = {
@@ -1294,6 +1342,8 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
1294
  'Max. Id. Target': uniprot_id_formatter,
1295
  'Max. Sim. Training Compound': pubchem_id_formatter,
1296
  'Max. Id. Training Target': uniprot_id_formatter,
 
 
1297
  }
1298
  formatters = {**bool_formatters, **float_formatters, **other_formatters}
1299
 
@@ -1492,7 +1542,7 @@ def create_pie_chart(df, category, value, top_k):
1492
  return p
1493
 
1494
 
1495
- def submit_report(df, score_list, filter_list, task, progress=gr.Progress(track_tqdm=True)):
1496
  df_report = df.copy()
1497
  try:
1498
  for filter_name in filter_list:
@@ -1503,6 +1553,10 @@ def submit_report(df, score_list, filter_list, task, progress=gr.Progress(track_
1503
  df_report[score_name] = df_report['Compound'].parallel_apply(
1504
  lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x)
1505
 
 
 
 
 
1506
  return (create_html_report(df_report, file=None, task=task), df_report,
1507
  gr.File(visible=False), gr.File(visible=False))
1508
 
@@ -1726,10 +1780,12 @@ this serves as an additional indicator of the confidence level of the predicted
1726
  higher identities usually lead to greater confidence in the predictions.<br>
1727
  """)
1728
  drug_screen_opts = gr.CheckboxGroup(
1729
- label="Step 6. Select Additional Options",
 
1730
  choices=DRUG_SCRENN_CPI_OPTS,
1731
- info="Experimental features - may increase the job computation time. "
1732
- "See the Help Tip on the right or the Documentation for detailed explanation."
 
1733
  )
1734
  with gr.Row():
1735
  with gr.Column():
@@ -1845,8 +1901,9 @@ higher similarities usually correspond to greater prediction confidence.<br>
1845
  """)
1846
  target_identify_opts = gr.CheckboxGroup(
1847
  choices=TARGET_IDENTIFY_CPI_OPTS,
1848
- label='Step 6. Select Additional Options',
1849
- info="Experimental features - may increase the job computation time. "
 
1850
  "See the Help Tip on the right or the Documentation for detailed explanation."
1851
  )
1852
  with gr.Row():
@@ -2021,8 +2078,11 @@ higher similarities usually correspond to greater prediction confidence.<br>
2021
  label='Specify the Task Labels in the Uploaded Dataset')
2022
  with gr.Column(scale=2):
2023
  with gr.Row():
2024
- scores = gr.CheckboxGroup(list(SCORE_MAP.keys()), label='Compound Scores')
2025
- filters = gr.CheckboxGroup(list(FILTER_MAP.keys()), label='Compound Filters')
 
 
 
2026
  with gr.Accordion('Report Generate Options', open=True):
2027
  with gr.Row():
2028
  csv_sep = gr.Radio(label='CSV Delimiter',
@@ -2784,7 +2844,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2784
  )
2785
 
2786
  analyze_btn.click(
2787
- fn=submit_report, inputs=[raw_df, scores, filters, report_task], outputs=[
2788
  html_report, report_df, csv_download_file, html_download_file]
2789
  ).success(
2790
  fn=lambda: [gr.Button(interactive=True)] * 2,
@@ -2793,6 +2853,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2793
  )
2794
 
2795
 
 
2796
  def create_csv_report_file(df, file_report, task, sep, progress=gr.Progress(track_tqdm=True)):
2797
  csv_sep_map = {
2798
  'Comma': ',',
 
824
  raise gr.Error(f'Failed to retrieve job status due to error: {str(e)}')
825
 
826
 
827
def apply_advanced_opts(prediction_df, opts, df_training):
    """Add the columns requested by the selected advanced options.

    Every option label found in ``opts`` triggers one identity/similarity
    computation against ``df_training`` and writes a pair of columns into
    ``prediction_df``. The frame is modified in place and is also returned.

    The branches run in a fixed order; some of them write to the same
    column name ('Max. Id. Target', 'Max. Sim. Ligand'), so when several
    such options are selected the later branch overwrites the earlier one.

    Args:
        prediction_df: Prediction table; its ``X1`` and ``X2`` columns are read.
        opts: Collection of selected option labels (membership-tested only).
        df_training: Training table; its ``X1``, ``X2`` and ``Y`` columns are read.

    Returns:
        The same ``prediction_df`` object with the requested columns added.
    """
    # ---- Advanced options for Drug Hit Screening ----
    if "Calculate Max. Sequence Identity between the Input Target and Targets in the Training Set" in opts:
        # Only the first row's X2 is used as the query target.
        query_target = prediction_df['X2'].iloc[0]
        training_identity = pd.Series(max_sequence_identity(query_target, df_training))

        prediction_df[[
            'Max. Sequence Identity to Training Targets',
            'Max. Id. Training Target',
        ]] = training_identity

    if "Calculate Max. Tanimoto Similarity between the Hit Compound and Known Ligands of the Input Target" in opts:
        query_target = prediction_df['X2'].iloc[0]
        # Training rows for the same target with a positive label.
        is_known_ligand = (df_training['X2'] == query_target) & (df_training['Y'] == 1)
        known_ligands_df = df_training.loc[is_known_ligand].copy()
        known_ligands_df['FP'] = known_ligands_df['X1'].parallel_apply(smiles_to_ecfp)

        @cache
        def ligand_similarity(smiles):
            return max_tanimoto_similarity(smiles, seen_smiles_with_fp=known_ligands_df)

        ligand_hits = prediction_df['X1'].parallel_apply(ligand_similarity)
        prediction_df[[
            'Max. Tanimoto Similarity to Known Ligands',
            'Max. Sim. Ligand',
        ]] = ligand_hits.apply(pd.Series)

        ligand_similarity.cache_clear()

    if "Calculate Max. Sequence Identity between the Input Target and Known Targets of Hit Compound" in opts:
        query_target = prediction_df['X2'].iloc[0]
        # Temporary canonicalized-compound column; removed again below.
        prediction_df['X1^'] = prediction_df['X1'].parallel_apply(rdkit_canonicalize)

        @cache
        def compound_target_identity(compound):
            targets_of_compound = df_training.loc[df_training['X1'] == compound]
            return max_sequence_identity(query_target, seen_fastas=targets_of_compound)

        target_hits = prediction_df['X1^'].parallel_apply(compound_target_identity)
        prediction_df[[
            'Max. Sequence Identity to Known Targets of Hit Compound',
            'Max. Id. Target',
        ]] = target_hits.apply(pd.Series)
        prediction_df.drop(columns=['X1^'], inplace=True)

        compound_target_identity.cache_clear()

    # ---- Advanced options for Target Protein Identification ----
    if "Calculate Max. Tanimoto Similarity between the Input Compound and Compounds in the Training Set" in opts:
        # Only the first row's X1 is used as the query compound.
        query_compound = rdkit_canonicalize(prediction_df['X1'].iloc[0])
        # NOTE(review): this 'FP' column is left on the frame for the caller to drop.
        prediction_df['FP'] = prediction_df['X1'].parallel_apply(smiles_to_ecfp)
        training_similarity = pd.Series(max_tanimoto_similarity(query_compound, df_training))

        prediction_df[[
            'Max. Tanimoto Similarity to Training Compounds',
            'Max. Sim. Training Compound',
        ]] = training_similarity

    if "Calculate Max. Sequence Identity between the Identified Target and Known Targets of the Input Compound" in opts:
        query_compound = rdkit_canonicalize(prediction_df['X1'].iloc[0])
        # Training rows for the same compound with a positive label.
        is_known_target = (df_training['X1'] == query_compound) & (df_training['Y'] == 1)
        known_targets_df = df_training.loc[is_known_target].copy()

        @cache
        def target_identity(fasta):
            return max_sequence_identity(fasta, seen_fastas=known_targets_df)

        identity_hits = prediction_df['X2'].parallel_apply(target_identity)
        prediction_df[[
            'Max. Sequence Identity to Known Targets of Input Compound',
            'Max. Id. Target',
        ]] = identity_hits.apply(pd.Series)

        target_identity.cache_clear()

    if "Calculate Max. Tanimoto Similarity between the Input Compound and Known Ligands of the Identified Target" in opts:
        query_compound = rdkit_canonicalize(prediction_df['X1'].iloc[0])

        @cache
        def target_ligand_similarity(fasta):
            # Positive-labelled training rows for this particular target.
            is_ligand_of_target = (df_training['X2'] == fasta) & (df_training['Y'] == 1)
            ligands_of_target = df_training.loc[is_ligand_of_target].copy()
            ligands_of_target['FP'] = ligands_of_target['X1'].apply(smiles_to_ecfp)
            return max_tanimoto_similarity(query_compound, seen_smiles_with_fp=ligands_of_target)

        similarity_hits = prediction_df['X2'].parallel_apply(target_ligand_similarity)
        prediction_df[[
            'Max. Tanimoto Similarity to Known Ligands of Identified Target',
            'Max. Sim. Ligand',
        ]] = similarity_hits.apply(pd.Series)

        target_ligand_similarity.cache_clear()

    return prediction_df
912
+
913
+
914
  def submit_predict(predict_filepath, task, preset, target_family, opts, job_info):
915
  job_id = job_info['id']
916
  status = job_info['status']
 
1055
  df_list = [prediction_df, annotated_df]
1056
  prediction_df = pd.concat([df for df in df_list if not df.empty], ignore_index=True)
1057
 
1058
+ prediction_df = apply_advanced_opts(prediction_df, opts, df_training)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1059
 
1060
  prediction_df.drop(
1061
  [col for col in ['N', 'FP'] if col in prediction_df.columns], axis=1
 
1093
  def update_df(file, progress=gr.Progress(track_tqdm=True)):
1094
  if file and Path(file).is_file():
1095
  task = None
1096
+ job = None
1097
+
1098
  if "_CPI_" in str(file):
1099
  task = 'Compound-Protein Interaction'
1100
  elif "_CPA_" in str(file):
 
1121
  if 'Y^' in df.columns:
1122
  df['Y^'] = 10 ** (-df['Y^'])
1123
 
1124
+ n_compound = df['X1'].nunique()
1125
+ n_protein = df['X2'].nunique()
1126
+
1127
+ if n_compound == 1 and n_protein >= 2:
1128
+ job = 'Target Protein Identification'
1129
+ if task == 'Compound-Protein Interaction':
1130
+ opts = TARGET_IDENTIFY_CPI_OPTS
1131
+ elif task == 'Compound-Protein Binding Affinity':
1132
+ opts = TARGET_IDENTIFY_CPA_OPTS
1133
+ if n_compound >= 2 and n_protein == 1:
1134
+ job = 'Drug Hit Screening'
1135
+ if task == 'Compound-Protein Interaction':
1136
+ opts = DRUG_SCRENN_CPI_OPTS
1137
+ elif task == 'Compound-Protein Binding Affinity':
1138
+ opts = DRUG_SCRENN_CPA_OPTS
1139
+
1140
+ return {
1141
+ html_report: create_html_report(df, file=None, task=task),
1142
+ raw_df: df,
1143
+ report_df: df.copy(),
1144
+ analyze_btn: gr.Button(interactive=True),
1145
+ report_task: task,
1146
+ job_opts: gr.CheckboxGroup(
1147
+ label=f'{job} Advanced Options',
1148
+ choices=opts,
1149
+ ) if job else gr.CheckboxGroup(visible=False),
1150
+ }
1151
  else:
1152
  return {analyze_btn: gr.Button(interactive=False)}
1153
 
 
1283
  if unique_df is not None:
1284
  if 'Target FASTA' in unique_df.columns:
1285
  unique_df['Target FASTA'] = unique_df['Target FASTA'].str.replace('\n', '<br>')
1286
+
1287
+ if 'Max. Sequence Identity to Training Targets' in unique_df.columns:
1288
+ # Add alert emoji for sequence identity below 0.85
1289
+ if unique_df['Max. Sequence Identity to Training Targets'].iloc[0] < 0.85:
1290
+ unique_df['Max. Sequence Identity to Training Targets'] = (
1291
+ f'{unique_df["Max. Sequence Identity to Training Targets"]:.3f} ⚠️'
1292
+ )
1293
+
1294
+ if 'Max. Tanimoto Similarity to Training Compounds' in unique_df.columns:
1295
+ # Add alert emoji for Tanimoto similarity below 0.85
1296
+ if unique_df['Max. Tanimoto Similarity to Training Compounds'].iloc[0] < 0.85:
1297
+ unique_df['Max. Tanimoto Similarity to Training Compounds'] = (
1298
+ f'{unique_df["Max. Tanimoto Similarity to Training Compounds"]:.3f} ⚠️'
1299
+ )
1300
+
1301
  if any(unique_df.columns.isin(bool_cols)):
1302
  unique_df = unique_df.style.applymap(
1303
  lambda val: f"background-color: {bool_col_colors[val]}", subset=bool_cols)
 
1313
  uniprot_id_formatter = HTMLTemplateFormatter(
1314
  template='<% if (value == value) { ' # Check if value is not NaN
1315
  'if (/^[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}$/.test(value)) '
1316
+ # Check if value is a valid UniProt ID
1317
  '{ %><a href="https://www.uniprot.org/uniprotkb/<%= value %>" target="_blank"><%= value %></a><% '
1318
+ # Else treat it as a sequence or other plain-text string, line-wrapping every 60 characters
1319
+ '} else { %><div style="white-space: pre-wrap;"><%= value.match(/.{1,60}/g).join("<br>") '
1320
+ '%></div><% } %><% } else { %><% } %>' # Output empty string if value is NaN
1321
  )
1322
  pubchem_id_formatter = HTMLTemplateFormatter(
1323
  template='<% if (value == value) { ' # Check if value is not NaN
 
1325
  'target="_blank"><%= value %></a>'
1326
  '<% } else { %><% } %>' # Output empty string if value is NaN
1327
  )
1328
+ alert_emoji_formatter = HTMLTemplateFormatter(
1329
+ template='<% if (value < 0.85) { %><%= value %> ⚠️<% } else { %><%= value %><% } %>'
1330
+ )
1331
  bool_formatters = {col: BooleanFormatter() for col in bool_cols}
1332
  float_formatters = {col: NumberFormatter(format='0.000') for col in df_html.select_dtypes('floating').columns}
1333
  other_formatters = {
 
1342
  'Max. Id. Target': uniprot_id_formatter,
1343
  'Max. Sim. Training Compound': pubchem_id_formatter,
1344
  'Max. Id. Training Target': uniprot_id_formatter,
1345
+ 'Max. Sequence Identity to Training Targets': alert_emoji_formatter,
1346
+ 'Max. Sequence Identity to Known Targets of Hit Compound': alert_emoji_formatter,
1347
  }
1348
  formatters = {**bool_formatters, **float_formatters, **other_formatters}
1349
 
 
1542
  return p
1543
 
1544
 
1545
+ def submit_report(df, score_list, filter_list, opt_list, task, progress=gr.Progress(track_tqdm=True)):
1546
  df_report = df.copy()
1547
  try:
1548
  for filter_name in filter_list:
 
1553
  df_report[score_name] = df_report['Compound'].parallel_apply(
1554
  lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x)
1555
 
1556
+ if opt_list:
1557
+ df_training = pd.read_csv(f'data/complete_{TASK_MAP[task].lower()}_dataset.csv')
1558
+ df_report = apply_advanced_opts(df_report, opt_list, df_training)
1559
+
1560
  return (create_html_report(df_report, file=None, task=task), df_report,
1561
  gr.File(visible=False), gr.File(visible=False))
1562
 
 
1780
  higher identities usually lead to greater confidence in the predictions.<br>
1781
  """)
1782
  drug_screen_opts = gr.CheckboxGroup(
1783
+ label="Step 6. Select Advanced Options",
1784
+ value=DRUG_SCRENN_CPI_OPTS[0],
1785
  choices=DRUG_SCRENN_CPI_OPTS,
1786
+ info="Advanced features - may increase the job computation time. "
1787
+ "See the Help Tip on the right or the Documentation for detailed explanation.",
1788
+
1789
  )
1790
  with gr.Row():
1791
  with gr.Column():
 
1901
  """)
1902
  target_identify_opts = gr.CheckboxGroup(
1903
  choices=TARGET_IDENTIFY_CPI_OPTS,
1904
+ value=TARGET_IDENTIFY_CPI_OPTS[0],
1905
+ label='Step 6. Select Advanced Options',
1906
+ info="Advanced features - may increase the job computation time. "
1907
  "See the Help Tip on the right or the Documentation for detailed explanation."
1908
  )
1909
  with gr.Row():
 
2078
  label='Specify the Task Labels in the Uploaded Dataset')
2079
  with gr.Column(scale=2):
2080
  with gr.Row():
2081
+ with gr.Row():
2082
+ scores = gr.CheckboxGroup(list(SCORE_MAP.keys()), label='Compound Scores')
2083
+ filters = gr.CheckboxGroup(list(FILTER_MAP.keys()), label='Compound Filters')
2084
+ job_opts = gr.CheckboxGroup(visible=False)
2085
+
2086
  with gr.Accordion('Report Generate Options', open=True):
2087
  with gr.Row():
2088
  csv_sep = gr.Radio(label='CSV Delimiter',
 
2844
  )
2845
 
2846
  analyze_btn.click(
2847
+ fn=submit_report, inputs=[raw_df, scores, filters, job_opts, report_task], outputs=[
2848
  html_report, report_df, csv_download_file, html_download_file]
2849
  ).success(
2850
  fn=lambda: [gr.Button(interactive=True)] * 2,
 
2853
  )
2854
 
2855
 
2856
+
2857
  def create_csv_report_file(df, file_report, task, sep, progress=gr.Progress(track_tqdm=True)):
2858
  csv_sep_map = {
2859
  'Comma': ',',