libokj committed on
Commit
756d6b0
·
1 Parent(s): a12922f

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -75
app.py CHANGED
@@ -846,20 +846,20 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS) as demo:
846
  infer_flag = gr.State(value=False)
847
 
848
  with gr.Tabs() as tabs:
849
- with gr.TabItem(label='Drug hit screening', id=0):
850
  gr.Markdown('''
851
- # <center>DeepSEQreen Drug Hit Screening</center>
852
- <center>
853
- To predict interactions/binding affinities of a single target against a library of compounds.
854
- </center>
855
  ''')
856
  with gr.Blocks() as screen_block:
857
  with gr.Column() as screen_page:
858
  with gr.Row():
859
  with gr.Column():
860
  HelpTip(
861
- "Enter (paste) a amino acid sequence below manually or upload a FASTA file."
862
- "If multiple entities are in the FASTA, only the first will be used."
863
  "Alternatively, enter a Uniprot ID or gene symbol with organism and click Query for "
864
  "the sequence."
865
  )
@@ -883,11 +883,11 @@ To predict interactions/binding affinities of a single target against a library
883
  info='Organism scientific name (default: Homo sapiens).',
884
  placeholder='Homo sapiens', show_label=False,
885
  visible=False, interactive=True, scale=4, )
886
- target_upload_btn = gr.UploadButton(label='Upload a FASTA file', type='binary',
887
  visible=True, variant='primary',
888
  size='lg')
889
- target_paste_markdown = gr.Button(value='Or paste your sequence below', visible=True)
890
- target_query_btn = gr.Button(value='Query the sequence', variant='primary',
891
  visible=False, scale=4)
892
  # with gr.Row():
893
  # example_uniprot = gr.Button(value='Example: Q16539', elem_classes='example', visible=False)
@@ -905,27 +905,28 @@ To predict interactions/binding affinities of a single target against a library
905
  HelpTip(
906
  "Click Auto-detect to identify the protein family using sequence alignment. "
907
  "This optional step allows applying a family-specific model instead of a all-family "
908
- "model (general)."
909
  "Manually select general if the alignment results are unsatisfactory."
910
  )
911
  drug_screen_target_family = gr.Dropdown(
912
  choices=list(TARGET_FAMILY_MAP.keys()),
913
  value='General',
914
- label='Step 2. Select Input Protein Family (Optional)', interactive=True)
915
  # with gr.Column(scale=1, min_width=24):
916
 
917
  with gr.Row():
918
  with gr.Column():
919
- target_family_detect_btn = gr.Button(value='Auto-detect', variant='primary')
920
 
921
  with gr.Row():
922
  with gr.Column():
923
  HelpTip(
924
- "Select a preset compound library (e.g., DrugBank)."
925
  "Alternatively, upload a CSV file with a column named X1 containing compound SMILES, "
926
- "or use an SDF file."
 
927
  )
928
- drug_library = gr.Dropdown(label='Step 3. Select or Upload a Compound Library',
929
  choices=list(DRUG_LIBRARY_MAP.keys()))
930
  with gr.Row():
931
  gr.File(label='Example SDF compound library',
@@ -933,42 +934,41 @@ To predict interactions/binding affinities of a single target against a library
933
  gr.File(label='Example CSV compound library',
934
  value='data/examples/compound_library.csv', interactive=False)
935
  drug_library_upload_btn = gr.UploadButton(
936
- label='Upload a custom library', variant='primary')
937
  drug_library_upload = gr.File(label='Custom compound library file', visible=False)
938
  with gr.Row():
939
  with gr.Column():
940
  HelpTip(
941
  "Interaction prediction provides you binding probability score between the target of "
942
- "interest and each compound in the library,"
943
  "while affinity prediction directly estimates their binding strength measured using "
944
  "IC50."
945
  )
946
  drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()),
947
- label='Step 4. Select a Prediction Task',
948
  value='Compound-protein interaction')
949
 
950
  with gr.Row():
951
  with gr.Column():
952
  HelpTip(
953
  "Select your preferred model, or click Recommend for the best-performing model based "
954
- "on the selected task, family, and whether the target was trained."
955
  "Please refer to documentation for detailed benchamrk results."
956
  )
957
  drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()),
958
  label='Step 5. Select a Preset Model')
959
- screen_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
960
  with gr.Row():
961
  with gr.Column():
962
  drug_screen_email = gr.Textbox(
963
- label='Step 6. Email (Optional)',
964
- info="If an email is provided, a notification email will be sent to you when your job "
965
- "is completed."
966
  )
967
 
968
  with gr.Row(visible=True):
969
  with gr.Column():
970
  # drug_screen_clr_btn = gr.ClearButton(size='lg')
971
- drug_screen_btn = gr.Button(value='SCREEN', variant='primary', size='lg')
972
  # TODO Modify the pd df directly with df['X2'] = target
973
 
974
  screen_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
@@ -980,19 +980,19 @@ To predict interactions/binding affinities of a single target against a library
980
 
981
  with gr.TabItem(label='Target protein identification', id=1):
982
  gr.Markdown('''
983
- # <center>Target Protein Identification</center>
984
-
985
- <center>
986
- To predict interactions/binding affinities of a single compound against a library of protein targets.
987
- </center>
988
  ''')
989
  with gr.Blocks() as identify_block:
990
  with gr.Column() as identify_page:
991
  with gr.Row():
992
  with gr.Column():
993
  HelpTip(
994
- "Enter (paste) a compound SMILES below manually or upload a SDF file."
995
- "If multiple entities are in the SDF, only the first will be used."
996
  "SMILES can be obtained by searching for the compound of interest in databases such "
997
  "as NCBI, PubChem and and ChEMBL."
998
  )
@@ -1002,7 +1002,7 @@ To predict interactions/binding affinities of a single compound against a librar
1002
  info='Enter (paste) an SMILES string or upload an SDF file to convert to SMILES.',
1003
  value='SMILES',
1004
  interactive=True)
1005
- compound_upload_btn = gr.UploadButton(label='Upload', variant='primary',
1006
  type='binary', visible=False)
1007
 
1008
  compound_smiles = gr.Code(label='Input or Display Compound SMILES', interactive=True, lines=5)
@@ -1011,23 +1011,25 @@ To predict interactions/binding affinities of a single compound against a librar
1011
  with gr.Row():
1012
  with gr.Column():
1013
  HelpTip(
1014
- "By default, models trained on all protein families (general) will be applied."
1015
- "If the proteins in the target library of interest all belong to the same protein "
1016
- "family, manually selecting the family is supported."
1017
  )
1018
  target_identify_target_family = gr.Dropdown(choices=['General'],
1019
  value='General',
1020
- label='Step 2. Select Target Protein Family ('
1021
  'Optional)')
1022
 
1023
  with gr.Row():
1024
  with gr.Column():
1025
  HelpTip(
1026
- "Select a preset target library (e.g., ChEMBL33_human_proteins)."
1027
  "Alternatively, upload a CSV file with a column named X2 containing target protein "
1028
- "sequences, or use an FASTA file."
 
 
1029
  )
1030
- target_library = gr.Dropdown(label='Step 3. Select or Upload a Target Library',
1031
  choices=list(TARGET_LIBRARY_MAP.keys()))
1032
  with gr.Row():
1033
  gr.File(label='Example FASTA target library',
@@ -1035,7 +1037,7 @@ To predict interactions/binding affinities of a single compound against a librar
1035
  gr.File(label='Example CSV target library',
1036
  value='data/examples/target_library.csv', interactive=False)
1037
  target_library_upload_btn = gr.UploadButton(
1038
- label='Upload a custom library', variant='primary')
1039
  target_library_upload = gr.File(label='Custom target library file', visible=False)
1040
 
1041
  with gr.Row():
@@ -1047,7 +1049,7 @@ To predict interactions/binding affinities of a single compound against a librar
1047
  "IC50."
1048
  )
1049
  target_identify_task = gr.Dropdown(list(TASK_MAP.keys()),
1050
- label='Step 4. Select a Prediction Task',
1051
  value='Compound-protein interaction')
1052
 
1053
  with gr.Row():
@@ -1057,21 +1059,21 @@ To predict interactions/binding affinities of a single compound against a librar
1057
  "on the selected task, family, and whether the compound was trained. "
1058
  "Please refer to documentation for detailed benchamrk results."
1059
  )
1060
- target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 5. Select a '
1061
- 'Preset Model')
1062
- identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
 
1063
 
1064
  with gr.Row():
1065
  with gr.Column():
1066
  target_identify_email = gr.Textbox(
1067
- label='Step 6. Email (Optional)',
1068
- info="If an email is provided, a notification email will be sent to you when your job "
1069
- "is completed."
1070
  )
1071
 
1072
  with gr.Row(visible=True):
1073
  # target_identify_clr_btn = gr.ClearButton(size='lg')
1074
- target_identify_btn = gr.Button(value='IDENTIFY', variant='primary', size='lg')
1075
 
1076
  identify_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
1077
  identify_waiting = gr.Markdown(f"Your job is running... It might take a few minutes."
@@ -1081,7 +1083,7 @@ To predict interactions/binding affinities of a single compound against a librar
1081
  with gr.TabItem(label='Interaction pair inference', id=2):
1082
  gr.Markdown('''
1083
  # <center>Interaction Pair Inference</center>
1084
- <center>To predict interactions/binding affinities between any compound-protein pairs.</center>
1085
  ''')
1086
  with gr.Blocks() as infer_block:
1087
  with gr.Column() as infer_page:
@@ -1089,22 +1091,29 @@ To predict interactions/binding affinities of a single compound against a librar
1089
  "A custom interation pair dataset can be a CSV file with 2 required columns "
1090
  "(X1 for smiles and X2 for sequences) "
1091
  "and optionally 2 ID columns (ID1 for compound ID and ID2 for target ID), "
1092
- "or generated from a FASTA file containing multiple"
1093
- "sequences and a SDF file containing multiple compounds."
 
 
 
1094
  )
1095
  infer_type = gr.Dropdown(
1096
- choices=['Upload a CSV interaction pair dataset',
1097
  'Upload a compound library and a target library'],
1098
  label='Step 1. Select Pair Input Type and Input',
1099
- value='Upload a CSV interaction pair dataset')
1100
  with gr.Column() as pair_upload:
1101
- gr.File(label="Example custom dataset",
1102
  value="data/examples/interaction_pair_inference.csv",
1103
  interactive=False)
 
 
 
1104
  with gr.Column():
1105
  infer_data_for_predict = gr.File(
1106
- label='Upload a custom dataset', file_count="single", type='filepath', visible=True)
1107
- with gr.Column() as pair_generate:
 
1108
  with gr.Row():
1109
  gr.File(label='Example SDF compound library',
1110
  value='data/examples/compound_library.sdf', interactive=False)
@@ -1116,48 +1125,56 @@ To predict interactions/binding affinities of a single compound against a librar
1116
  gr.File(label='Example CSV target library',
1117
  value='data/examples/target_library.csv', interactive=False)
1118
  with gr.Row():
1119
- infer_drug = gr.File(label='SDF/CSV file containing multiple compounds',
 
 
 
1120
  file_count="single", type='filepath')
1121
- infer_target = gr.File(label='FASTA/CSV file containing multiple targets',
1122
  file_count="single", type='filepath')
1123
 
1124
  with gr.Row():
1125
  with gr.Column():
1126
  HelpTip(
1127
  "By default, models trained on all protein families (general) will be applied. "
1128
- "If the proteins in the target library of interest all belong to the same protein family, manually selecting the family is supported."
 
1129
  )
1130
  pair_infer_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
1131
  value='General',
1132
- label='Step 2. Select Target Protein Family (Optional)')
1133
 
1134
  with gr.Row():
1135
  with gr.Column():
1136
  HelpTip(
1137
- "Interaction prediction provides you binding probability score between the target of interest and each compound in the library, "
1138
- "while affinity prediction directly estimates their binding strength measured using IC50."
 
 
1139
  )
1140
  pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()),
1141
- label='Step 3. Select a Prediction Task',
1142
  value='Compound-protein interaction')
1143
 
1144
  with gr.Row():
1145
  with gr.Column():
1146
  HelpTip("Select your preferred model. "
1147
- "Please refer to documentation for detailed benchamrk results."
1148
  )
1149
- pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 4. Select a Preset Model')
1150
- # infer_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
 
 
1151
 
1152
  with gr.Row():
1153
  pair_infer_email = gr.Textbox(
1154
- label='Step 5. Email (Optional)',
1155
- info="If an email is provided, a notification email will be sent to you when your job is completed."
1156
  )
1157
 
1158
  with gr.Row(visible=True):
1159
  # pair_infer_clr_btn = gr.ClearButton(size='lg')
1160
- pair_infer_btn = gr.Button(value='INFER', variant='primary', size='lg')
1161
 
1162
  infer_waiting = gr.Markdown(f"Your job is running... It might take a few minutes."
1163
  f"When it's done, you will be redirected to the report page. "
@@ -1400,7 +1417,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1400
  elif task == 'DTA':
1401
  train = pd.read_csv('data/benchmarks/all_families_reduced_dta_train.csv')
1402
  score = 'CI'
1403
- if smiles not in train['X1']:
1404
  scenario = "Unseen drug"
1405
  else:
1406
  scenario = "Seen drug"
@@ -1429,21 +1446,26 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1429
  pair_generate: gr.Column(visible=True),
1430
  infer_data_for_predict: None,
1431
  infer_drug: None,
1432
- infer_target: None
 
 
1433
  }
1434
  match upload_type:
1435
- case "Upload a CSV interaction pair dataset":
1436
  return {
1437
  pair_upload: gr.Column(visible=True),
1438
  pair_generate: gr.Column(visible=False),
1439
  infer_data_for_predict: None,
1440
  infer_drug: None,
1441
- infer_target: None
 
 
1442
  }
1443
 
1444
 
1445
  infer_type.select(fn=infer_type_change, inputs=infer_type,
1446
- outputs=[pair_upload, pair_generate, infer_data_for_predict, infer_drug, infer_target])
 
1447
 
1448
 
1449
  def drug_screen_validate(fasta, library, library_upload, state, progress=gr.Progress(track_tqdm=True)):
 
846
  infer_flag = gr.State(value=False)
847
 
848
  with gr.Tabs() as tabs:
849
+ with gr.TabItem(label='Drug Hit Screening', id=0):
850
  gr.Markdown('''
851
+ # <center>Drug Hit Screening</center>
852
+ <center>
853
+ To predict interactions or binding affinities of a single target against a compound library.
854
+ </center>
855
  ''')
856
  with gr.Blocks() as screen_block:
857
  with gr.Column() as screen_page:
858
  with gr.Row():
859
  with gr.Column():
860
  HelpTip(
861
+ "Enter (paste) a amino acid sequence below manually or upload a FASTA file. "
862
+ "If multiple entities are in the FASTA, only the first will be used. "
863
  "Alternatively, enter a Uniprot ID or gene symbol with organism and click Query for "
864
  "the sequence."
865
  )
 
883
  info='Organism scientific name (default: Homo sapiens).',
884
  placeholder='Homo sapiens', show_label=False,
885
  visible=False, interactive=True, scale=4, )
886
+ target_upload_btn = gr.UploadButton(label='Upload a FASTA File', type='binary',
887
  visible=True, variant='primary',
888
  size='lg')
889
+ target_paste_markdown = gr.Button(value='OR Paste Your Sequence Below', visible=True)
890
+ target_query_btn = gr.Button(value='Query the Sequence', variant='primary',
891
  visible=False, scale=4)
892
  # with gr.Row():
893
  # example_uniprot = gr.Button(value='Example: Q16539', elem_classes='example', visible=False)
 
905
  HelpTip(
906
  "Click Auto-detect to identify the protein family using sequence alignment. "
907
  "This optional step allows applying a family-specific model instead of a all-family "
908
+ "model (general). "
909
  "Manually select general if the alignment results are unsatisfactory."
910
  )
911
  drug_screen_target_family = gr.Dropdown(
912
  choices=list(TARGET_FAMILY_MAP.keys()),
913
  value='General',
914
+ label='Step 2. Select Target Family (Optional)', interactive=True)
915
  # with gr.Column(scale=1, min_width=24):
916
 
917
  with gr.Row():
918
  with gr.Column():
919
+ target_family_detect_btn = gr.Button(value='OR Let Us Auto-Detect for You', variant='primary')
920
 
921
  with gr.Row():
922
  with gr.Column():
923
  HelpTip(
924
+ "Select a preset compound library (e.g., DrugBank). "
925
  "Alternatively, upload a CSV file with a column named X1 containing compound SMILES, "
926
+ "or use an SDF file (Max. 10,000 compounds per task). Example CSV and SDF files are "
927
+ "provided below and can be downloaded by clicking the lower right corner."
928
  )
929
+ drug_library = gr.Dropdown(label='Step 3. Select a Preset Compound Library',
930
  choices=list(DRUG_LIBRARY_MAP.keys()))
931
  with gr.Row():
932
  gr.File(label='Example SDF compound library',
 
934
  gr.File(label='Example CSV compound library',
935
  value='data/examples/compound_library.csv', interactive=False)
936
  drug_library_upload_btn = gr.UploadButton(
937
+ label='OR Upload Your Own Library', variant='primary')
938
  drug_library_upload = gr.File(label='Custom compound library file', visible=False)
939
  with gr.Row():
940
  with gr.Column():
941
  HelpTip(
942
  "Interaction prediction provides you binding probability score between the target of "
943
+ "interest and each compound in the library, "
944
  "while affinity prediction directly estimates their binding strength measured using "
945
  "IC50."
946
  )
947
  drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()),
948
+ label='Step 4. Select the Prediction Task You Want to Conduct',
949
  value='Compound-protein interaction')
950
 
951
  with gr.Row():
952
  with gr.Column():
953
  HelpTip(
954
  "Select your preferred model, or click Recommend for the best-performing model based "
955
+ "on the selected task, family, and whether the target was trained. "
956
  "Please refer to documentation for detailed benchamrk results."
957
  )
958
  drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()),
959
  label='Step 5. Select a Preset Model')
960
+ screen_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You', variant='primary')
961
  with gr.Row():
962
  with gr.Column():
963
  drug_screen_email = gr.Textbox(
964
+ label='Step 6. Input Your Email Address (Optional)',
965
+ info="Your email address will be used to notify you about the completion of your job."
 
966
  )
967
 
968
  with gr.Row(visible=True):
969
  with gr.Column():
970
  # drug_screen_clr_btn = gr.ClearButton(size='lg')
971
+ drug_screen_btn = gr.Button(value='SUBMIT THE SCREENING JOB', variant='primary', size='lg')
972
  # TODO Modify the pd df directly with df['X2'] = target
973
 
974
  screen_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
 
980
 
981
  with gr.TabItem(label='Target protein identification', id=1):
982
  gr.Markdown('''
983
+ # <center>Target Protein Identification</center>
984
+
985
+ <center>
986
+ To predict interactions or binding affinities of a single compound against a protein library.
987
+ </center>
988
  ''')
989
  with gr.Blocks() as identify_block:
990
  with gr.Column() as identify_page:
991
  with gr.Row():
992
  with gr.Column():
993
  HelpTip(
994
+ "Enter (paste) a compound SMILES below manually or upload a SDF file. "
995
+ "If multiple entities are in the SDF, only the first will be used. "
996
  "SMILES can be obtained by searching for the compound of interest in databases such "
997
  "as NCBI, PubChem and and ChEMBL."
998
  )
 
1002
  info='Enter (paste) an SMILES string or upload an SDF file to convert to SMILES.',
1003
  value='SMILES',
1004
  interactive=True)
1005
+ compound_upload_btn = gr.UploadButton(label='OR Upload a SDF File', variant='primary',
1006
  type='binary', visible=False)
1007
 
1008
  compound_smiles = gr.Code(label='Input or Display Compound SMILES', interactive=True, lines=5)
 
1011
  with gr.Row():
1012
  with gr.Column():
1013
  HelpTip(
1014
+ "By default, models trained on all protein families (general) will be applied. "
1015
+ # "If the proteins in the target library of interest all belong to the same protein "
1016
+ # "family, manually selecting the family is supported."
1017
  )
1018
  target_identify_target_family = gr.Dropdown(choices=['General'],
1019
  value='General',
1020
+ label='Step 2. Select Target Family ('
1021
  'Optional)')
1022
 
1023
  with gr.Row():
1024
  with gr.Column():
1025
  HelpTip(
1026
+ "Select a preset target library (e.g., ChEMBL33_human_proteins). "
1027
  "Alternatively, upload a CSV file with a column named X2 containing target protein "
1028
+ "sequences, or use an FASTA file (Max. 10,000 targets per task). "
1029
+ "Example CSV and SDF files are provided below "
1030
+ "and can be downloaded by clicking the lower right corner."
1031
  )
1032
+ target_library = gr.Dropdown(label='Step 3. Select a Preset Target Library',
1033
  choices=list(TARGET_LIBRARY_MAP.keys()))
1034
  with gr.Row():
1035
  gr.File(label='Example FASTA target library',
 
1037
  gr.File(label='Example CSV target library',
1038
  value='data/examples/target_library.csv', interactive=False)
1039
  target_library_upload_btn = gr.UploadButton(
1040
+ label='OR Upload Your Own Library', variant='primary')
1041
  target_library_upload = gr.File(label='Custom target library file', visible=False)
1042
 
1043
  with gr.Row():
 
1049
  "IC50."
1050
  )
1051
  target_identify_task = gr.Dropdown(list(TASK_MAP.keys()),
1052
+ label='Step 4. Select the Prediction Task You Want to Conduct',
1053
  value='Compound-protein interaction')
1054
 
1055
  with gr.Row():
 
1059
  "on the selected task, family, and whether the compound was trained. "
1060
  "Please refer to documentation for detailed benchamrk results."
1061
  )
1062
+ target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()),
1063
+ label='Step 5. Select a Preset Model')
1064
+ identify_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
1065
+ variant='primary')
1066
 
1067
  with gr.Row():
1068
  with gr.Column():
1069
  target_identify_email = gr.Textbox(
1070
+ label='Step 6. Input Your Email Address (Optional)',
1071
+ info="Your email address will be used to notify you about the completion of your job."
 
1072
  )
1073
 
1074
  with gr.Row(visible=True):
1075
  # target_identify_clr_btn = gr.ClearButton(size='lg')
1076
+ target_identify_btn = gr.Button(value='SUBMIT THE IDENTIFICATION JOB', variant='primary', size='lg')
1077
 
1078
  identify_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
1079
  identify_waiting = gr.Markdown(f"Your job is running... It might take a few minutes."
 
1083
  with gr.TabItem(label='Interaction pair inference', id=2):
1084
  gr.Markdown('''
1085
  # <center>Interaction Pair Inference</center>
1086
+ <center>To predict interactions or binding affinities between up to 10,000 paired compound-protein data.</center>
1087
  ''')
1088
  with gr.Blocks() as infer_block:
1089
  with gr.Column() as infer_page:
 
1091
  "A custom interation pair dataset can be a CSV file with 2 required columns "
1092
  "(X1 for smiles and X2 for sequences) "
1093
  "and optionally 2 ID columns (ID1 for compound ID and ID2 for target ID), "
1094
+ "or generated from a FASTA file containing multiple "
1095
+ "sequences and a SDF file containing multiple compounds. "
1096
+ "Currently, a maximum of 10,000 pairs is supported, "
1097
+ "which means that the size of CSV file or "
1098
+ "the product of the two library sizes should not exceed 10,000."
1099
  )
1100
  infer_type = gr.Dropdown(
1101
+ choices=['Upload a CSV file containing paired compound-protein data',
1102
  'Upload a compound library and a target library'],
1103
  label='Step 1. Select Pair Input Type and Input',
1104
+ value='Upload a CSV file containing paired compound-protein data')
1105
  with gr.Column() as pair_upload:
1106
+ gr.File(label="Example CSV dataset",
1107
  value="data/examples/interaction_pair_inference.csv",
1108
  interactive=False)
1109
+ with gr.Row():
1110
+ infer_csv_prompt = gr.Button(value="Upload Your Own Dataset Below",
1111
+ visible=True)
1112
  with gr.Column():
1113
  infer_data_for_predict = gr.File(
1114
+ label='Upload CSV File Containing Paired Records',
1115
+ file_count="single", type='filepath', visible=True)
1116
+ with gr.Column(visible=False) as pair_generate:
1117
  with gr.Row():
1118
  gr.File(label='Example SDF compound library',
1119
  value='data/examples/compound_library.sdf', interactive=False)
 
1125
  gr.File(label='Example CSV target library',
1126
  value='data/examples/target_library.csv', interactive=False)
1127
  with gr.Row():
1128
+ infer_library_prompt = gr.Button(value="Upload Your Own Libraries Below",
1129
+ visible=False)
1130
+ with gr.Row():
1131
+ infer_drug = gr.File(label='Upload SDF/CSV File Containing Multiple Compounds',
1132
  file_count="single", type='filepath')
1133
+ infer_target = gr.File(label='Upload FASTA/CSV File Containing Multiple Targets',
1134
  file_count="single", type='filepath')
1135
 
1136
  with gr.Row():
1137
  with gr.Column():
1138
  HelpTip(
1139
  "By default, models trained on all protein families (general) will be applied. "
1140
+ "If the proteins in the target library of interest "
1141
+ "all belong to the same protein family, manually selecting the family is supported."
1142
  )
1143
  pair_infer_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
1144
  value='General',
1145
+ label='Step 2. Select Target Family (Optional)')
1146
 
1147
  with gr.Row():
1148
  with gr.Column():
1149
  HelpTip(
1150
+ "Interaction prediction provides you binding probability score "
1151
+ "between the target of interest and each compound in the library, "
1152
+ "while affinity prediction directly estimates their binding strength "
1153
+ "measured using IC50."
1154
  )
1155
  pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()),
1156
+ label='Step 3. Select the Prediction Task You Want to Conduct',
1157
  value='Compound-protein interaction')
1158
 
1159
  with gr.Row():
1160
  with gr.Column():
1161
  HelpTip("Select your preferred model. "
1162
+ "Please refer to documentation for detailed benchmark results."
1163
  )
1164
+ pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()),
1165
+ label='Step 4. Select a Preset Model')
1166
+ # infer_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
1167
+ # variant='primary')
1168
 
1169
  with gr.Row():
1170
  pair_infer_email = gr.Textbox(
1171
+ label='Step 5. Input Your Email Address (Optional)',
1172
+ info="Your email address will be used to notify you about the completion of your job."
1173
  )
1174
 
1175
  with gr.Row(visible=True):
1176
  # pair_infer_clr_btn = gr.ClearButton(size='lg')
1177
+ pair_infer_btn = gr.Button(value='SUBMIT THE INFERENCE JOB', variant='primary', size='lg')
1178
 
1179
  infer_waiting = gr.Markdown(f"Your job is running... It might take a few minutes."
1180
  f"When it's done, you will be redirected to the report page. "
 
1417
  elif task == 'DTA':
1418
  train = pd.read_csv('data/benchmarks/all_families_reduced_dta_train.csv')
1419
  score = 'CI'
1420
+ if not np.isin(smiles, train['X1']):
1421
  scenario = "Unseen drug"
1422
  else:
1423
  scenario = "Seen drug"
 
1446
  pair_generate: gr.Column(visible=True),
1447
  infer_data_for_predict: None,
1448
  infer_drug: None,
1449
+ infer_target: None,
1450
+ infer_csv_prompt: gr.Button(visible=False),
1451
+ infer_library_prompt: gr.Button(visible=True),
1452
  }
1453
  match upload_type:
1454
+ case "Upload a CSV file containing paired compound-protein data":
1455
  return {
1456
  pair_upload: gr.Column(visible=True),
1457
  pair_generate: gr.Column(visible=False),
1458
  infer_data_for_predict: None,
1459
  infer_drug: None,
1460
+ infer_target: None,
1461
+ infer_csv_prompt: gr.Button(visible=True),
1462
+ infer_library_prompt: gr.Button(visible=False),
1463
  }
1464
 
1465
 
1466
  infer_type.select(fn=infer_type_change, inputs=infer_type,
1467
+ outputs=[pair_upload, pair_generate, infer_data_for_predict, infer_drug, infer_target,
1468
+ infer_csv_prompt, infer_library_prompt])
1469
 
1470
 
1471
  def drug_screen_validate(fasta, library, library_upload, state, progress=gr.Progress(track_tqdm=True)):