Anton Bushuiev committed on
Commit
3401fbb
·
1 Parent(s): 575d08b

Fix retention time

Browse files
Files changed (2) hide show
  1. app.py +22 -20
  2. data/example_5_spectra.mgf +0 -350
app.py CHANGED
@@ -264,7 +264,7 @@ def setup():
264
  ('https://huggingface.co/datasets/roman-bushuiev/GeMS/resolve/main/data/auxiliary/example_piper_2k_spectra.mgf',
265
  EXAMPLE_PATH / 'example_piper_2k_spectra.mgf',
266
  "PiperNET example spectra"),
267
- ('https://raw.githubusercontent.com/pluskal-lab/DreaMS/cc806fa6fea281c1e57dd81fc512f71de9290017/data/examples/example_5_spectra.mgf',
268
  EXAMPLE_PATH / 'example_5_spectra.mgf',
269
  "DreaMS example spectra")
270
  ]
@@ -337,7 +337,9 @@ def _create_result_row(i, j, n, msdata, msdata_lib, sims, cos_sim, embs, calcula
337
 
338
  # Base row data
339
  row_data = {
340
- 'feature_id': i + 1,
 
 
341
  'precursor_mz': msdata.get_prec_mzs(i),
342
  'topk': n + 1,
343
  'library_j': j,
@@ -377,10 +379,6 @@ def _process_results_dataframe(df, in_pth, calculate_modified_cosine=False):
377
  Returns:
378
  tuple: (processed_df, csv_path)
379
  """
380
- # Sort hits by DreaMS similarity
381
- df_top1 = df[df['topk'] == 1].sort_values('DreaMS_similarity', ascending=False)
382
- df = df.set_index('feature_id').loc[df_top1['feature_id'].values].reset_index()
383
-
384
  # Remove unnecessary columns and round similarity scores
385
  df = df.drop(columns=['i', 'j', 'library_j'])
386
  df['DreaMS_similarity'] = df['DreaMS_similarity'].astype(float).round(4)
@@ -390,12 +388,16 @@ def _process_results_dataframe(df, in_pth, calculate_modified_cosine=False):
390
  df['Modified_cosine_similarity'] = df['Modified_cosine_similarity'].astype(float).round(4)
391
 
392
  df['precursor_mz'] = df['precursor_mz'].astype(float).round(4)
 
 
393
 
394
  # Rename columns for display
395
  column_mapping = {
396
  'topk': 'Top k',
397
  'library_ID': 'Library ID',
398
- "feature_id": "Feature ID",
 
 
399
  "precursor_mz": "Precursor m/z",
400
  "library_SMILES": "Molecule",
401
  "library_SMILES_raw": "SMILES",
@@ -625,11 +627,11 @@ def _create_gradio_interface():
625
 
626
  # Results table
627
  df = gr.Dataframe(
628
- headers=["Row", "Feature ID", "Precursor m/z", "Molecule", "Spectrum",
629
- "Library ID", "DreaMS similarity"],
630
- datatype=["number", "number", "number", "html", "html", "str", "number"],
631
- col_count=(7, "fixed"),
632
- column_widths=["25px", "25px", "28px", "60px", "60px", "50px", "40px"],
633
  max_height=1000,
634
  show_fullscreen_button=True,
635
  show_row_numbers=False,
@@ -643,15 +645,15 @@ def _create_gradio_interface():
643
  # Function to update dataframe headers based on setting
644
  def update_headers(show_cosine):
645
  if show_cosine:
646
- return gr.update(headers=["Row", "Feature ID", "Precursor m/z", "Molecule", "Spectrum",
647
- "Library ID", "DreaMS similarity", "Modified cosine similarity"],
648
- col_count=(8, "fixed"),
649
- column_widths=["25px", "25px", "28px", "60px", "60px", "50px", "40px", "40px"])
650
  else:
651
- return gr.update(headers=["Row", "Feature ID", "Precursor m/z", "Molecule", "Spectrum",
652
- "Library ID", "DreaMS similarity"],
653
- col_count=(7, "fixed"),
654
- column_widths=["25px", "25px", "28px", "60px", "60px", "50px", "40px"])
655
 
656
  # Update headers when setting changes
657
  calculate_modified_cosine.change(
 
264
  ('https://huggingface.co/datasets/roman-bushuiev/GeMS/resolve/main/data/auxiliary/example_piper_2k_spectra.mgf',
265
  EXAMPLE_PATH / 'example_piper_2k_spectra.mgf',
266
  "PiperNET example spectra"),
267
+ ('https://raw.githubusercontent.com/pluskal-lab/DreaMS/refs/heads/main/data/examples/example_5_spectra.mgf',
268
  EXAMPLE_PATH / 'example_5_spectra.mgf',
269
  "DreaMS example spectra")
270
  ]
 
337
 
338
  # Base row data
339
  row_data = {
340
+ 'scan_number': msdata.get_values(SCAN_NUMBER, i),
341
+ 'rt': msdata.get_values(RT, i),
342
+ 'charge': msdata.get_values(CHARGE, i),
343
  'precursor_mz': msdata.get_prec_mzs(i),
344
  'topk': n + 1,
345
  'library_j': j,
 
379
  Returns:
380
  tuple: (processed_df, csv_path)
381
  """
 
 
 
 
382
  # Remove unnecessary columns and round similarity scores
383
  df = df.drop(columns=['i', 'j', 'library_j'])
384
  df['DreaMS_similarity'] = df['DreaMS_similarity'].astype(float).round(4)
 
388
  df['Modified_cosine_similarity'] = df['Modified_cosine_similarity'].astype(float).round(4)
389
 
390
  df['precursor_mz'] = df['precursor_mz'].astype(float).round(4)
391
+ df['rt'] = df['rt'].astype(float).round(2) # Round retention time to 2 decimal places
392
+ df['charge'] = df['charge'].astype(str) # Keep charge as string
393
 
394
  # Rename columns for display
395
  column_mapping = {
396
  'topk': 'Top k',
397
  'library_ID': 'Library ID',
398
+ "scan_number": "Scan number",
399
+ "rt": "Retention time",
400
+ "charge": "Charge",
401
  "precursor_mz": "Precursor m/z",
402
  "library_SMILES": "Molecule",
403
  "library_SMILES_raw": "SMILES",
 
627
 
628
  # Results table
629
  df = gr.Dataframe(
630
+ headers=["Row", "Scan number", "Retention time", "Charge", "Precursor m/z", "Molecule", "Spectrum",
631
+ "DreaMS similarity", "Library ID"],
632
+ datatype=["number", "number", "number", "str", "number", "html", "html", "number", "str"],
633
+ col_count=(9, "fixed"),
634
+ column_widths=["20px", "30px", "30px", "25px", "30px", "40px", "40px", "40px", "50px"],
635
  max_height=1000,
636
  show_fullscreen_button=True,
637
  show_row_numbers=False,
 
645
  # Function to update dataframe headers based on setting
646
  def update_headers(show_cosine):
647
  if show_cosine:
648
+ return gr.update(headers=["Row", "Scan number", "Retention time", "Charge", "Precursor m/z", "Molecule", "Spectrum",
649
+ "DreaMS similarity", "Library ID", "Modified cosine similarity"],
650
+ col_count=(10, "fixed"),
651
+ column_widths=["20px", "30px", "30px", "25px", "30px", "40px", "40px", "40px", "50px", "40px"])
652
  else:
653
+ return gr.update(headers=["Row", "Scan number", "Retention time", "Charge", "Precursor m/z", "Molecule", "Spectrum",
654
+ "DreaMS similarity", "Library ID"],
655
+ col_count=(9, "fixed"),
656
+ column_widths=["20px", "30px", "30px", "25px", "30px", "40px", "40px", "40px", "50px"])
657
 
658
  # Update headers when setting changes
659
  calculate_modified_cosine.change(
data/example_5_spectra.mgf DELETED
@@ -1,350 +0,0 @@
1
- BEGIN IONS
2
- NAME=DMAPT
3
- DESCRIPTION=MCE bioactive compounds library
4
- EXACTMASS=293.199094
5
- FORMULA=C17H27NO3
6
- INCHI=InChI=1S/C17H27NO3/c1-11-6-5-9-17(2)15(21-17)14-12(8-7-11)13(10-18(3)4)16(19)20-14/h6,12-15H,5,7-10H2,1-4H3/b11-6-/t12-,13+,14-,15+,17+/m0/s1
7
- INCHIAUX=UJNSFDHVIBGEJZ-CMRIBGNTSA-N
8
- SMILES=C/C1=C/CC[C@@]2(C)O[C@@H]2[C@H]2OC(=O)[C@H](CN(C)C)[C@@H]2CC1
9
- FEATURE_ID=-1
10
- MSLEVEL=2
11
- RTINSECONDS=69.34
12
- ADDUCT=[M+H]+
13
- PEPMASS=294.20637
14
- CHARGE=1
15
- SPECTYPE=ALL_ENERGIES
16
- Collision energy=60.0
17
- FRAGMENTATION_METHOD=HCD
18
- ISOLATION_WINDOW=1.2000000476839432
19
- Acquisition=Crude
20
- INSTRUMENT_TYPE=Orbitrap
21
- SOURCE_INSTRUMENT=Orbitrap ID-X
22
- IMS_TYPE=none
23
- ION_SOURCE=ESI
24
- IONMODE=Positive
25
- PI=Tomas Pluskal
26
- DATACOLLECTOR=Corinna Brungs
27
- DATASET_ID=MSVPLACEHOLDERID
28
- USI=mzspec:MSVPLACEHOLDERID:20220601_100AGC_pluskal_mce_1D1_A13_id.mzML:-1
29
- SCANS=-1
30
- PRECURSOR_PURITY=1.0
31
- QUALITY_CHIMERIC=PASSED
32
- QUALITY_EXPLAINED_INTENSITY=0.95719075
33
- QUALITY_EXPLAINED_SIGNALS=0.91803277
34
- Num peaks=61
35
- 42.033739 2.023
36
- 43.017743 1.244
37
- 43.041538 0.375
38
- 44.049385 0.271
39
- 46.064932 0.633
40
- 55.053921 0.247
41
- 56.049297 0.434
42
- 58.061086 0.921
43
- 58.064829 100
44
- 58.068315 4.309
45
- 58.071661 1.067
46
- 58.074808 0.3
47
- 67.053963 0.832
48
- 69.069565 0.262
49
- 79.053978 1.014
50
- 81.069616 1.317
51
- 82.064804 0.901
52
- 84.080497 0.567
53
- 91.053903 1.516
54
- 93.069577 2.084
55
- 94.064888 0.298
56
- 95.048843 0.28
57
- 95.085147 0.577
58
- 97.088348 0.201
59
- 98.059745 0.372
60
- 105.069597 1.626
61
- 106.064804 0.358
62
- 107.085253 1.159
63
- 108.080338 0.242
64
- 109.064416 0.257
65
- 109.100923 0.465
66
- 110.096075 1.043
67
- 116.070213 0.429
68
- 117.069763 0.601
69
- 119.085279 1.489
70
- 121.100884 0.483
71
- 129.069626 0.285
72
- 131.085311 0.665
73
- 133.10089 0.474
74
- 134.059753 0.937
75
- 135.117294 0.205
76
- 144.093231 0.522
77
- 145.100601 0.202
78
- 147.11673 0.298
79
- 149.13218 2.214
80
- 159.116485 2.567
81
- 161.131911 0.35
82
- 164.107071 0.243
83
- 164.143127 0.255
84
- 175.112091 0.209
85
- 177.127193 1.388
86
- 185.13264 0.432
87
- 192.174698 0.38
88
- 203.14307 0.317
89
- 222.185104 0.206
90
- 231.13818 0.57
91
- 249.148476 2.331
92
- 250.216614 0.841
93
- 251.117976 1.62
94
- 294.128296 0.191
95
- 294.206632 26.983
96
- END IONS
97
-
98
- BEGIN IONS
99
- NAME=Mirk-IN-1
100
- DESCRIPTION=MCE bioactive compounds library
101
- EXACTMASS=497.065759
102
- FORMULA=C23H17Cl2N5O4
103
- INCHI=InChI=1S/C23H17Cl2N5O4/c1-34-23-27-11-14-8-16(22(33)29-19(14)30-23)21(32)28-18-9-13(5-6-17(18)25)20(31)26-10-12-3-2-4-15(24)7-12/h2-9,11H,10H2,1H3,(H,26,31)(H,28,32)(H,27,29,30,33)
104
- INCHIAUX=CQKBSRPVZZLCJE-UHFFFAOYSA-N
105
- SMILES=COc1ncc2cc(C(=O)Nc3c(Cl)ccc(C(=O)NCc4cc(Cl)ccc4)c3)c(=O)[nH]c2n1
106
- FEATURE_ID=-1
107
- MSLEVEL=2
108
- RTINSECONDS=115.08
109
- ADDUCT=[M+H]+
110
- PEPMASS=498.07304
111
- CHARGE=1
112
- SPECTYPE=ALL_MSN_TO_PSEUDO_MS2
113
- Collision energy=60.0
114
- FRAGMENTATION_METHOD=HCD
115
- ISOLATION_WINDOW=1.2000000476839432
116
- Acquisition=Crude
117
- INSTRUMENT_TYPE=Orbitrap
118
- SOURCE_INSTRUMENT=Orbitrap ID-X
119
- IMS_TYPE=none
120
- ION_SOURCE=ESI
121
- IONMODE=Positive
122
- PI=Tomas Pluskal
123
- DATACOLLECTOR=Corinna Brungs
124
- DATASET_ID=MSVPLACEHOLDERID
125
- USI=mzspec:MSVPLACEHOLDERID:20220601_pluskal_mce_1D1_A4_id.mzML:-1
126
- SCANS=-1
127
- PRECURSOR_PURITY=1.0
128
- QUALITY_CHIMERIC=PASSED
129
- QUALITY_EXPLAINED_INTENSITY=0.93082154
130
- QUALITY_EXPLAINED_SIGNALS=0.88461536
131
- Num peaks=52
132
- 42.033703 6.933
133
- 58.028778 11.37
134
- 59.032055 2.116
135
- 60.03299 1.268
136
- 63.022655 2.006
137
- 64.017876 2.374
138
- 65.038353 0.923
139
- 66.033494 2.134
140
- 69.992175 1.938
141
- 78.033541 4.097
142
- 79.028745 1.257
143
- 86.060081 9.959
144
- 87.06353 3.769
145
- 88.064438 1.499
146
- 91.017532 3.789
147
- 93.044386 7.213
148
- 104.049591 5.292
149
- 105.044537 4.776
150
- 105.052979 3.494
151
- 106.028389 1.142
152
- 120.055412 7.778
153
- 121.039436 6.751
154
- 125.014962 2.948
155
- 133.027011 2.074
156
- 133.039425 3.979
157
- 135.018616 0.908
158
- 148.050389 4.721
159
- 154.005085 2.139
160
- 160.050642 3.311
161
- 161.034588 1.467
162
- 176.045663 22.882
163
- 194.056314 24.336
164
- 204.040476 24.707
165
- 221.080734 1.76
166
- 222.05116 100
167
- 222.087845 1.473
168
- 223.089127 1.679
169
- 248.069031 1.01
170
- 249.074326 2.424
171
- 250.078537 2.556
172
- 251.081848 1.513
173
- 277.029205 1.552
174
- 293.066956 1.309
175
- 308.148193 1.723
176
- 309.15033 3.711
177
- 310.153687 1.904
178
- 321.062225 1.269
179
- 357.03932 4.785
180
- 411.191315 1.423
181
- 412.193907 5.738
182
- 413.196318 8.005
183
- 498.073273 1.812
184
- END IONS
185
-
186
- BEGIN IONS
187
- NAME=1373215-15-6
188
- DESCRIPTION=MCE bioactive compounds library
189
- EXACTMASS=484.212198
190
- FORMULA=C25H29FN4O5
191
- INCHI=InChI=1S/C25H29FN4O5/c1-27-25(32)35-22-15-34-21-7-6-18(29-8-10-30(11-9-29)19-13-33-14-19)12-20(21)23(22)28-24(31)16-2-4-17(26)5-3-16/h2-7,12,19,22-23H,8-11,13-15H2,1H3,(H,27,32)(H,28,31)/t22-,23-/m0/s1
192
- INCHIAUX=NDEBZCZEAVMSQF-GOTSBHOMSA-N
193
- SMILES=CNC(=O)O[C@H]1COc2c(cc(N3CCN(C4COC4)CC3)cc2)[C@@H]1NC(=O)c1ccc(F)cc1
194
- FEATURE_ID=-1
195
- MSLEVEL=2
196
- RTINSECONDS=77.81
197
- ADDUCT=[M+H]+
198
- PEPMASS=485.21947
199
- CHARGE=1
200
- SPECTYPE=SAME_ENERGY
201
- Collision energy=30.0
202
- FRAGMENTATION_METHOD=HCD
203
- ISOLATION_WINDOW=1.2000000476839432
204
- Acquisition=Crude
205
- INSTRUMENT_TYPE=Orbitrap
206
- SOURCE_INSTRUMENT=Orbitrap ID-X
207
- IMS_TYPE=none
208
- ION_SOURCE=ESI
209
- IONMODE=Positive
210
- PI=Tomas Pluskal
211
- DATACOLLECTOR=Corinna Brungs
212
- DATASET_ID=MSVPLACEHOLDERID
213
- USI=mzspec:MSVPLACEHOLDERID:20220601_pluskal_mce_1D1_A8_id.mzML:-1
214
- SCANS=-1
215
- PRECURSOR_PURITY=1.0
216
- QUALITY_CHIMERIC=PASSED
217
- QUALITY_EXPLAINED_INTENSITY=1.0
218
- QUALITY_EXPLAINED_SIGNALS=1.0
219
- Num peaks=15
220
- 70.064962 2.862
221
- 84.080627 1.511
222
- 109.076332 1.385
223
- 114.091324 2.886
224
- 123.023911 14.149
225
- 161.059692 2.52
226
- 202.086288 1.435
227
- 230.105148 3.367
228
- 241.133691 26.664
229
- 261.159847 3.062
230
- 271.144251 41.501
231
- 289.154956 100
232
- 346.175893 26.1
233
- 351.136749 1.2
234
- 410.186693 8.75
235
- END IONS
236
-
237
- BEGIN IONS
238
- NAME=IPSU
239
- DESCRIPTION=MCE bioactive compounds library
240
- EXACTMASS=405.216475
241
- FORMULA=C23H27N5O2
242
- INCHI=InChI=1S/C23H27N5O2/c1-30-20-7-11-24-22(26-20)27-13-9-23(10-14-27)8-4-12-28(21(23)29)16-17-15-25-19-6-3-2-5-18(17)19/h2-3,5-7,11,15,25H,4,8-10,12-14,16H2,1H3
243
- INCHIAUX=PCMHOSYCWRRHTG-UHFFFAOYSA-N
244
- SMILES=COc1nc(N2CCC3(CCCN(Cc4c[nH]c5ccccc45)C3=O)CC2)ncc1
245
- FEATURE_ID=660
246
- MSLEVEL=2
247
- RTINSECONDS=110.45
248
- ADDUCT=[M+H]+
249
- PEPMASS=406.22375
250
- CHARGE=1
251
- Collision energy=20.0
252
- FRAGMENTATION_METHOD=HCD
253
- ISOLATION_WINDOW=1.2000000476839432
254
- Acquisition=Crude
255
- INSTRUMENT_TYPE=Orbitrap
256
- SOURCE_INSTRUMENT=Orbitrap ID-X
257
- IMS_TYPE=none
258
- ION_SOURCE=ESI
259
- IONMODE=Positive
260
- PI=Tomas Pluskal
261
- DATACOLLECTOR=Corinna Brungs
262
- DATASET_ID=MSVPLACEHOLDERID
263
- USI=mzspec:MSVPLACEHOLDERID:20220601_pluskal_mce_1D1_A1_id.mzML:660
264
- SCANS=660
265
- PRECURSOR_PURITY=0.9731724062527856
266
- QUALITY_CHIMERIC=PASSED
267
- QUALITY_EXPLAINED_INTENSITY=0.99532574
268
- QUALITY_EXPLAINED_SIGNALS=0.8888889
269
- Num peaks=9
270
- 45.134823 1.082
271
- 45.13699 1.064
272
- 110.096245 17.184
273
- 130.064972 13.97
274
- 136.111862 0.874
275
- 277.16571 100
276
- 289.165924 52.842
277
- 307.177856 0.793
278
- 406.223083 43.696
279
- END IONS
280
-
281
- BEGIN IONS
282
- NAME=Vadimezan
283
- DESCRIPTION=MCE bioactive compounds library
284
- EXACTMASS=282.089209
285
- FORMULA=C17H14O4
286
- INCHI=InChI=1S/C17H14O4/c1-9-6-7-13-15(20)12-5-3-4-11(8-14(18)19)17(12)21-16(13)10(9)2/h3-7H,8H2,1-2H3,(H,18,19)
287
- INCHIAUX=XGOYIMQSIKSOBS-UHFFFAOYSA-N
288
- SMILES=Cc1c(C)c2c(cc1)c(=O)c1cccc(CC(=O)O)c1o2
289
- FEATURE_ID=474
290
- MSLEVEL=2
291
- RTINSECONDS=113.94
292
- ADDUCT=[M+H]+
293
- PEPMASS=283.09648
294
- CHARGE=1
295
- Collision energy=60.0
296
- FRAGMENTATION_METHOD=HCD
297
- ISOLATION_WINDOW=1.2000000476839432
298
- Acquisition=Crude
299
- INSTRUMENT_TYPE=Orbitrap
300
- SOURCE_INSTRUMENT=Orbitrap ID-X
301
- IMS_TYPE=none
302
- ION_SOURCE=ESI
303
- IONMODE=Positive
304
- PI=Tomas Pluskal
305
- DATACOLLECTOR=Corinna Brungs
306
- DATASET_ID=MSVPLACEHOLDERID
307
- USI=mzspec:MSVPLACEHOLDERID:20220601_pluskal_mce_1D1_A2_id.mzML:474
308
- SCANS=474
309
- PRECURSOR_PURITY=0.973405752811273
310
- QUALITY_CHIMERIC=PASSED
311
- QUALITY_EXPLAINED_INTENSITY=0.99386805
312
- QUALITY_EXPLAINED_SIGNALS=0.9444444
313
- Num peaks=36
314
- 91.053741 1.144
315
- 105.069382 1.254
316
- 141.069168 1.139
317
- 152.061859 1.019
318
- 153.069305 1.283
319
- 155.085022 0.966
320
- 158.033508 1.081
321
- 165.069733 7.345
322
- 166.077545 9.805
323
- 167.085587 4.037
324
- 168.093094 3.638
325
- 178.077515 2.42
326
- 179.085281 1.429
327
- 181.06459 6.555
328
- 181.101135 2.534
329
- 183.116669 0.903
330
- 190.078049 0.997
331
- 191.085159 1.264
332
- 193.101242 1.467
333
- 194.07254 10.324
334
- 195.080231 21.628
335
- 196.087906 3.722
336
- 208.088181 2.735
337
- 209.059555 0.982
338
- 209.096024 59.124
339
- 209.119858 0.749
340
- 210.104065 2.296
341
- 221.096359 0.981
342
- 222.067566 2.431
343
- 223.0755 19.616
344
- 225.091003 2.035
345
- 236.083176 2.085
346
- 237.090988 100
347
- 238.098816 15.987
348
- 239.106781 0.935
349
- 283.096832 2.447
350
- END IONS