Anton Bushuiev committed on
Commit
1dd40c0
·
1 Parent(s): 926bf18

Clean, add text and fig

Browse files
Files changed (2) hide show
  1. app.py +12 -16
  2. data/example_5_spectra.mgf +350 -0
app.py CHANGED
@@ -27,7 +27,6 @@ def setup():
27
  urllib.request.urlretrieve(url, target_path)
28
 
29
  # Run simple example as a test and to download weights
30
- # Download example spectra file
31
  example_url = 'https://raw.githubusercontent.com/pluskal-lab/DreaMS/cc806fa6fea281c1e57dd81fc512f71de9290017/data/examples/example_5_spectra.mgf'
32
  example_path = Path('./data/example_5_spectra.mgf')
33
  example_path.parent.mkdir(parents=True, exist_ok=True)
@@ -95,25 +94,28 @@ def predict(lib_pth, in_pth):
95
  # display(Chem.MolFromSmiles(row['library_SMILES']))
96
 
97
  # Sort hits by DreaMS similarity
98
- # df_top1 = df[df['topk'] == 1].sort_values('DreaMS_similarity', ascending=False)
99
- # df = df.set_index('feature_id').loc[df_top1['feature_id'].values].reset_index()
100
- # df
101
 
102
  return df, str(df_path)
103
 
104
 
105
  setup()
106
- app = gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="pink"))
107
  with app:
108
 
109
  # Input GUI
 
 
110
  gr.Markdown(value="""
111
- # DreaMS
 
 
 
 
 
 
112
  """)
113
- # gr.Image("assets/readme-dimer-close-up.png")
114
- # gr.Markdown(value="""
115
- # TODO Some description
116
- # """)
117
  with gr.Row(equal_height=True):
118
  in_pth = gr.File(
119
  file_count="single",
@@ -132,9 +134,6 @@ with app:
132
  datatype=["number", "number", "number", "str", "str", "number", "number", "number", "number"],
133
  col_count=(9, "fixed"),
134
  )
135
- # dropdown = gr.Dropdown(interactive=True, visible=False)
136
- # dropdown_choices_to_plot_args = gr.State([])
137
- # plot = gr.HTML()
138
 
139
  # Main logic
140
  inputs = [in_pth]
@@ -142,8 +141,5 @@ with app:
142
  predict = partial(predict, lib_pth)
143
  predict_button.click(predict, inputs=inputs, outputs=outputs)
144
 
145
- # Update plot on dropdown change
146
- # dropdown.change(update_plot, inputs=[dropdown, dropdown_choices_to_plot_args], outputs=[plot])
147
-
148
 
149
  app.launch(allowed_paths=['./assets'])
 
27
  urllib.request.urlretrieve(url, target_path)
28
 
29
  # Run simple example as a test and to download weights
 
30
  example_url = 'https://raw.githubusercontent.com/pluskal-lab/DreaMS/cc806fa6fea281c1e57dd81fc512f71de9290017/data/examples/example_5_spectra.mgf'
31
  example_path = Path('./data/example_5_spectra.mgf')
32
  example_path.parent.mkdir(parents=True, exist_ok=True)
 
94
  # display(Chem.MolFromSmiles(row['library_SMILES']))
95
 
96
  # Sort hits by DreaMS similarity
97
+ df_top1 = df[df['topk'] == 1].sort_values('DreaMS_similarity', ascending=False)
98
+ df = df.set_index('feature_id').loc[df_top1['feature_id'].values].reset_index()
 
99
 
100
  return df, str(df_path)
101
 
102
 
103
  setup()
104
+ app = gr.Blocks(theme=gr.themes.Default(primary_hue="yellow", secondary_hue="pink"))
105
  with app:
106
 
107
  # Input GUI
108
+ # gr.Markdown(value="""# DreaMS""")
109
+ gr.Image("https://raw.githubusercontent.com/pluskal-lab/DreaMS/cc806fa6fea281c1e57dd81fc512f71de9290017/assets/dreams_background.png", label="DreaMS")
110
  gr.Markdown(value="""
111
+ DreaMS (Deep Representations Empowering the Annotation of Mass Spectra) is a transformer-based
112
+ neural network designed to interpret tandem mass spectrometry (MS/MS) data. Pre-trained in a
113
+ self-supervised way on millions of unannotated spectra from our new GeMS (GNPS Experimental
114
+ Mass Spectra) dataset, DreaMS acquires rich molecular representations by predicting masked
115
+ spectral peaks and chromatographic retention orders. When fine-tuned for tasks such as spectral
116
+ similarity, chemical properties prediction, and fluorine detection, DreaMS achieves state-of-the-art
117
+ performance across various mass spectrometry interpretation tasks (<a href="https://www.nature.com/articles/s41587-025-02663-3">Bushuiev et al., Nature Biotechnology, 2025</a>).
118
  """)
 
 
 
 
119
  with gr.Row(equal_height=True):
120
  in_pth = gr.File(
121
  file_count="single",
 
134
  datatype=["number", "number", "number", "str", "str", "number", "number", "number", "number"],
135
  col_count=(9, "fixed"),
136
  )
 
 
 
137
 
138
  # Main logic
139
  inputs = [in_pth]
 
141
  predict = partial(predict, lib_pth)
142
  predict_button.click(predict, inputs=inputs, outputs=outputs)
143
 
 
 
 
144
 
145
  app.launch(allowed_paths=['./assets'])
data/example_5_spectra.mgf ADDED
@@ -0,0 +1,350 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ BEGIN IONS
2
+ NAME=DMAPT
3
+ DESCRIPTION=MCE bioactive compounds library
4
+ EXACTMASS=293.199094
5
+ FORMULA=C17H27NO3
6
+ INCHI=InChI=1S/C17H27NO3/c1-11-6-5-9-17(2)15(21-17)14-12(8-7-11)13(10-18(3)4)16(19)20-14/h6,12-15H,5,7-10H2,1-4H3/b11-6-/t12-,13+,14-,15+,17+/m0/s1
7
+ INCHIAUX=UJNSFDHVIBGEJZ-CMRIBGNTSA-N
8
+ SMILES=C/C1=C/CC[C@@]2(C)O[C@@H]2[C@H]2OC(=O)[C@H](CN(C)C)[C@@H]2CC1
9
+ FEATURE_ID=-1
10
+ MSLEVEL=2
11
+ RTINSECONDS=69.34
12
+ ADDUCT=[M+H]+
13
+ PEPMASS=294.20637
14
+ CHARGE=1
15
+ SPECTYPE=ALL_ENERGIES
16
+ Collision energy=60.0
17
+ FRAGMENTATION_METHOD=HCD
18
+ ISOLATION_WINDOW=1.2000000476839432
19
+ Acquisition=Crude
20
+ INSTRUMENT_TYPE=Orbitrap
21
+ SOURCE_INSTRUMENT=Orbitrap ID-X
22
+ IMS_TYPE=none
23
+ ION_SOURCE=ESI
24
+ IONMODE=Positive
25
+ PI=Tomas Pluskal
26
+ DATACOLLECTOR=Corinna Brungs
27
+ DATASET_ID=MSVPLACEHOLDERID
28
+ USI=mzspec:MSVPLACEHOLDERID:20220601_100AGC_pluskal_mce_1D1_A13_id.mzML:-1
29
+ SCANS=-1
30
+ PRECURSOR_PURITY=1.0
31
+ QUALITY_CHIMERIC=PASSED
32
+ QUALITY_EXPLAINED_INTENSITY=0.95719075
33
+ QUALITY_EXPLAINED_SIGNALS=0.91803277
34
+ Num peaks=61
35
+ 42.033739 2.023
36
+ 43.017743 1.244
37
+ 43.041538 0.375
38
+ 44.049385 0.271
39
+ 46.064932 0.633
40
+ 55.053921 0.247
41
+ 56.049297 0.434
42
+ 58.061086 0.921
43
+ 58.064829 100
44
+ 58.068315 4.309
45
+ 58.071661 1.067
46
+ 58.074808 0.3
47
+ 67.053963 0.832
48
+ 69.069565 0.262
49
+ 79.053978 1.014
50
+ 81.069616 1.317
51
+ 82.064804 0.901
52
+ 84.080497 0.567
53
+ 91.053903 1.516
54
+ 93.069577 2.084
55
+ 94.064888 0.298
56
+ 95.048843 0.28
57
+ 95.085147 0.577
58
+ 97.088348 0.201
59
+ 98.059745 0.372
60
+ 105.069597 1.626
61
+ 106.064804 0.358
62
+ 107.085253 1.159
63
+ 108.080338 0.242
64
+ 109.064416 0.257
65
+ 109.100923 0.465
66
+ 110.096075 1.043
67
+ 116.070213 0.429
68
+ 117.069763 0.601
69
+ 119.085279 1.489
70
+ 121.100884 0.483
71
+ 129.069626 0.285
72
+ 131.085311 0.665
73
+ 133.10089 0.474
74
+ 134.059753 0.937
75
+ 135.117294 0.205
76
+ 144.093231 0.522
77
+ 145.100601 0.202
78
+ 147.11673 0.298
79
+ 149.13218 2.214
80
+ 159.116485 2.567
81
+ 161.131911 0.35
82
+ 164.107071 0.243
83
+ 164.143127 0.255
84
+ 175.112091 0.209
85
+ 177.127193 1.388
86
+ 185.13264 0.432
87
+ 192.174698 0.38
88
+ 203.14307 0.317
89
+ 222.185104 0.206
90
+ 231.13818 0.57
91
+ 249.148476 2.331
92
+ 250.216614 0.841
93
+ 251.117976 1.62
94
+ 294.128296 0.191
95
+ 294.206632 26.983
96
+ END IONS
97
+
98
+ BEGIN IONS
99
+ NAME=Mirk-IN-1
100
+ DESCRIPTION=MCE bioactive compounds library
101
+ EXACTMASS=497.065759
102
+ FORMULA=C23H17Cl2N5O4
103
+ INCHI=InChI=1S/C23H17Cl2N5O4/c1-34-23-27-11-14-8-16(22(33)29-19(14)30-23)21(32)28-18-9-13(5-6-17(18)25)20(31)26-10-12-3-2-4-15(24)7-12/h2-9,11H,10H2,1H3,(H,26,31)(H,28,32)(H,27,29,30,33)
104
+ INCHIAUX=CQKBSRPVZZLCJE-UHFFFAOYSA-N
105
+ SMILES=COc1ncc2cc(C(=O)Nc3c(Cl)ccc(C(=O)NCc4cc(Cl)ccc4)c3)c(=O)[nH]c2n1
106
+ FEATURE_ID=-1
107
+ MSLEVEL=2
108
+ RTINSECONDS=115.08
109
+ ADDUCT=[M+H]+
110
+ PEPMASS=498.07304
111
+ CHARGE=1
112
+ SPECTYPE=ALL_MSN_TO_PSEUDO_MS2
113
+ Collision energy=60.0
114
+ FRAGMENTATION_METHOD=HCD
115
+ ISOLATION_WINDOW=1.2000000476839432
116
+ Acquisition=Crude
117
+ INSTRUMENT_TYPE=Orbitrap
118
+ SOURCE_INSTRUMENT=Orbitrap ID-X
119
+ IMS_TYPE=none
120
+ ION_SOURCE=ESI
121
+ IONMODE=Positive
122
+ PI=Tomas Pluskal
123
+ DATACOLLECTOR=Corinna Brungs
124
+ DATASET_ID=MSVPLACEHOLDERID
125
+ USI=mzspec:MSVPLACEHOLDERID:20220601_pluskal_mce_1D1_A4_id.mzML:-1
126
+ SCANS=-1
127
+ PRECURSOR_PURITY=1.0
128
+ QUALITY_CHIMERIC=PASSED
129
+ QUALITY_EXPLAINED_INTENSITY=0.93082154
130
+ QUALITY_EXPLAINED_SIGNALS=0.88461536
131
+ Num peaks=52
132
+ 42.033703 6.933
133
+ 58.028778 11.37
134
+ 59.032055 2.116
135
+ 60.03299 1.268
136
+ 63.022655 2.006
137
+ 64.017876 2.374
138
+ 65.038353 0.923
139
+ 66.033494 2.134
140
+ 69.992175 1.938
141
+ 78.033541 4.097
142
+ 79.028745 1.257
143
+ 86.060081 9.959
144
+ 87.06353 3.769
145
+ 88.064438 1.499
146
+ 91.017532 3.789
147
+ 93.044386 7.213
148
+ 104.049591 5.292
149
+ 105.044537 4.776
150
+ 105.052979 3.494
151
+ 106.028389 1.142
152
+ 120.055412 7.778
153
+ 121.039436 6.751
154
+ 125.014962 2.948
155
+ 133.027011 2.074
156
+ 133.039425 3.979
157
+ 135.018616 0.908
158
+ 148.050389 4.721
159
+ 154.005085 2.139
160
+ 160.050642 3.311
161
+ 161.034588 1.467
162
+ 176.045663 22.882
163
+ 194.056314 24.336
164
+ 204.040476 24.707
165
+ 221.080734 1.76
166
+ 222.05116 100
167
+ 222.087845 1.473
168
+ 223.089127 1.679
169
+ 248.069031 1.01
170
+ 249.074326 2.424
171
+ 250.078537 2.556
172
+ 251.081848 1.513
173
+ 277.029205 1.552
174
+ 293.066956 1.309
175
+ 308.148193 1.723
176
+ 309.15033 3.711
177
+ 310.153687 1.904
178
+ 321.062225 1.269
179
+ 357.03932 4.785
180
+ 411.191315 1.423
181
+ 412.193907 5.738
182
+ 413.196318 8.005
183
+ 498.073273 1.812
184
+ END IONS
185
+
186
+ BEGIN IONS
187
+ NAME=1373215-15-6
188
+ DESCRIPTION=MCE bioactive compounds library
189
+ EXACTMASS=484.212198
190
+ FORMULA=C25H29FN4O5
191
+ INCHI=InChI=1S/C25H29FN4O5/c1-27-25(32)35-22-15-34-21-7-6-18(29-8-10-30(11-9-29)19-13-33-14-19)12-20(21)23(22)28-24(31)16-2-4-17(26)5-3-16/h2-7,12,19,22-23H,8-11,13-15H2,1H3,(H,27,32)(H,28,31)/t22-,23-/m0/s1
192
+ INCHIAUX=NDEBZCZEAVMSQF-GOTSBHOMSA-N
193
+ SMILES=CNC(=O)O[C@H]1COc2c(cc(N3CCN(C4COC4)CC3)cc2)[C@@H]1NC(=O)c1ccc(F)cc1
194
+ FEATURE_ID=-1
195
+ MSLEVEL=2
196
+ RTINSECONDS=77.81
197
+ ADDUCT=[M+H]+
198
+ PEPMASS=485.21947
199
+ CHARGE=1
200
+ SPECTYPE=SAME_ENERGY
201
+ Collision energy=30.0
202
+ FRAGMENTATION_METHOD=HCD
203
+ ISOLATION_WINDOW=1.2000000476839432
204
+ Acquisition=Crude
205
+ INSTRUMENT_TYPE=Orbitrap
206
+ SOURCE_INSTRUMENT=Orbitrap ID-X
207
+ IMS_TYPE=none
208
+ ION_SOURCE=ESI
209
+ IONMODE=Positive
210
+ PI=Tomas Pluskal
211
+ DATACOLLECTOR=Corinna Brungs
212
+ DATASET_ID=MSVPLACEHOLDERID
213
+ USI=mzspec:MSVPLACEHOLDERID:20220601_pluskal_mce_1D1_A8_id.mzML:-1
214
+ SCANS=-1
215
+ PRECURSOR_PURITY=1.0
216
+ QUALITY_CHIMERIC=PASSED
217
+ QUALITY_EXPLAINED_INTENSITY=1.0
218
+ QUALITY_EXPLAINED_SIGNALS=1.0
219
+ Num peaks=15
220
+ 70.064962 2.862
221
+ 84.080627 1.511
222
+ 109.076332 1.385
223
+ 114.091324 2.886
224
+ 123.023911 14.149
225
+ 161.059692 2.52
226
+ 202.086288 1.435
227
+ 230.105148 3.367
228
+ 241.133691 26.664
229
+ 261.159847 3.062
230
+ 271.144251 41.501
231
+ 289.154956 100
232
+ 346.175893 26.1
233
+ 351.136749 1.2
234
+ 410.186693 8.75
235
+ END IONS
236
+
237
+ BEGIN IONS
238
+ NAME=IPSU
239
+ DESCRIPTION=MCE bioactive compounds library
240
+ EXACTMASS=405.216475
241
+ FORMULA=C23H27N5O2
242
+ INCHI=InChI=1S/C23H27N5O2/c1-30-20-7-11-24-22(26-20)27-13-9-23(10-14-27)8-4-12-28(21(23)29)16-17-15-25-19-6-3-2-5-18(17)19/h2-3,5-7,11,15,25H,4,8-10,12-14,16H2,1H3
243
+ INCHIAUX=PCMHOSYCWRRHTG-UHFFFAOYSA-N
244
+ SMILES=COc1nc(N2CCC3(CCCN(Cc4c[nH]c5ccccc45)C3=O)CC2)ncc1
245
+ FEATURE_ID=660
246
+ MSLEVEL=2
247
+ RTINSECONDS=110.45
248
+ ADDUCT=[M+H]+
249
+ PEPMASS=406.22375
250
+ CHARGE=1
251
+ Collision energy=20.0
252
+ FRAGMENTATION_METHOD=HCD
253
+ ISOLATION_WINDOW=1.2000000476839432
254
+ Acquisition=Crude
255
+ INSTRUMENT_TYPE=Orbitrap
256
+ SOURCE_INSTRUMENT=Orbitrap ID-X
257
+ IMS_TYPE=none
258
+ ION_SOURCE=ESI
259
+ IONMODE=Positive
260
+ PI=Tomas Pluskal
261
+ DATACOLLECTOR=Corinna Brungs
262
+ DATASET_ID=MSVPLACEHOLDERID
263
+ USI=mzspec:MSVPLACEHOLDERID:20220601_pluskal_mce_1D1_A1_id.mzML:660
264
+ SCANS=660
265
+ PRECURSOR_PURITY=0.9731724062527856
266
+ QUALITY_CHIMERIC=PASSED
267
+ QUALITY_EXPLAINED_INTENSITY=0.99532574
268
+ QUALITY_EXPLAINED_SIGNALS=0.8888889
269
+ Num peaks=9
270
+ 45.134823 1.082
271
+ 45.13699 1.064
272
+ 110.096245 17.184
273
+ 130.064972 13.97
274
+ 136.111862 0.874
275
+ 277.16571 100
276
+ 289.165924 52.842
277
+ 307.177856 0.793
278
+ 406.223083 43.696
279
+ END IONS
280
+
281
+ BEGIN IONS
282
+ NAME=Vadimezan
283
+ DESCRIPTION=MCE bioactive compounds library
284
+ EXACTMASS=282.089209
285
+ FORMULA=C17H14O4
286
+ INCHI=InChI=1S/C17H14O4/c1-9-6-7-13-15(20)12-5-3-4-11(8-14(18)19)17(12)21-16(13)10(9)2/h3-7H,8H2,1-2H3,(H,18,19)
287
+ INCHIAUX=XGOYIMQSIKSOBS-UHFFFAOYSA-N
288
+ SMILES=Cc1c(C)c2c(cc1)c(=O)c1cccc(CC(=O)O)c1o2
289
+ FEATURE_ID=474
290
+ MSLEVEL=2
291
+ RTINSECONDS=113.94
292
+ ADDUCT=[M+H]+
293
+ PEPMASS=283.09648
294
+ CHARGE=1
295
+ Collision energy=60.0
296
+ FRAGMENTATION_METHOD=HCD
297
+ ISOLATION_WINDOW=1.2000000476839432
298
+ Acquisition=Crude
299
+ INSTRUMENT_TYPE=Orbitrap
300
+ SOURCE_INSTRUMENT=Orbitrap ID-X
301
+ IMS_TYPE=none
302
+ ION_SOURCE=ESI
303
+ IONMODE=Positive
304
+ PI=Tomas Pluskal
305
+ DATACOLLECTOR=Corinna Brungs
306
+ DATASET_ID=MSVPLACEHOLDERID
307
+ USI=mzspec:MSVPLACEHOLDERID:20220601_pluskal_mce_1D1_A2_id.mzML:474
308
+ SCANS=474
309
+ PRECURSOR_PURITY=0.973405752811273
310
+ QUALITY_CHIMERIC=PASSED
311
+ QUALITY_EXPLAINED_INTENSITY=0.99386805
312
+ QUALITY_EXPLAINED_SIGNALS=0.9444444
313
+ Num peaks=36
314
+ 91.053741 1.144
315
+ 105.069382 1.254
316
+ 141.069168 1.139
317
+ 152.061859 1.019
318
+ 153.069305 1.283
319
+ 155.085022 0.966
320
+ 158.033508 1.081
321
+ 165.069733 7.345
322
+ 166.077545 9.805
323
+ 167.085587 4.037
324
+ 168.093094 3.638
325
+ 178.077515 2.42
326
+ 179.085281 1.429
327
+ 181.06459 6.555
328
+ 181.101135 2.534
329
+ 183.116669 0.903
330
+ 190.078049 0.997
331
+ 191.085159 1.264
332
+ 193.101242 1.467
333
+ 194.07254 10.324
334
+ 195.080231 21.628
335
+ 196.087906 3.722
336
+ 208.088181 2.735
337
+ 209.059555 0.982
338
+ 209.096024 59.124
339
+ 209.119858 0.749
340
+ 210.104065 2.296
341
+ 221.096359 0.981
342
+ 222.067566 2.431
343
+ 223.0755 19.616
344
+ 225.091003 2.035
345
+ 236.083176 2.085
346
+ 237.090988 100
347
+ 238.098816 15.987
348
+ 239.106781 0.935
349
+ 283.096832 2.447
350
+ END IONS