{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T12:43:28Z","timestamp":1781613808343,"version":"3.54.5"},"reference-count":87,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100009708","name":"Novo Nordisk Fonden","doi-asserted-by":"publisher","award":["NNF16OC0020670)"],"award-info":[{"award-number":["NNF16OC0020670)"]}],"id":[{"id":"10.13039\/501100009708","id-type":"DOI","asserted-by":"publisher"}]},{"name":"PRO-MS: Danish National Mass Spectrometry Platform for Functional Proteomics"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Nat Mach Intell"],"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:p>Mass spectrometry-based proteomics focuses on identifying the peptide that generates a tandem mass spectrum. Traditional methods rely on protein databases but are often limited or inapplicable in certain contexts. De novo peptide sequencing, which assigns peptide sequences to spectra without prior information, is valuable for diverse biological applications; however, owing to a lack of accuracy, it remains challenging to apply. Here we introduce InstaNovo, a transformer model that translates fragment ion peaks into peptide sequences. We demonstrate that InstaNovo outperforms state-of-the-art methods and showcase its utility in several applications. We also introduce InstaNovo+, a diffusion model that improves performance through iterative refinement of predicted sequences. Using these models, we achieve improved therapeutic sequencing coverage, discover novel peptides and detect unreported organisms in diverse datasets, thereby expanding the scope and detection rate of proteomics searches. Our models unlock opportunities across domains such as direct protein sequencing, immunopeptidomics and exploration of the dark proteome.<\/jats:p>","DOI":"10.1038\/s42256-025-01019-5","type":"journal-article","created":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T05:25:10Z","timestamp":1743398710000},"page":"565-579","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":33,"title":["InstaNovo enables diffusion-powered de novo peptide sequencing in large-scale proteomics experiments"],"prefix":"10.1038","volume":"7","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1355-8743","authenticated-orcid":false,"given":"Kevin","family":"Eloff","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3907-9281","authenticated-orcid":false,"given":"Konstantinos","family":"Kalogeropoulos","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7514-677X","authenticated-orcid":false,"given":"Amandla","family":"Mabona","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8702-1792","authenticated-orcid":false,"given":"Oliver","family":"Morell","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2983-9833","authenticated-orcid":false,"given":"Rachel","family":"Catzel","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Esperanza","family":"Rivera-de-Torre","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jakob","family":"Berg Jespersen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wesley","family":"Williams","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sam P. B.","family":"van Beljouw","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2022-6766","authenticated-orcid":false,"given":"Marcin J.","family":"Skwark","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6918-5574","authenticated-orcid":false,"given":"Andreas Hougaard","family":"Laustsen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Stan J. J.","family":"Brouns","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2158-0601","authenticated-orcid":false,"given":"Anne","family":"Ljungars","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3117-7832","authenticated-orcid":false,"given":"Erwin M.","family":"Schoof","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4480-5567","authenticated-orcid":false,"given":"Jeroen","family":"Van Goey","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ulrich","family":"auf dem Keller","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Karim","family":"Beguir","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Nicolas","family":"Lopez Carranza","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2979-5663","authenticated-orcid":false,"given":"Timothy P.","family":"Jenkins","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,3,31]]},"reference":[{"key":"1019_CR1","doi-asserted-by":"publisher","first-page":"347","DOI":"10.1038\/nature19949","volume":"537","author":"R Aebersold","year":"2016","unstructured":"Aebersold, R. & Mann, M. Mass-spectrometric exploration of proteome structure and function. Nature 537, 347\u2013355 (2016).","journal-title":"Nature"},{"key":"1019_CR2","doi-asserted-by":"publisher","first-page":"2343","DOI":"10.1021\/cr3003533","volume":"113","author":"Y Zhang","year":"2013","unstructured":"Zhang, Y., Fonslow, B. R., Shan, B., Baek, M.-C. & Yates III, J. R. Protein analysis by shotgun\/bottom-up proteomics. Chem. Rev. 113, 2343\u20132394 (2013).","journal-title":"Chem. Rev."},{"key":"1019_CR3","doi-asserted-by":"publisher","first-page":"5383","DOI":"10.1021\/ac025747h","volume":"74","author":"A Keller","year":"2002","unstructured":"Keller, A., Nesvizhskii, A. I., Kolker, E. & Aebersold, R. Empirical statistical model to estimate the accuracy of peptide identifications made by MS\/MS and database search. Anal. Chem. 74, 5383\u20135392 (2002).","journal-title":"Anal. Chem."},{"key":"1019_CR4","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1038\/nmeth1019","volume":"4","author":"JE Elias","year":"2007","unstructured":"Elias, J. E. & Gygi, S. P. Target-decoy search strategy for increased confidence in large-scale protein identifications by mass spectrometry. Nat. Methods 4, 207\u2013214 (2007).","journal-title":"Nat. Methods"},{"key":"1019_CR5","doi-asserted-by":"publisher","first-page":"329","DOI":"10.1074\/mcp.M112.026500","volume":"13","author":"NW Bateman","year":"2014","unstructured":"Bateman, N. W. et al. Maximizing peptide identification events in proteomic workflows using data-dependent acquisition (DDA). Mol. Cell. Proteomics 13, 329\u2013338 (2014).","journal-title":"Mol. Cell. Proteomics"},{"key":"1019_CR6","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1038\/nbt.2841","volume":"32","author":"HL R\u00f6st","year":"2014","unstructured":"R\u00f6st, H. L. et al. OpenSWATH enables automated, targeted analysis of data-independent acquisition MS data. Nat. Biotechnol. 32, 219\u2013223 (2014).","journal-title":"Nat. Biotechnol."},{"key":"1019_CR7","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1021\/pr700739d","volume":"7","author":"L K\u00e4ll","year":"2008","unstructured":"K\u00e4ll, L., Storey, J. D., MacCoss, M. J. & Noble, W. S. Posterior error probabilities and false discovery rates: two sides of the same coin. J. Proteome Res. 7, 40\u201344 (2008).","journal-title":"J. Proteome Res."},{"key":"1019_CR8","doi-asserted-by":"publisher","DOI":"10.1186\/1471-2105-13-S16-S1","volume":"13","author":"K Ma","year":"2012","unstructured":"Ma, K., Vitek, O. & Nesvizhskii, A. I. A statistical model-building perspective to identification of MS\/MS spectra with PeptideProphet. BMC Bioinformatics 13, 1 (2012).","journal-title":"BMC Bioinformatics"},{"key":"1019_CR9","doi-asserted-by":"publisher","first-page":"1719","DOI":"10.1007\/s13361-016-1460-7","volume":"27","author":"M The","year":"2016","unstructured":"The, M., MacCoss, M. J., Noble, W. S. & K\u00e4ll, L. Fast and accurate protein false discovery rates on large-scale proteomics data sets with percolator 3.0. J. Am. Soc. Mass Spectrom. 27, 1719\u20131727 (2016).","journal-title":"J. Am. Soc. Mass Spectrom."},{"key":"1019_CR10","doi-asserted-by":"publisher","unstructured":"Chandramouli, K. & Qian, P.-Y. Proteomics: challenges, techniques and possibilities to overcome biological sample complexity. Hum. Genomics Proteomics https:\/\/doi.org\/10.4061\/2009\/239204 (2009).","DOI":"10.4061\/2009\/239204"},{"key":"1019_CR11","doi-asserted-by":"publisher","first-page":"195","DOI":"10.1038\/nmeth725","volume":"1","author":"RG Sadygov","year":"2004","unstructured":"Sadygov, R. G., Cociorva, D. & Yates, J. R. III Large-scale database searching using tandem mass spectra: looking up the answer in the back of the book. Nat. Methods 1, 195\u2013202 (2004).","journal-title":"Nat. Methods"},{"key":"1019_CR12","doi-asserted-by":"publisher","first-page":"743","DOI":"10.1038\/nbt.3267","volume":"33","author":"JM Chick","year":"2015","unstructured":"Chick, J. M. et al. A mass-tolerant database search identifies a large proportion of unassigned spectra in shotgun proteomics as modified peptides. Nat. Biotechnol. 33, 743\u2013749 (2015).","journal-title":"Nat. Biotechnol."},{"key":"1019_CR13","doi-asserted-by":"publisher","first-page":"513","DOI":"10.1038\/nmeth.4256","volume":"14","author":"AT Kong","year":"2017","unstructured":"Kong, A. T., Leprevost, F. V., Avtonomov, D. M., Mellacheruvu, D. & Nesvizhskii, A. I. MSFragger: ultrafast and comprehensive peptide identification in mass spectrometry-based proteomics. Nat. Methods 14, 513\u2013520 (2017).","journal-title":"Nat. Methods"},{"key":"1019_CR14","doi-asserted-by":"publisher","unstructured":"Geiszler, D. J. et al. PTM-Shepherd: analysis and summarization of post-translational and chemical modifications from open search results. Mol. Cell. Proteomics https:\/\/doi.org\/10.1074\/mcp.TIR120.002216 (2021).","DOI":"10.1074\/mcp.TIR120.002216"},{"key":"1019_CR15","doi-asserted-by":"publisher","first-page":"7469","DOI":"10.1021\/acsomega.0c05997","volume":"6","author":"F Bugyi","year":"2021","unstructured":"Bugyi, F. et al. Influence of post-translational modifications on protein identification in database searches. ACS Omega 6, 7469\u20137477 (2021).","journal-title":"ACS Omega"},{"key":"1019_CR16","doi-asserted-by":"publisher","first-page":"2394","DOI":"10.1074\/mcp.M114.046995","volume":"14","author":"MM Savitski","year":"2015","unstructured":"Savitski, M. M., Wilhelm, M., Hahne, H., Kuster, B. & Bantscheff, M. A scalable approach for protein false discovery rate estimation in large proteomic data sets [s]. Mol. Cell. Proteomics 14, 2394\u20132404 (2015).","journal-title":"Mol. Cell. Proteomics"},{"key":"1019_CR17","doi-asserted-by":"publisher","first-page":"2172","DOI":"10.1021\/acs.jproteome.3c00176","volume":"22","author":"A Ebadi","year":"2023","unstructured":"Ebadi, A., Freestone, J., Noble, W. S. & Keich, U. Bridging the false discovery gap. J. Proteome Res. 22, 2172\u20132178 (2023).","journal-title":"J. Proteome Res."},{"key":"1019_CR18","doi-asserted-by":"publisher","first-page":"1700150","DOI":"10.1002\/pmic.201700150","volume":"18","author":"T Muth","year":"2018","unstructured":"Muth, T., Hartkopf, F., Vaudel, M. & Renard, B. Y. A potential golden age to come\u2014current tools, recent use cases, and future avenues for de novo sequencing in proteomics. Proteomics 18, 1700150 (2018).","journal-title":"Proteomics"},{"key":"1019_CR19","doi-asserted-by":"crossref","unstructured":"Hughes, C., Ma, B. & Lajoie, G. A. De novo sequencing methods in proteomics. In Proteome Bioinformatics. Methods in Molecular Biology Vol. 604 (eds Hubbard, S. & Jones, A.) 105\u2013121 (Humana Press, 2010).","DOI":"10.1007\/978-1-60761-444-9_8"},{"key":"1019_CR20","doi-asserted-by":"publisher","first-page":"964","DOI":"10.1021\/ac048788h","volume":"77","author":"A Frank","year":"2005","unstructured":"Frank, A. & Pevzner, P. PepNovo: de novo peptide sequencing via probabilistic network modeling. Anal. Chem. 77, 964\u2013973 (2005).","journal-title":"Anal. Chem."},{"key":"1019_CR21","doi-asserted-by":"publisher","first-page":"2337","DOI":"10.1002\/rcm.1196","volume":"17","author":"B Ma","year":"2003","unstructured":"Ma, B. et al. PEAKS: powerful software for peptide de novo sequencing by tandem mass spectrometry. Rapid Commun. Mass Spectrom. 17, 2337\u20132342 (2003).","journal-title":"Rapid Commun. Mass Spectrom."},{"key":"1019_CR22","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1002\/mas.21406","volume":"34","author":"KF Medzihradszky","year":"2015","unstructured":"Medzihradszky, K. F. & Chalkley, R. J. Lessons in de novo peptide sequencing by tandem mass spectrometry. Mass Spectrom. Rev. 34, 43\u201363 (2015).","journal-title":"Mass Spectrom. Rev."},{"key":"1019_CR23","doi-asserted-by":"publisher","first-page":"114","DOI":"10.1021\/pr060271u","volume":"6","author":"AM Frank","year":"2007","unstructured":"Frank, A. M., Savitski, M. M., Nielsen, M. L., Zubarev, R. A. & Pevzner, P. A. De novo peptide sequencing and identification with precision mass spectrometry. J. Proteome Res. 6, 114\u2013123 (2007).","journal-title":"J. Proteome Res."},{"key":"1019_CR24","doi-asserted-by":"publisher","first-page":"509","DOI":"10.1038\/s41592-019-0426-7","volume":"16","author":"S Gessulat","year":"2019","unstructured":"Gessulat, S. et al. Prosit: proteome-wide prediction of peptide tandem mass spectra by deep learning. Nat. Methods 16, 509\u2013518 (2019).","journal-title":"Nat. Methods"},{"key":"1019_CR25","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1038\/s41592-019-0638-x","volume":"17","author":"V Demichev","year":"2020","unstructured":"Demichev, V., Messner, C. B., Vernardis, S. I., Lilley, K. S. & Ralser, M. DIA-NN: neural networks and interference correction enable deep proteome coverage in high throughput. Nat. Methods 17, 41\u201344 (2020).","journal-title":"Nat. Methods"},{"key":"1019_CR26","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-021-23713-9","volume":"12","author":"M Wilhelm","year":"2021","unstructured":"Wilhelm, M. et al. Deep learning boosts sensitivity of mass spectrometry-based immunopeptidomics. Nat. Commun. 12, 3346 (2021).","journal-title":"Nat. Commun."},{"key":"1019_CR27","unstructured":"Yang, Y. et al. DPST: de novo peptide sequencing with amino-acid-aware transformers. Preprint at https:\/\/arxiv.org\/abs\/2203.13132 (2022)"},{"key":"1019_CR28","unstructured":"Ge, C. et al. DepPS: an improved deep learning model for de novo peptide sequencing. Preprint at https:\/\/arxiv.org\/abs\/2203.08820 (2022)"},{"key":"1019_CR29","doi-asserted-by":"publisher","unstructured":"Yilmaz, M. et al. Sequence-to-sequence translation from mass spectra to peptides with a transformer model. Nat Commun. https:\/\/doi.org\/10.1038\/s41467-024-49731-x (2024).","DOI":"10.1038\/s41467-024-49731-x"},{"key":"1019_CR30","doi-asserted-by":"publisher","first-page":"8247","DOI":"10.1073\/pnas.1705691114","volume":"114","author":"NH Tran","year":"2017","unstructured":"Tran, N. H., Zhang, X., Xin, L., Shan, B. & Li, M. De novo peptide sequencing by deep learning. Proc. Natl Acad. Sci. USA 114, 8247\u20138252 (2017).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"1019_CR31","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1038\/s41592-018-0260-3","volume":"16","author":"NH Tran","year":"2019","unstructured":"Tran, N. H. et al. Deep learning enables de novo peptide sequencing from data-independent-acquisition mass spectrometry. Nat. Methods 16, 63\u201366 (2019).","journal-title":"Nat. Methods"},{"key":"1019_CR32","doi-asserted-by":"publisher","first-page":"954","DOI":"10.1093\/bib\/bbx033","volume":"19","author":"T Muth","year":"2018","unstructured":"Muth, T. & Renard, B. Y. Evaluating de novo sequencing in proteomics: already an accurate alternative to database-driven peptide identification? Brief. Bioinformatics 19, 954\u2013970 (2018).","journal-title":"Brief. Bioinformatics"},{"key":"1019_CR33","unstructured":"Voronov, G. et al. Multi-scale sinusoidal embeddings enable learning on high resolution mass spectrometry data. Preprint at https:\/\/arxiv.org\/abs\/2207.02980 (2022)"},{"key":"1019_CR34","unstructured":"Kaplan, J. et al. Scaling laws for neural language models. Preprint at https:\/\/arxiv.org\/abs\/2001.08361 (2020)."},{"key":"1019_CR35","unstructured":"Tay, Y. et al. Scale efficiently: insights from pre-training and fine-tuning transformers. Preprint at https:\/\/arxiv.org\/abs\/2109.10686 (2022)."},{"key":"1019_CR36","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1038\/nmeth.4153","volume":"14","author":"DP Zolg","year":"2017","unstructured":"Zolg, D. P. et al. Building proteometools based on a complete synthetic human proteome. Nat. Methods 14, 259\u2013262 (2017).","journal-title":"Nat. Methods"},{"key":"1019_CR37","doi-asserted-by":"publisher","unstructured":"Karita, S. et al. A comparative study on transformer vs RNN in speech applications. In 2019 IEEE Automatic Speech Recognition and Understanding Workshop 449\u2013456 (2019); https:\/\/doi.org\/10.1109\/ASRU46091.2019.9003750","DOI":"10.1109\/ASRU46091.2019.9003750"},{"key":"1019_CR38","doi-asserted-by":"publisher","first-page":"200","DOI":"10.1145\/3505244","volume":"54","author":"S Khan","year":"2022","unstructured":"Khan, S. et al. Transformers in vision: a survey. ACM Comput. Surv. 54, 200\u2013120041 (2022).","journal-title":"ACM Comput. Surv."},{"key":"1019_CR39","doi-asserted-by":"publisher","first-page":"102802","DOI":"10.1016\/j.media.2023.102802","volume":"88","author":"F Shamshad","year":"2023","unstructured":"Shamshad, F. et al. Transformers in medical imaging: a survey. Med. Image Anal. 88, 102802 (2023).","journal-title":"Med. Image Anal."},{"key":"1019_CR40","doi-asserted-by":"crossref","unstructured":"Wen, Q. et al. Transformers in time series: a survey. In Proc. Thirty-Second International Joint Conference on Artificial Intelligence (IJCAI-23) Survey Track https:\/\/www.ijcai.org\/proceedings\/2023\/0759.pdf (IJCAI, 2023).","DOI":"10.24963\/ijcai.2023\/759"},{"key":"1019_CR41","doi-asserted-by":"publisher","first-page":"021","DOI":"10.1093\/bib\/bbae021","volume":"25","author":"T Yang","year":"2024","unstructured":"Yang, T. et al. Introducing \u03c0-HelixNovo for practical large-scale de novo peptide sequencing. Brief. Bioinformatics 25, 021 (2024).","journal-title":"Brief. Bioinformatics"},{"key":"1019_CR42","doi-asserted-by":"publisher","first-page":"1250","DOI":"10.1038\/s42256-023-00738-x","volume":"5","author":"Z Mao","year":"2023","unstructured":"Mao, Z., Zhang, R., Xin, L. & Li, M. Mitigating the missing-fragmentation problem in de novo peptide sequencing with a two-stage graph-based deep learning model. Nat. Mach. Intell. 5, 1250\u20131260 (2023).","journal-title":"Nat. Mach. Intell."},{"key":"1019_CR43","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N. & Ganguli, S. Deep unsupervised learning using nonequilibrium thermodynamics. In Proc. 32nd International Conference on Machine Learning: Proceedings of Machine Learning Research Vol. 37 (eds Bach, F. & Blei, D.) 2256\u20132265 (PMLR, 2015); https:\/\/proceedings.mlr.press\/v37\/sohl-dickstein15.html"},{"key":"1019_CR44","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A. & Abbeel, P. Denoising diffusion probabilistic models. Adv. Neural Inf. Process. Syst. 33, 6840\u20136851 (2020).","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"1019_CR45","unstructured":"Dhariwal, P. & Nichol, A. Diffusion models beat gans on image synthesis. In 35th Conference on Neural Information Processing Systems https:\/\/proceedings.nips.cc\/paper\/2021\/file\/49ad23d1ec9fa4bd8d77d02681df5cfa-Paper.pdf (NeurIPS, 2021)."},{"key":"1019_CR46","doi-asserted-by":"publisher","unstructured":"Maz\u00e9, F. & Ahmed, F. Diffusion models beat gans on topology optimization. In Proc. AAAI Conference on Artificial Intelligence https:\/\/doi.org\/10.1609\/aaai.v37i8.26093 (AAAI, 2023).","DOI":"10.1609\/aaai.v37i8.26093"},{"key":"1019_CR47","doi-asserted-by":"crossref","unstructured":"Baas, M., Eloff, K. & Kamper, H. Transfusion: transcribing speech with multinomial diffusion. In Artificial Intelligence Research (eds Pillay, A., Jembere, E. & Gerber, A.) 231\u2013245 (Springer, 2022).","DOI":"10.1007\/978-3-031-22321-1_16"},{"key":"1019_CR48","doi-asserted-by":"publisher","first-page":"420","DOI":"10.1038\/s42256-021-00304-3","volume":"3","author":"R Qiao","year":"2021","unstructured":"Qiao, R. et al. Computationally instrument-resolution-independent de novo peptide sequencing for high-resolution devices. Na. Mach. Intell. 3, 420\u2013425 (2021).","journal-title":"Na. Mach. Intell."},{"key":"1019_CR49","doi-asserted-by":"publisher","first-page":"542","DOI":"10.1093\/bib\/bbac542","volume":"24","author":"D Beslic","year":"2023","unstructured":"Beslic, D., Tscheuschner, G., Renard, B. Y., Weller, M. G. & Muth, T. Comprehensive evaluation of peptide de novo sequencing tools for monoclonal antibody assembly. Brief. Bioinformatics 24, 542 (2023).","journal-title":"Brief. Bioinformatics"},{"key":"1019_CR50","doi-asserted-by":"publisher","unstructured":"Mikosi\u0144ski, J. et al. Longitudinal evaluation of biomarkers in wound fluids from venous leg ulcers and split-thickness skin graft donor site wounds treated with a protease-modulating wound dressing. Acta Derm. Venereol. https:\/\/doi.org\/10.2340\/actadv.v102.325 (2022).","DOI":"10.2340\/actadv.v102.325"},{"key":"1019_CR51","doi-asserted-by":"publisher","DOI":"10.1093\/gigascience\/giac121","volume":"11","author":"GTT Nguyen","year":"2022","unstructured":"Nguyen, G. T. T. et al. High-throughput proteomics and in vitro functional characterization of the 26 medically most important elapids and vipers from sub-Saharan Africa. GigaScience 11, 121 (2022).","journal-title":"GigaScience"},{"key":"1019_CR52","doi-asserted-by":"publisher","first-page":"298","DOI":"10.1038\/s41568-022-00446-5","volume":"22","author":"DR Mani","year":"2022","unstructured":"Mani, D. R. et al. Cancer proteogenomics: current impact and future prospects. Nat. Rev. Cancer 22, 298\u2013313 (2022).","journal-title":"Nat. Rev. Cancer"},{"key":"1019_CR53","doi-asserted-by":"publisher","DOI":"10.1038\/s41522-020-0123-4","volume":"6","author":"S Long","year":"2020","unstructured":"Long, S. et al. Metaproteomics characterizes human gut microbiome function in colorectal cancer. npj Biofilms Microbiomes 6, 14 (2020).","journal-title":"npj Biofilms Microbiomes"},{"key":"1019_CR54","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-021-24030-x","volume":"12","author":"I Bludau","year":"2021","unstructured":"Bludau, I. et al. Systematic detection of functional proteoform groups from bottom-up proteomic datasets. Nat. Commun. 12, 3810 (2021).","journal-title":"Nat. Commun."},{"key":"1019_CR55","doi-asserted-by":"publisher","first-page":"714","DOI":"10.1038\/s41592-023-01830-1","volume":"20","author":"RG Huffman","year":"2023","unstructured":"Huffman, R. G. et al. Prioritized mass spectrometry increases the depth, sensitivity and data completeness of single-cell proteomics. Nat. Methods 20, 714\u2013722 (2023).","journal-title":"Nat. Methods"},{"key":"1019_CR56","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-021-27778-4","volume":"13","author":"ST Gebreyesus","year":"2022","unstructured":"Gebreyesus, S. T. et al. Streamlined single-cell proteomics by an integrated microfluidic chip and data-independent acquisition mass spectrometry. Nat. Commun. 13, 37 (2022).","journal-title":"Nat. Commun."},{"key":"1019_CR57","doi-asserted-by":"publisher","first-page":"258","DOI":"10.1038\/nmeth.3255","volume":"12","author":"C-C Tsou","year":"2015","unstructured":"Tsou, C.-C. et al. DIA-Umpire: comprehensive computational framework for data-independent acquisition proteomics. Nat. Methods 12, 258\u2013264 (2015).","journal-title":"Nat. Methods"},{"key":"1019_CR58","doi-asserted-by":"publisher","unstructured":"Gillet, L. C. et al. Targeted data extraction of the ms\/ms spectra generated by data-independent acquisition: a new concept for consistent and accurate proteome analysis. Mol. Cell. Proteomics https:\/\/doi.org\/10.1074\/mcp.O111.016717 (2012).","DOI":"10.1074\/mcp.O111.016717"},{"key":"1019_CR59","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-022-30867-7","volume":"13","author":"L Xin","year":"2022","unstructured":"Xin, L. et al. A streamlined platform for analyzing tera-scale DDA and DIA mass spectrometry data enables highly sensitive immunopeptidomics. Nat. Commun. 13, 3108 (2022).","journal-title":"Nat. Commun."},{"key":"1019_CR60","doi-asserted-by":"publisher","unstructured":"Zolg, D. P. et al. Inferys rescoring: boosting peptide identifications and scoring confidence of database search results. Rapid Commun. Mass Spectrom. https:\/\/doi.org\/10.1002\/rcm.9128 (2021).","DOI":"10.1002\/rcm.9128"},{"key":"1019_CR61","doi-asserted-by":"publisher","first-page":"2301","DOI":"10.1038\/nprot.2016.136","volume":"11","author":"S Tyanova","year":"2016","unstructured":"Tyanova, S., Temu, T. & Cox, J. The MaxQuant computational platform for mass spectrometry-based shotgun proteomics. Nat. Protoc. 11, 2301\u20132319 (2016).","journal-title":"Nat. Protoc."},{"key":"1019_CR62","doi-asserted-by":"crossref","unstructured":"Adusumilli, R. & Mallick, P. Data conversion with proteowizard msconvert. In Proteomics Methods in Molecular Biology Vol 1550 (eds Comai, L., Katz, J. & Mallick, P.) 339\u2013368 (Humana Press, 2017).","DOI":"10.1007\/978-1-4939-6747-6_23"},{"key":"1019_CR63","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1002\/pmic.201300246","volume":"14","author":"HL R\u00f6st","year":"2014","unstructured":"R\u00f6st, H. L., Schmitt, U., Aebersold, R. & Malmstr\u00f6m, L. pyOpenMS: a Python-based interface to the openms mass-spectrometry algorithm library. Proteomics 14, 74\u201377 (2014).","journal-title":"Proteomics"},{"key":"1019_CR64","unstructured":"Vaswani, A. et al. Attention is all you need. In Proc. 31st International Conference on Neural Information Processing Systems https:\/\/papers.nips.cc\/paper_files\/paper\/2017\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf (NeurIPS, 2017)."},{"key":"1019_CR65","unstructured":"Paszke, A. et al. PyTorch: an imperative style, high-performance deep learning library. Adv. Neural Inf. Process. Syst. 32, 8024\u20138035 (2019)."},{"key":"1019_CR66","doi-asserted-by":"publisher","unstructured":"Falcon, W. PyTorchLightning\/pytorch-lightning: 0.7.6 release. Zenodo https:\/\/doi.org\/10.5281\/zenodo.3828935 (2020).","DOI":"10.5281\/zenodo.3828935"},{"key":"1019_CR67","doi-asserted-by":"publisher","first-page":"601","DOI":"10.1002\/bms.1200111109","volume":"11","author":"P Roepstorff","year":"1984","unstructured":"Roepstorff, P. & Fohlman, J. Proposal for a common nomenclature for sequence ions in mass spectra of peptides. Biomed. Mass Spectrom. 11, 601 (1984).","journal-title":"Biomed. Mass Spectrom."},{"key":"1019_CR68","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1016\/j.ymeth.2004.08.013","volume":"35","author":"VH Wysocki","year":"2005","unstructured":"Wysocki, V. H., Resing, K. A., Zhang, Q. & Cheng, G. Mass spectrometry of peptides and proteins. Methods 35, 211\u2013222 (2005).","journal-title":"Methods"},{"key":"1019_CR69","doi-asserted-by":"publisher","first-page":"1399","DOI":"10.1002\/1096-9888(200012)35:12<1399::AID-JMS86>3.0.CO;2-R","volume":"35","author":"VH Wysocki","year":"2000","unstructured":"Wysocki, V. H., Tsaprailis, G., Smith, L. L. & Breci, L. A. Mobile and localized protons: a framework for understanding peptide dissociation. J. Mass Spectrom. 35, 1399\u20131406 (2000).","journal-title":"J. Mass Spectrom."},{"key":"1019_CR70","doi-asserted-by":"publisher","first-page":"699","DOI":"10.1038\/nrm1468","volume":"5","author":"H Steen","year":"2004","unstructured":"Steen, H. & Mann, M. The abc\u2019s (and xyz\u2019s) of peptide sequencing. Nat. Rev. Mol. Cell Biol. 5, 699\u2013711 (2004).","journal-title":"Nat. Rev. Mol. Cell Biol."},{"key":"1019_CR71","unstructured":"Hoogeboom, E., Nielsen, D., Jaini, P., Forr\u00e9, P. & Welling, M. Argmax flows and multinomial diffusion: learning categorical distributions. In 35th Conference on Neural Information Processing Systems https:\/\/openreview.net\/pdf?id=6nbpPqUCIi7 (NeurIPS, 2021)."},{"key":"1019_CR72","doi-asserted-by":"publisher","first-page":"68","DOI":"10.1038\/s41596-018-0082-x","volume":"14","author":"CS Hughes","year":"2019","unstructured":"Hughes, C. S. et al. Single-pot, solid-phase-enhanced sample preparation for proteomics experiments. Nat. Protoc. 14, 68\u201385 (2019).","journal-title":"Nat. Protoc."},{"key":"1019_CR73","doi-asserted-by":"publisher","first-page":"2346","DOI":"10.1021\/acs.jproteome.9b00082","volume":"18","author":"JR Krieger","year":"2019","unstructured":"Krieger, J. R. et al. Evosep one enables robust deep proteome coverage using tandem mass tags while significantly reducing instrument time. J. Proteome Res. 18, 2346\u20132353 (2019).","journal-title":"J. Proteome Res."},{"key":"1019_CR74","doi-asserted-by":"publisher","first-page":"15","DOI":"10.3390\/proteomes9010015","volume":"9","author":"BC Orsburn","year":"2021","unstructured":"Orsburn, B. C. Proteome discoverer\u2014a community enhanced data processing suite for protein informatics. Proteomes 9, 15 (2021).","journal-title":"Proteomes"},{"key":"1019_CR75","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1021\/pr0498638","volume":"4","author":"W-J Qian","year":"2005","unstructured":"Qian, W.-J. et al. Probability-based evaluation of peptide and protein identifications from tandem mass spectrometry and sequest analysis: the human proteome. J. Proteome Res. 4, 53\u201362 (2005).","journal-title":"J. Proteome Res."},{"key":"1019_CR76","doi-asserted-by":"publisher","first-page":"1275","DOI":"10.1111\/j.1462-2920.2012.02774.x","volume":"15","author":"J van de Vossenberg","year":"2013","unstructured":"van de Vossenberg, J. et al. The metagenome of the marine anammox bacterium \u2018Candidatus scalindua profunda\u2019 illustrates the versatility of this globally important nitrogen cycle bacterium. Environ. Microbiol. 15, 1275\u20131289 (2013).","journal-title":"Environ. Microbiol."},{"key":"1019_CR77","doi-asserted-by":"publisher","first-page":"966","DOI":"10.1093\/bioinformatics\/btq054","volume":"26","author":"B MacLean","year":"2010","unstructured":"MacLean, B. et al. Skyline: an open source document editor for creating and analyzing targeted proteomics experiments. Bioinformatics 26, 966\u2013968 (2010).","journal-title":"Bioinformatics"},{"key":"1019_CR78","doi-asserted-by":"publisher","first-page":"844","DOI":"10.1128\/AEM.66.2.844-849.2000","volume":"66","author":"G Sabat","year":"2000","unstructured":"Sabat, G., Rose, P. E., Hickey, W. J. & Harkin, J. M. Selective and sensitive method for pcr amplification of Escherichia coli 16S rRNA genes in soil. Appl. Environ. Microbiol. 66, 844\u2013849 (2000).","journal-title":"Appl. Environ. Microbiol."},{"key":"1019_CR79","doi-asserted-by":"publisher","first-page":"2074","DOI":"10.1128\/JCM.42.5.2074-2079.2004","volume":"42","author":"T Spilker","year":"2004","unstructured":"Spilker, T., Coenye, T., Vandamme, P. A. & Lipuma, J. J. PCR-based assay for differentiation of Pseudomonas aeruginosa from other Pseudomonas species recovered from cystic fibrosis patients. J. Clin. Microbiol. 42, 2074\u20132079 (2004).","journal-title":"J. Clin. Microbiol."},{"key":"1019_CR80","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-023-40129-9","volume":"14","author":"KL Yang","year":"2023","unstructured":"Yang, K. L. et al. MSBooster: improving peptide identification rates using deep learning-based features. Nat. Commun. 14, 4539 (2023).","journal-title":"Nat. Commun."},{"key":"1019_CR81","doi-asserted-by":"publisher","first-page":"543","DOI":"10.1093\/nar\/gkab1038","volume":"50","author":"Y Perez-Riverol","year":"2022","unstructured":"Perez-Riverol, Y. et al. The PRIDE database resources in 2022: a hub for mass spectrometry-based proteomics evidences. Nucleic Acids Res. 50, 543\u2013552 (2022).","journal-title":"Nucleic Acids Res."},{"key":"1019_CR82","doi-asserted-by":"publisher","unstructured":"Jenkins, T., Kalogeropoulos, K. & Eloff, K. InstaNovo: supplementary files supporting the data pre-processing, tool usage, and analysis performed on 8 different application-centric datasets. figshare https:\/\/doi.org\/10.6084\/m9.figshare.24173889.v1 (2023).","DOI":"10.6084\/m9.figshare.24173889.v1"},{"key":"1019_CR83","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1074\/mcp.TIR117.000383","volume":"17","author":"C Chong","year":"2018","unstructured":"Chong, C. et al. High-throughput and sensitive immunopeptidomics platform reveals profound interferon \u03b3-mediated remodeling of the human leukocyte antigen (HLA) ligandome. Mol. Cell. Proteomics 17, 533\u2013548 (2018).","journal-title":"Mol. Cell. Proteomics"},{"key":"1019_CR84","doi-asserted-by":"publisher","unstructured":"Beslic, D., Tscheuschner, G., Weller, M. G., Renard, B. Y. & Muth, T. Supplementary data for \u2018Comprehensive evaluation of peptide de novo sequencing tools for monoclonal antibody assembly\u2019. figshare https:\/\/doi.org\/10.6084\/m9.figshare.21394143.v1 (2022).","DOI":"10.6084\/m9.figshare.21394143.v1"},{"key":"1019_CR85","doi-asserted-by":"publisher","unstructured":"Eloff, K., Mabona, A., Catzel, R. & Van Goey, J. InstaNovo (Version 1.0.1). Zenodo https:\/\/doi.org\/10.5281\/zenodo.14712454 (2025).","DOI":"10.5281\/zenodo.14712454"},{"key":"1019_CR86","doi-asserted-by":"publisher","unstructured":"InstaDeep Ltd ms_ninespecies_benchmark (Revision b16a565). Hugging Face https:\/\/doi.org\/10.57967\/hf\/3821 (2024).","DOI":"10.57967\/hf\/3821"},{"key":"1019_CR87","doi-asserted-by":"publisher","unstructured":"InstaDeep Ltd ms_proteometools (Revision c30786d). Hugging Face https:\/\/doi.org\/10.57967\/hf\/3822 (2024).","DOI":"10.57967\/hf\/3822"}],"container-title":["Nature Machine Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s42256-025-01019-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-025-01019-5","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-025-01019-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,10]],"date-time":"2025-06-10T01:04:02Z","timestamp":1749517442000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s42256-025-01019-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,31]]},"references-count":87,"journal-issue":{"issue":"4","published-online":{"date-parts":[[2025,4]]}},"alternative-id":["1019"],"URL":"https:\/\/doi.org\/10.1038\/s42256-025-01019-5","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-3376248\/v1","asserted-by":"object"}]},"ISSN":["2522-5839"],"issn-type":[{"value":"2522-5839","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,3,31]]},"assertion":[{"value":"10 March 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 February 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 March 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"K.E., A.M., J.V.G., W.W., M.J.S., R.C., K.B. and N.L.C. are or were employees of InstaDeep, 5 Merchant Square, London, UK. The other authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}