{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T22:59:46Z","timestamp":1777417186048,"version":"3.51.4"},"reference-count":76,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2023,8,17]],"date-time":"2023-08-17T00:00:00Z","timestamp":1692230400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,8,17]],"date-time":"2023-08-17T00:00:00Z","timestamp":1692230400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Machine Learning for Pharmaceutical Discovery and Synthesis consortium"},{"name":"Takeda Healthcare AI Fellowship"},{"DOI":"10.13039\/100000002","name":"U.S. Department of Health & Human Services | National Institutes of Health","doi-asserted-by":"publisher","award":["R01AI172147"],"award-info":[{"award-number":["R01AI172147"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"U.S. Department of Health & Human Services | National Institutes of Health","doi-asserted-by":"publisher","award":["P30DK043351"],"award-info":[{"award-number":["P30DK043351"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Nat Mach Intell"],"DOI":"10.1038\/s42256-023-00708-3","type":"journal-article","created":{"date-parts":[[2023,8,17]],"date-time":"2023-08-17T12:01:34Z","timestamp":1692273694000},"page":"965-979","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":61,"title":["Annotating metabolite mass spectra with domain-inspired chemical formula transformers"],"prefix":"10.1038","volume":"5","author":[{"given":"Samuel","family":"Goldman","sequence":"first","affiliation":[]},{"given":"Jeremy","family":"Wohlwend","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3064-1055","authenticated-orcid":false,"given":"Martin","family":"Stra\u017ear","sequence":"additional","affiliation":[]},{"given":"Guy","family":"Haroush","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5630-5167","authenticated-orcid":false,"given":"Ramnik J.","family":"Xavier","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8271-8723","authenticated-orcid":false,"given":"Connor W.","family":"Coley","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,8,17]]},"reference":[{"key":"708_CR1","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1016\/j.ccr.2010.12.014","volume":"19","author":"W Xu","year":"2011","unstructured":"Xu, W. et al. Oncometabolite 2-hydroxyglutarate is a competitive inhibitor of \u03b1-ketoglutarate-dependent dioxygenases. Cancer Cell 19, 17\u201330 (2011).","journal-title":"Cancer Cell"},{"key":"708_CR2","doi-asserted-by":"publisher","first-page":"739","DOI":"10.1038\/nature08617","volume":"462","author":"L Dang","year":"2009","unstructured":"Dang, L. et al. Cancer-associated IDH1 mutations produce 2-hydroxyglutarate. Nature 462, 739\u2013744 (2009).","journal-title":"Nature"},{"key":"708_CR3","doi-asserted-by":"publisher","first-page":"1577","DOI":"10.1016\/j.molp.2019.11.005","volume":"12","author":"MP Torrens-Spence","year":"2019","unstructured":"Torrens-Spence, M. P. et al. PBS3 and EPS1 complete salicylic acid biosynthesis from isochorismate in Arabidopsis. Mol. Plant 12, 1577\u20131586 (2019).","journal-title":"Mol. Plant"},{"key":"708_CR4","doi-asserted-by":"publisher","first-page":"1819","DOI":"10.1152\/physrev.00035.2018","volume":"99","author":"DS Wishart","year":"2019","unstructured":"Wishart, D. S. Metabolomics for investigating physiological and pathophysiological processes. Physiol. Rev. 99, 1819\u20131875 (2019).","journal-title":"Physiol. Rev."},{"key":"708_CR5","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/s11306-008-0152-0","volume":"5","author":"JG Bundy","year":"2009","unstructured":"Bundy, J. G., Davey, M. P. & Viant, M. R. Environmental metabolomics: a critical review and future perspectives. Metabolomics 5, 3\u201321 (2009).","journal-title":"Metabolomics"},{"key":"708_CR6","doi-asserted-by":"publisher","first-page":"458","DOI":"10.1038\/s41586-021-03832-5","volume":"599","author":"Y Sato","year":"2021","unstructured":"Sato, Y. et al. Novel bile acid biosynthetic pathways are enriched in the microbiome of centenarians. Nature 599, 458\u2013464 (2021).","journal-title":"Nature"},{"key":"708_CR7","doi-asserted-by":"publisher","first-page":"2779","DOI":"10.1007\/s00216-010-4142-5","volume":"398","author":"S Neumann","year":"2010","unstructured":"Neumann, S. & B\u00f6cker, S. Computational mass spectrometry for metabolomics: identification of metabolites and small molecules. Anal. Bioanal. Chem. 398, 2779\u20132788 (2010).","journal-title":"Anal. Bioanal. Chem."},{"key":"708_CR8","doi-asserted-by":"publisher","DOI":"10.1007\/s11306-022-01947-y","volume":"18","author":"W Bittremieux","year":"2022","unstructured":"Bittremieux, W., Wang, M. & Dorrestein, P. C. The critical role that spectral libraries play in capturing the metabolomics community knowledge. Metabolomics 18, 94 (2022).","journal-title":"Metabolomics"},{"key":"708_CR9","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.cbpa.2021.04.005","volume":"65","author":"M AlQuraishi","year":"2021","unstructured":"AlQuraishi, M. Machine learning in protein structure prediction. Curr. Opin. Chem. Biol. 65, 1\u20138 (2021).","journal-title":"Curr. Opin. Chem. Biol."},{"key":"708_CR10","doi-asserted-by":"publisher","first-page":"583","DOI":"10.1038\/s41586-021-03819-2","volume":"596","author":"J Jumper","year":"2021","unstructured":"Jumper, J. et al. Highly accurate protein structure prediction with AlphaFold. Nature 596, 583\u2013589 (2021).","journal-title":"Nature"},{"key":"708_CR11","doi-asserted-by":"publisher","first-page":"905","DOI":"10.1038\/s41592-020-0933-6","volume":"17","author":"L-F Nothias","year":"2020","unstructured":"Nothias, L.-F. et al. Feature-based molecular networking in the GNPS analysis environment. Nat. Methods 17, 905\u2013908 (2020).","journal-title":"Nat. Methods"},{"key":"708_CR12","doi-asserted-by":"publisher","first-page":"E1743","DOI":"10.1073\/pnas.1203689109","volume":"109","author":"J Watrous","year":"2012","unstructured":"Watrous, J. et al. Mass spectral molecular networking of living microbial colonies. Proc. Natl Acad. Sci. USA 109, E1743\u2013E1752 (2012).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"708_CR13","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1038\/s41586-020-2047-9","volume":"579","author":"RA Quinn","year":"2020","unstructured":"Quinn, R. A. et al. Global chemical effects of the microbiome include new bile-acid conjugations. Nature 579, 123\u2013129 (2020).","journal-title":"Nature"},{"key":"708_CR14","doi-asserted-by":"publisher","DOI":"10.1186\/1471-2105-11-148","volume":"11","author":"S Wolf","year":"2010","unstructured":"Wolf, S., Schmidt, S., M\u00fcller-Hannemann, M. & Neumann, S. In silico fragmentation for computer assisted identification of metabolite mass spectra. BMC Bioinform. 11, 148 (2010).","journal-title":"BMC Bioinform."},{"key":"708_CR15","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13321-016-0115-9","volume":"8","author":"C Ruttkies","year":"2016","unstructured":"Ruttkies, C., Schymanski, E. L., Wolf, S., Hollender, J. & Neumann, S. MetFrag relaunched: incorporating strategies beyond in silico fragmentation. J. Cheminform. 8, 1\u201316 (2016).","journal-title":"J. Cheminform."},{"key":"708_CR16","doi-asserted-by":"publisher","first-page":"11692","DOI":"10.1021\/acs.analchem.1c01465","volume":"93","author":"F Wang","year":"2021","unstructured":"Wang, F. et al. CFM-ID 4.0: more accurate ESI-MS\/MS spectral prediction and compound identification. Anal. Chem. 93, 11692\u201311700 (2021).","journal-title":"Anal. Chem."},{"key":"708_CR17","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/s11306-014-0676-4","volume":"11","author":"F Allen","year":"2015","unstructured":"Allen, F., Greiner, R. & Wishart, D. Competitive fragmentation modeling of ESI-MS\/MS spectra for putative metabolite identification. Metabolomics 11, 98\u2013110 (2015).","journal-title":"Metabolomics"},{"key":"708_CR18","doi-asserted-by":"publisher","first-page":"12580","DOI":"10.1073\/pnas.1509788112","volume":"112","author":"K D\u00fchrkop","year":"2015","unstructured":"D\u00fchrkop, K., Shen, H., Meusel, M., Rousu, J. & B\u00f6cker, S. Searching molecular structure databases with tandem mass spectra using CSI:FingerID. Proc. Natl Acad. Sci. USA 112, 12580\u201312585 (2015).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"708_CR19","doi-asserted-by":"publisher","first-page":"484","DOI":"10.3390\/metabo3020484","volume":"3","author":"H Shen","year":"2013","unstructured":"Shen, H., Zamboni, N., Heinonen, M. & Rousu, J. Metabolite identification through machine learning-tackling CASMI challenge using FingerID. Metabolites 3, 484\u2013505 (2013).","journal-title":"Metabolites"},{"key":"708_CR20","unstructured":"Critical Assessment of Small Molecule Identification. CASMI http:\/\/www.casmi-contest.org\/2022\/index.shtml (2022)."},{"key":"708_CR21","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13321-017-0207-1","volume":"9","author":"EL Schymanski","year":"2017","unstructured":"Schymanski, E. L. et al. Critical Assessment of Small Molecule Identification 2016: automated methods. J. Cheminform. 9, 1\u201321 (2017).","journal-title":"J. Cheminform."},{"key":"708_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13321-016-0116-8","volume":"8","author":"S B\u00f6cker","year":"2016","unstructured":"B\u00f6cker, S. & D\u00fchrkop, K. Fragmentation trees reloaded. J. Cheminform. 8, 1\u201326 (2016).","journal-title":"J. Cheminform."},{"key":"708_CR23","doi-asserted-by":"publisher","first-page":"462","DOI":"10.1038\/s41587-020-0740-8","volume":"39","author":"K D\u00fchrkop","year":"2021","unstructured":"D\u00fchrkop, K. et al. Systematic classification of unknown metabolites using high-resolution fragmentation mass spectra. Nat. Biotechnol. 39, 462\u2013471 (2021).","journal-title":"Nat. Biotechnol."},{"key":"708_CR24","doi-asserted-by":"publisher","first-page":"e1008920","DOI":"10.1371\/journal.pcbi.1008920","volume":"17","author":"G Hj\u00f6rleifsson Eldj\u00e1rn","year":"2021","unstructured":"Hj\u00f6rleifsson Eldj\u00e1rn, G. et al. Ranking microbial metabolomic and genomic links in the NPLinker framework using complementary scoring functions. PLoS Comput. Biol. 17, e1008920 (2021).","journal-title":"PLoS Comput. Biol."},{"key":"708_CR25","doi-asserted-by":"publisher","first-page":"865","DOI":"10.1038\/s41592-022-01486-3","volume":"19","author":"MA Stravs","year":"2022","unstructured":"Stravs, M. A., D\u00fchrkop, K., B\u00f6cker, S. & Zamboni, N. MSNovelist: de novo structure generation from mass spectra. Nat. Methods 19, 865\u2013870 (2022).","journal-title":"Nat. Methods"},{"key":"708_CR26","doi-asserted-by":"publisher","first-page":"411","DOI":"10.1038\/s41587-021-01045-9","volume":"40","author":"MA Hoffmann","year":"2021","unstructured":"Hoffmann, M. A. et al. High-confidence structural annotation of metabolites absent from spectral libraries. Nat. Biotechnol. 40, 411\u2013421 (2021).","journal-title":"Nat. Biotechnol."},{"key":"708_CR27","doi-asserted-by":"publisher","first-page":"146","DOI":"10.1038\/s41589-020-00677-3","volume":"17","author":"A Tripathi","year":"2021","unstructured":"Tripathi, A. et al. Chemically informed analyses of metabolomics mass spectrometry data with Qemistree. Nature Chem. Biol. 17, 146\u2013151 (2021).","journal-title":"Nature Chem. Biol."},{"key":"708_CR28","doi-asserted-by":"publisher","first-page":"e1008724","DOI":"10.1371\/journal.pcbi.1008724","volume":"17","author":"F Huber","year":"2021","unstructured":"Huber, F. et al. Spec2Vec: improved mass spectral similarity scoring through learning of structural relationships. PLoS Comput. Biol. 17, e1008724 (2021).","journal-title":"PLoS Comput. Biol."},{"key":"708_CR29","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13321-021-00558-4","volume":"13","author":"F Huber","year":"2021","unstructured":"Huber, F., van der Burg, S., van der Hooft, J. J. & Ridder, L. MS2DeepScore: a novel deep learning similarity measure to compare tandem mass spectra. J. Cheminform. 13, 1\u201314 (2021).","journal-title":"J. Cheminform."},{"key":"708_CR30","unstructured":"Voronov, G. et al. Multi-scale sinusoidal embeddings enable learning on high resolution mass spectrometry data. ICLR 2023 Machine Learning for Drug Discovery workshop (2023)."},{"key":"708_CR31","doi-asserted-by":"publisher","first-page":"700","DOI":"10.1021\/acscentsci.9b00085","volume":"5","author":"JN Wei","year":"2019","unstructured":"Wei, J. N., Belanger, D., Adams, R. P. & Sculley, D. Rapid prediction of electron-ionization mass spectrometry using neural networks. ACS Cent. Sci. 5, 700\u2013708 (2019).","journal-title":"ACS Cent. Sci."},{"key":"708_CR32","unstructured":"Li, X., Zhu, H., Liu, L.-P. & Hassoun, S. Ensemble Spectral Prediction (ESP) model for metabolite annotation. Preprint at https:\/\/arxiv.org\/abs\/2203.13783 (2022)."},{"key":"708_CR33","unstructured":"Young, A., Wang, B. & R\u00f6st, H. MassFormer: tandem mass spectrum prediction with graph transformers. Preprint at https:\/\/arxiv.org\/abs\/2111.04824 (2021)."},{"key":"708_CR34","doi-asserted-by":"publisher","first-page":"1793","DOI":"10.3390\/biom11121793","volume":"11","author":"AD Shrivastava","year":"2021","unstructured":"Shrivastava, A. D. et al. MassGenie: a transformer-based deep learning method for identifying small molecules from their mass spectra. Biomolecules 11, 1793 (2021).","journal-title":"Biomolecules"},{"key":"708_CR35","doi-asserted-by":"publisher","first-page":"132","DOI":"10.1038\/s42004-023-00932-3","volume":"6","author":"EE Litsa","year":"2023","unstructured":"Litsa, E. E. et al. An end-to-end deep learning framework for translating mass spectra to de-novo molecules. Communications Chemistry 6, 132 (2023).","journal-title":"Communications Chemistry"},{"key":"708_CR36","doi-asserted-by":"publisher","DOI":"10.1007\/s11306-020-01726-7","volume":"16","author":"Z Fan","year":"2020","unstructured":"Fan, Z., Alley, A., Ghaffari, K. & Ressom, H. W. MetFID: artificial neural network-based compound fingerprint prediction for metabolite annotation. Metabolomics 16, 104 (2020).","journal-title":"Metabolomics"},{"key":"708_CR37","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1021\/ci00057a005","volume":"28","author":"D Weininger","year":"1988","unstructured":"Weininger, D. SMILES, a chemical language and information system. 1. Introduction to methodology and encoding rules. J. Chem. Inf. Comput. Sci. 28, 31\u201336 (1988).","journal-title":"J. Chem. Inf. Comput. Sci."},{"key":"708_CR38","doi-asserted-by":"publisher","first-page":"i342","DOI":"10.1093\/bioinformatics\/btac260","volume":"38","author":"K D\u00fchrkop","year":"2022","unstructured":"D\u00fchrkop, K. Deep kernel learning improves molecular fingerprint prediction from tandem mass spectra. Bioinformatics 38, i342\u2013i349 (2022).","journal-title":"Bioinformatics"},{"key":"708_CR39","unstructured":"Goldman, S. MIST software. Zenodo https:\/\/zenodo.org\/record\/8084088 (2022)."},{"key":"708_CR40","doi-asserted-by":"publisher","first-page":"299","DOI":"10.1038\/s41592-019-0344-8","volume":"16","author":"K D\u00fchrkop","year":"2019","unstructured":"D\u00fchrkop, K. et al. SIRIUS 4: a rapid tool for turning tandem mass spectra into metabolite structure information. Nat. Methods 16, 299\u2013302 (2019).","journal-title":"Nat. Methods"},{"key":"708_CR41","unstructured":"Lee, J. et al. Set transformer: a framework for attention-based permutation-invariant neural networks. In International Conference on Machine Learning 3744\u20133753 (PMLR, 2019)."},{"key":"708_CR42","unstructured":"Vaswani, A. et al. Attention is all you need. Adv. Neural Inf. Process. Syst. 30, (2017)."},{"key":"708_CR43","doi-asserted-by":"publisher","first-page":"530","DOI":"10.1021\/jasms.1c00343","volume":"33","author":"A Aisporna","year":"2022","unstructured":"Aisporna, A. et al. Neutral loss mass spectral data enhances molecular similarity analysis in METLIN. J. Am. Soc. Mass Spectrom. 33, 530\u2013534 (2022).","journal-title":"J. Am. Soc. Mass Spectrom."},{"key":"708_CR44","unstructured":"Karras, T., Aila, T., Laine, S. & Lehtinen, J. Progressive growing of GANs for improved quality, stability, and variation. International Conference on Learning Representations (2018)."},{"key":"708_CR45","doi-asserted-by":"publisher","first-page":"S0033","DOI":"10.5702\/massspectrometry.S0033","volume":"3","author":"L Ridder","year":"2014","unstructured":"Ridder, L., van der Hooft, J. J. & Verhoeven, S. Automatic compound annotation from mass spectrometry data using MAGMa. Mass Spectrom. 3, S0033\u2013S0033 (2014).","journal-title":"Mass Spectrom."},{"key":"708_CR46","doi-asserted-by":"crossref","unstructured":"Xie, Q., Luong, M.-T., Hovy, E. & Le, Q. V. Self-training with noisy student improves imagenet classification. In Proc. IEEE\/CVF Conference on Computer Vision and Pattern Recognition 10687\u201310698 (IEEE, 2020).","DOI":"10.1109\/CVPR42600.2020.01070"},{"key":"708_CR47","unstructured":"Tandem Mass Spectral Library (NIST, 2020); https:\/\/www.nist.gov\/programs-projects\/tandem-mass-spectral-library"},{"key":"708_CR48","unstructured":"MassBank of North America (MoNA, 2022); https:\/\/mona.fiehnlab.ucdavis.edu\/"},{"key":"708_CR49","doi-asserted-by":"publisher","first-page":"828","DOI":"10.1038\/nbt.3597","volume":"34","author":"M Wang","year":"2016","unstructured":"Wang, M. et al. Sharing and community curation of mass spectrometry data with Global Natural Products Social Molecular Networking. Nat. Biotechnol. 34, 828\u2013837 (2016).","journal-title":"Nat. Biotechnol."},{"key":"708_CR50","doi-asserted-by":"crossref","unstructured":"Ludwig, M., D\u00fchrkop, K. & B\u00f6cker, S. Bayesian networks for mass spectrometric metabolite identification via molecular fingerprints. Bioinformatics 34, i333\u2013i340 (2018).","DOI":"10.1093\/bioinformatics\/bty245"},{"key":"708_CR51","unstructured":"Oord, A. v. d., Li, Y. & Vinyals, O. Representation learning with contrastive predictive coding. Preprint at https:\/\/arxiv.org\/abs\/1807.03748 (2018)."},{"key":"708_CR52","doi-asserted-by":"crossref","unstructured":"Huber, J. et al. Matchms-processing and similarity evaluation of mass spectrometry data. J. Open Source Software 5, 2411 (2020).","DOI":"10.21105\/joss.02411"},{"key":"708_CR53","doi-asserted-by":"publisher","first-page":"861","DOI":"10.21105\/joss.00861","volume":"3","author":"L McInnes","year":"2018","unstructured":"McInnes, L. et al. UMAP: Uniform Manifold Approximation and Projection. J. Open Source Software 3, 861 (2018).","journal-title":"J. Open Source Software"},{"key":"708_CR54","doi-asserted-by":"publisher","first-page":"2795","DOI":"10.1021\/acs.jnatprod.1c00399","volume":"84","author":"HW Kim","year":"2021","unstructured":"Kim, H. W. et al. NPClassifier: a deep neural network-based structural classification tool for natural products. J. Nat. Prod. 84, 2795\u20132807 (2021).","journal-title":"J. Nat. Prod."},{"key":"708_CR55","doi-asserted-by":"publisher","first-page":"262","DOI":"10.1038\/s41564-021-01050-3","volume":"7","author":"RH Mills","year":"2022","unstructured":"Mills, R. H. et al. Multi-omics analyses of the ulcerative colitis gut microbiome link Bacteroides vulgatus proteases with disease severity. Nat. Microbiol. 7, 262\u2013276 (2022).","journal-title":"Nat. Microbiol."},{"key":"708_CR56","doi-asserted-by":"publisher","first-page":"eabm3233","DOI":"10.1126\/science.abm3233","volume":"378","author":"Y Cao","year":"2022","unstructured":"Cao, Y. et al. Commensal microbiota from patients with inflammatory bowel disease produce genotoxic metabolites. Science 378, eabm3233 (2022).","journal-title":"Science"},{"key":"708_CR57","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1038\/s41564-018-0306-4","volume":"4","author":"EA Franzosa","year":"2019","unstructured":"Franzosa, E. A. et al. Gut microbiome structure and metabolic activity in inflammatory bowel disease. Nat. Microbiol. 4, 293\u2013305 (2019).","journal-title":"Nat. Microbiol."},{"key":"708_CR58","doi-asserted-by":"publisher","first-page":"600\u2013610.e4","DOI":"10.1016\/j.chom.2018.09.009","volume":"24","author":"M Schirmer","year":"2018","unstructured":"Schirmer, M. et al. Compositional and temporal changes in the gut microbiome of pediatric ulcerative colitis patients are linked to disease course. Cell Host Microbe 24, 600\u2013610.e4 (2018).","journal-title":"Cell Host Microbe"},{"key":"708_CR59","doi-asserted-by":"publisher","first-page":"1673","DOI":"10.1038\/s41564-022-01224-7","volume":"7","author":"DF Rojas-Tapias","year":"2022","unstructured":"Rojas-Tapias, D. F. et al. Inflammation-associated nitrate facilitates ectopic colonization of oral bacterium Veillonella parvula in the intestine. Nat. Microbiol. 7, 1673\u20131685 (2022).","journal-title":"Nat. Microbiol."},{"key":"708_CR60","doi-asserted-by":"publisher","first-page":"2848","DOI":"10.1038\/s41598-017-03220-y","volume":"7","author":"GA Bezerra","year":"2017","unstructured":"Bezerra, G. A. et al. Bacterial protease uses distinct thermodynamic signatures for substrate recognition. Sci. Rep. 7, 2848 (2017).","journal-title":"Sci. Rep."},{"key":"708_CR61","doi-asserted-by":"publisher","first-page":"25\u201337.e6","DOI":"10.1016\/j.chom.2017.06.007","volume":"22","author":"M Wlodarska","year":"2017","unstructured":"Wlodarska, M. et al. Indoleacrylic acid produced by commensal peptostreptococcus species suppresses inflammation. Cell Host Microbe 22, 25\u201337.e6 (2017).","journal-title":"Cell Host Microbe"},{"key":"708_CR62","doi-asserted-by":"publisher","first-page":"517","DOI":"10.3390\/metabo3030517","volume":"3","author":"EL Schymanski","year":"2013","unstructured":"Schymanski, E. L. & Neumann, S. The Critical Assessment of Small Molecule Identification (CASMI): challenges and solutions. Metabolites 3, 517\u2013538 (2013).","journal-title":"Metabolites"},{"key":"708_CR63","unstructured":"Landrum, G. RDKit: a software suite for cheminformatics, computational chemistry, and predictive modeling. Greg Landrum (2013)."},{"key":"708_CR64","doi-asserted-by":"crossref","unstructured":"Malisiewicz, T., Gupta, A. & Efros, A. A. Ensemble of exemplar-svms for object detection and beyond. In 2011 International conference on Computer Vision 89\u201396 (IEEE, 2011).","DOI":"10.1109\/ICCV.2011.6126229"},{"key":"708_CR65","doi-asserted-by":"publisher","first-page":"629","DOI":"10.1038\/s42256-020-00234-6","volume":"2","author":"M Ludwig","year":"2020","unstructured":"Ludwig, M. et al. Database-independent molecular formula annotation using Gibbs sampling through ZODIAC. Nat. Mach. Intell. 2, 629\u2013641 (2020).","journal-title":"Nat. Mach. Intell."},{"key":"708_CR66","doi-asserted-by":"publisher","first-page":"3503","DOI":"10.1021\/acs.jcim.2c00321","volume":"62","author":"Z Tu","year":"2022","unstructured":"Tu, Z. & Coley, C. W. Permutation invariant graph-to-sequence model for template-free retrosynthesis and reaction prediction. J. Chem. Inf. Model. 62, 3503\u20133513 (2022).","journal-title":"J. Chem. Inf. Model."},{"key":"708_CR67","doi-asserted-by":"crossref","unstructured":"Dai, Z. et al. Transformer-XL: attentive language models beyond a fixed-length context. Proc. 57th Ann. Meeting Assoc. Computational Linguistics. (2019).","DOI":"10.18653\/v1\/P19-1285"},{"key":"708_CR68","doi-asserted-by":"publisher","first-page":"742","DOI":"10.1021\/ci100050t","volume":"50","author":"D Rogers","year":"2010","unstructured":"Rogers, D. & Hahn, M. Extended-connectivity fingerprints. J. Chem. Inf. Model. 50, 742\u2013754 (2010).","journal-title":"J. Chem. Inf. Model."},{"key":"708_CR69","unstructured":"Gutmann, M. & Hyv\u00e4rinen, A. Noise-contrastive estimation: a new estimation principle for unnormalized statistical models. In Proc. Thirteenth International Conference on Artificial Intelligence and Statistics 297\u2013304 (JMLR, 2010)."},{"key":"708_CR70","unstructured":"Liu, L. et al. On the Variance of the Adaptive Learning Rate and Beyond. Intern. Conf. on Learning Representations. (2019)."},{"key":"708_CR71","doi-asserted-by":"publisher","first-page":"D521","DOI":"10.1093\/nar\/gkl923","volume":"35","author":"DS Wishart","year":"2007","unstructured":"Wishart, D. S. et al. HMDB: the Human Metabolome Database. Nucleic Acids Res. 35, D521\u2013D526 (2007).","journal-title":"Nucleic Acids Res."},{"key":"708_CR72","unstructured":"Shinbo, Y. et al. KNApSAcK: A Comprehensive Species-Metabolite Relationship Database. In: Saito, K., Dixon, R.A., Willmitzer, L. (eds) Plant Metabolomics. Biotechnology in Agriculture and Forestry, (Springer, 2006)."},{"key":"708_CR73","doi-asserted-by":"crossref","unstructured":"Kanehisa, M. The KEGG database. In Novartis Foundation Symposium 91\u2013100 (Wiley Online Library, 2002).","DOI":"10.1002\/0470857897.ch8"},{"key":"708_CR74","doi-asserted-by":"publisher","first-page":"D1102","DOI":"10.1093\/nar\/gky1033","volume":"47","author":"S Kim","year":"2019","unstructured":"Kim, S. et al. PubChem 2019 update: improved access to chemical data. Nucleic Acids Res. 47, D1102\u2013D1109 (2019).","journal-title":"Nucleic Acids Res."},{"key":"708_CR75","doi-asserted-by":"publisher","first-page":"D622","DOI":"10.1093\/nar\/gkab1062","volume":"50","author":"DS Wishart","year":"2022","unstructured":"Wishart, D. S. et al. HMDB 5.0: the Human Metabolome Database for 2022. Nucleic Acids Res. 50, D622\u2013D631 (2022).","journal-title":"Nucleic Acids Res."},{"key":"708_CR76","doi-asserted-by":"publisher","first-page":"447","DOI":"10.1038\/s41587-023-01690-2","volume":"41","author":"R Schmid","year":"2023","unstructured":"Schmid, R. et al. Integrative analysis of multimodal mass spectrometry data in MZmine 3. Nat. Biotech. 41, 447\u2013449 (2023).","journal-title":"Nat. Biotech."}],"container-title":["Nature Machine Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s42256-023-00708-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-023-00708-3","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-023-00708-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,19]],"date-time":"2023-09-19T19:04:04Z","timestamp":1695150244000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s42256-023-00708-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,17]]},"references-count":76,"journal-issue":{"issue":"9","published-online":{"date-parts":[[2023,9]]}},"alternative-id":["708"],"URL":"https:\/\/doi.org\/10.1038\/s42256-023-00708-3","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/2022.12.30.522318","asserted-by":"object"}]},"ISSN":["2522-5839"],"issn-type":[{"value":"2522-5839","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,8,17]]},"assertion":[{"value":"14 February 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 July 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 August 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"C.W.C. is a scientific advisor to Enveda Therapeutics, Inc. R.J.X is a co-founder of Celsius Therapeutics and Jnana Therapeutics, Board of Directors at MoonLake Immunotherapeutics, and Scientific Advisory Board at Nestl\u00e9. The other authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}