{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,25]],"date-time":"2026-02-25T18:08:34Z","timestamp":1772042914199,"version":"3.50.1"},"reference-count":31,"publisher":"Oxford University Press (OUP)","issue":"6","license":[{"start":{"date-parts":[[2025,12,31]],"date-time":"2025-12-31T00:00:00Z","timestamp":1767139200000},"content-version":"vor","delay-in-days":60,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000049","name":"National Institute on Aging","doi-asserted-by":"publisher","award":["3U01AG024904-09S4"],"award-info":[{"award-number":["3U01AG024904-09S4"]}],"id":[{"id":"10.13039\/100000049","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000049","name":"National Institute on Aging","doi-asserted-by":"publisher","award":["RF1AG051550"],"award-info":[{"award-number":["RF1AG051550"]}],"id":[{"id":"10.13039\/100000049","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000049","name":"National Institute on Aging","doi-asserted-by":"publisher","award":["R01AG046171"],"award-info":[{"award-number":["R01AG046171"]}],"id":[{"id":"10.13039\/100000049","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Alzheimer's Disease Metabolomics Consortium"},{"name":"0-1 Original Exploration Category: Fundamental Research Funds for the Central Universities Project","award":["2022FRFK030025"],"award-info":[{"award-number":["2022FRFK030025"]}]},{"name":"Heilongjiang Provincial Science and Technology Tackling Project","award":["GNCMSSJH2024"],"award-info":[{"award-number":["GNCMSSJH2024"]}]},{"name":"Key Research and Development Program of Heilongjiang Province","award":["2022ZX02C20"],"award-info":[{"award-number":["2022ZX02C20"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62371161"],"award-info":[{"award-number":["62371161"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62331012"],"award-info":[{"award-number":["62331012"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2021YFF1200105"],"award-info":[{"award-number":["2021YFF1200105"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,11,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:p>Missing values in nuclear magnetic resonance metabolomics data compromise downstream clinical interpretation. Here, we present MetImputBERT, an imputation method based on a pretrained BERT framework. MetImputBERT uses the masks in the masked language model to simulate missing values and leverages predictions and reconstructions to these positions to simulate the imputation process. The learning of MetImputBERT is driven by minimizing the reconstruction error. MetImputBERT was pretrained on the largest metabolomics dataset to date, comprising data from over 230\u00a0000 individuals in the UK Biobank. When new datasets with missing values were encountered, MetImputBERT loaded the pretrained parameters and directly imputed the missing values by inferring their reconstructed estimates. MetImputBERT outperformed commonly used methods\u2014K-nearest neighbors, multiple imputation by chained equations, and singular value decomposition\u2014in imputation performance on two independent test sets. We provide an open-source Python tool that allows users to quickly impute missing values in their own NMR metabolomics data without any additional training.<\/jats:p>","DOI":"10.1093\/bib\/bbaf682","type":"journal-article","created":{"date-parts":[[2025,12,2]],"date-time":"2025-12-02T12:52:28Z","timestamp":1764679948000},"source":"Crossref","is-referenced-by-count":0,"title":["MetImputBERT: a pretrained BERT framework for missing value imputation in NMR metabolomics data"],"prefix":"10.1093","volume":"26","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0047-4199","authenticated-orcid":false,"given":"Shizheng","family":"Qiu","sequence":"first","affiliation":[{"name":"Faculty of Computing, Harbin Institute of Technology , 92 Xidazhi Street, Nangang District, Harbin, 150001 ,","place":["China"]}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4508-5365","authenticated-orcid":false,"given":"Yang","family":"Hu","sequence":"additional","affiliation":[{"name":"Faculty of Computing, Harbin Institute of Technology , 92 Xidazhi Street, Nangang District, Harbin, 150001 ,","place":["China"]}],"role":[{"role":"author","vocabulary":"crossref"}]},{"name":"the Alzheimer's Disease Neuroimaging Initiative","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1126-2888","authenticated-orcid":false,"given":"Guiyou","family":"Liu","sequence":"additional","affiliation":[{"name":"Beijing Institute of Brain Disorders, Laboratory of Brain Disorders, Ministry of Science and Technology, Collaborative Innovation Center for Brain Disorders, National Engineering Center of Internet Medical Diagnosis and Treatment Technology, Xuanwu Hospital, Capital Medical University , Beijing, 100069 ,","place":["China"]},{"name":"Department of Epidemiology and Biostatistics, School of Public Health, Wannan Medical College , No. 22, Wenchang Road, Wuhu, 241002 ,","place":["China"]},{"name":"Dongying Branch Center of Collaborative Innovation Center for Brain Disorders, Shengli Oilfield Central Hospital , No. 31 Jinan Road, Dongying, 257034 ,","place":["China"]},{"name":"Clinical Medicine Translational Research Institute, Chengdu Fifth People\u2019s Hospital, Geriatric Diseases Institute of Chengdu, The Second Clinical Medical College, Affiliated Fifth People\u2019s Hospital of Chengdu University of Traditional Chinese Medicine, Chengdu University of Traditional Chinese Medicine , No. 33, Ma Shi Street, Chengdu, 611137 ,","place":["China"]}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6500-6217","authenticated-orcid":false,"given":"Yadong","family":"Wang","sequence":"additional","affiliation":[{"name":"Faculty of Computing, Harbin Institute of Technology , 92 Xidazhi Street, Nangang District, Harbin, 150001 ,","place":["China"]},{"name":"Zhengzhou Research Institute, Harbin Institute of Technology , No. 26, Longyuan East 7th Street, Zhengdong New District, Zhengzhou, 450000 ,","place":["China"]}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"286","published-online":{"date-parts":[[2025,12,31]]},"reference":[{"key":"2026022512214637800_ref1","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1016\/j.jmr.2019.07.013","article-title":"NMR metabolomics: a look ahead","volume":"306","author":"Wishart","year":"2019","journal-title":"J Magn Reson"},{"key":"2026022512214637800_ref2","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1007\/978-3-030-51652-9_2","article-title":"NMR-based metabolomics","volume":"1280","author":"Nagana Gowda","year":"2021","journal-title":"Adv Exp Med Biol"},{"key":"2026022512214637800_ref3","doi-asserted-by":"publisher","first-page":"1084","DOI":"10.1093\/aje\/kwx016","article-title":"Quantitative serum nuclear magnetic resonance metabolomics in large-scale epidemiology: a primer on -Omic technologies","volume":"186","author":"Wurtz","year":"2017","journal-title":"Am J Epidemiol"},{"key":"2026022512214637800_ref4","doi-asserted-by":"publisher","DOI":"10.3390\/metabo13121181","article-title":"Technical report: a comprehensive comparison between different quantification versions of nightingale Health's (1)H-NMR metabolomics platform","volume":"13","author":"Bizzarri","year":"2023","journal-title":"Metabolites"},{"key":"2026022512214637800_ref5","doi-asserted-by":"publisher","first-page":"869","DOI":"10.1016\/j.nic.2005.09.008","article-title":"The Alzheimer's disease neuroimaging initiative","volume":"15","author":"Mueller","year":"2005","journal-title":"Neuroimaging Clin N Am"},{"key":"2026022512214637800_ref6","doi-asserted-by":"publisher","first-page":"604","DOI":"10.1038\/s41467-023-36231-7","article-title":"Atlas of plasma NMR biomarkers for health and disease in 118,461 individuals from the UK biobank","volume":"14","author":"Julkunen","year":"2023","journal-title":"Nat Commun"},{"key":"2026022512214637800_ref7","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1038\/s41586-024-07148-y","article-title":"Genome-wide characterization of circulating metabolic biomarkers","volume":"628","author":"Karjalainen","year":"2024","journal-title":"Nature"},{"key":"2026022512214637800_ref8","doi-asserted-by":"publisher","first-page":"620","DOI":"10.1016\/j.jacc.2017.12.006","article-title":"Lipids, lipoproteins, and metabolites and risk of myocardial infarction and stroke","volume":"71","author":"Holmes","year":"2018","journal-title":"J Am Coll Cardiol"},{"key":"2026022512214637800_ref9","doi-asserted-by":"publisher","first-page":"b2393","DOI":"10.1136\/bmj.b2393","article-title":"Multiple imputation for missing data in epidemiological and clinical research: potential and pitfalls","volume":"338","author":"Sterne","year":"2009","journal-title":"BMJ"},{"key":"2026022512214637800_ref10","doi-asserted-by":"publisher","DOI":"10.3390\/metabo10080319","article-title":"A multilevel Bayesian approach to improve effect size estimation in regression Modeling of metabolomics data utilizing imputation with uncertainty","volume":"10","author":"Gillies","year":"2020","journal-title":"Metabolites"},{"key":"2026022512214637800_ref11","doi-asserted-by":"publisher","first-page":"663","DOI":"10.1038\/s41598-017-19120-0","article-title":"Missing value imputation approach for mass spectrometry-based metabolomics data","volume":"8","author":"Wei","year":"2018","journal-title":"Sci Rep"},{"key":"2026022512214637800_ref12","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1186\/s12859-022-04659-1","article-title":"Mechanism-aware imputation: a two-step approach in handling missing values in metabolomics","volume":"23","author":"Dekermanjian","year":"2022","journal-title":"BMC Bioinformatics"},{"key":"2026022512214637800_ref13","doi-asserted-by":"publisher","first-page":"2309","DOI":"10.1038\/s41591-022-01980-3","article-title":"Metabolomic profiles predict individual multidisease outcomes","volume":"28","author":"Buergel","year":"2022","journal-title":"Nat Med"},{"key":"2026022512214637800_ref14","doi-asserted-by":"publisher","first-page":"103764","DOI":"10.1016\/j.ebiom.2021.103764","article-title":"1H-NMR metabolomics-based surrogates to impute common clinical risk factors and endpoints","volume":"75","author":"Bizzarri","year":"2022","journal-title":"EBioMedicine"},{"key":"2026022512214637800_ref15","doi-asserted-by":"publisher","first-page":"112","DOI":"10.1093\/bioinformatics\/btr597","article-title":"MissForest--non-parametric missing value imputation for mixed-type data","volume":"28","author":"Stekhoven","year":"2012","journal-title":"Bioinformatics"},{"key":"2026022512214637800_ref16","first-page":"4171","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","volume":"1","author":"Devlin","year":"2019","journal-title":"2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Naacl Hlt 2019)"},{"key":"2026022512214637800_ref17","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Advances in Neural Information Processing Systems 30 (Nips 2017)"},{"key":"2026022512214637800_ref18","doi-asserted-by":"crossref","first-page":"eadf4428","DOI":"10.1126\/scitranslmed.adf4428","article-title":"Prospective study design and data analysis in UK biobank","volume":"16","author":"Allen","year":"2024","journal-title":"Sci Transl Med"},{"key":"2026022512214637800_ref19","doi-asserted-by":"publisher","first-page":"e1001779","DOI":"10.1371\/journal.pmed.1001779","article-title":"UK biobank: an open access resource for identifying the causes of a wide range of complex diseases of middle and old age","volume":"12","author":"Sudlow","year":"2015","journal-title":"PLoS Med"},{"key":"2026022512214637800_ref20","doi-asserted-by":"publisher","first-page":"10092","DOI":"10.1038\/s41467-024-54357-0","article-title":"Metabolomic and genomic prediction of common diseases in 700,217 participants in three national biobanks","volume":"15","author":"Nightingale Health Biobank Collaborative, G","year":"2024","journal-title":"Nat Commun"},{"key":"2026022512214637800_ref21","doi-asserted-by":"publisher","first-page":"192","DOI":"10.1161\/CIRCGENETICS.114.000216","article-title":"Quantitative serum nuclear magnetic resonance metabolomics in cardiovascular epidemiology and genetics","volume":"8","author":"Soininen","year":"2015","journal-title":"Circ Cardiovasc Genet"},{"key":"2026022512214637800_ref22","doi-asserted-by":"publisher","first-page":"329","DOI":"10.1038\/s41586-023-06592-6","article-title":"Plasma proteomic associations with genetics and health in the UK biobank","volume":"622","author":"Sun","year":"2023","journal-title":"Nature"},{"key":"2026022512214637800_ref23","doi-asserted-by":"publisher","first-page":"e1002482","DOI":"10.1371\/journal.pmed.1002482","article-title":"Brain and blood metabolite signatures of pathology and progression in Alzheimer disease: a targeted metabolomics study","volume":"15","author":"Varma","year":"2018","journal-title":"PLoS Med"},{"key":"2026022512214637800_ref24","doi-asserted-by":"publisher","first-page":"170101","DOI":"10.1007\/s11432-024-4466-3","article-title":"Large language models transform biological research: from architecture to utilization","volume":"68","author":"Wang","year":"2025","journal-title":"SCIENCE CHINA Inf Sci"},{"key":"2026022512214637800_ref25","article-title":"Revisiting deep learning models for tabular data","volume":"34","author":"Gorishniy","year":"2021","journal-title":"Advances in Neural Information Processing Systems 34 (Neurips 2021)"},{"key":"2026022512214637800_ref26","doi-asserted-by":"publisher","first-page":"110442","DOI":"10.1016\/j.compbiomed.2025.110442","article-title":"DFT_ANPD: a dual-feature two-sided attention network for anticancer natural products detection","volume":"194","author":"Norouzi","year":"2025","journal-title":"Comput Biol Med"},{"key":"2026022512214637800_ref27","doi-asserted-by":"publisher","first-page":"113662","DOI":"10.1016\/j.eswa.2020.113662","article-title":"Incorporating part-whole hierarchies into fully convolutional network for scene parsing","volume":"160","author":"Abbasi","year":"2020","journal-title":"Expert Syst Appl"},{"key":"2026022512214637800_ref28","doi-asserted-by":"publisher","first-page":"2285","DOI":"10.1101\/gr.280584.125","article-title":"Efficient integration of spatial omics data for joint domain detection, matching, and alignment with stMSA","volume":"35","author":"Shu","year":"2025","journal-title":"Genome Res"},{"key":"2026022512214637800_ref29","doi-asserted-by":"publisher","DOI":"10.1093\/bib\/bbaf303","article-title":"cfMethylPre: deep transfer learning enhances cancer detection based on circulating cell-free DNA methylation profiling","volume":"26","author":"Zhang","year":"2025","journal-title":"Brief Bioinform"},{"key":"2026022512214637800_ref30","doi-asserted-by":"publisher","DOI":"10.1093\/bib\/bbaf086","article-title":"MAEST: accurately spatial domain detection in spatial transcriptomics with graph masked autoencoder","volume":"26","author":"Zhu","year":"2025","journal-title":"Brief Bioinform"},{"key":"2026022512214637800_ref31","doi-asserted-by":"publisher","first-page":"260","DOI":"10.1186\/s12915-024-02055-0","article-title":"CRBPSA: CircRNA-RBP interaction sites identification using sequence structural attention model","volume":"22","author":"Cao","year":"2024","journal-title":"BMC Biol"}],"container-title":["Briefings in Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bib\/article-pdf\/26\/6\/bbaf682\/66168531\/bbaf682.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bib\/article-pdf\/26\/6\/bbaf682\/66168531\/bbaf682.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,25]],"date-time":"2026-02-25T17:21:55Z","timestamp":1772040115000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bib\/article\/doi\/10.1093\/bib\/bbaf682\/8407513"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,1]]},"references-count":31,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2025,11,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bib\/bbaf682","relation":{},"ISSN":["1467-5463","1477-4054"],"issn-type":[{"value":"1467-5463","type":"print"},{"value":"1477-4054","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2025,11]]},"published":{"date-parts":[[2025,11,1]]},"article-number":"bbaf682"}}