{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T00:55:22Z","timestamp":1775782522566,"version":"3.50.1"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T00:00:00Z","timestamp":1706745600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T00:00:00Z","timestamp":1706745600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2022YFF0712400"],"award-info":[{"award-number":["2022YFF0712400"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Nature Science Foundation of China","doi-asserted-by":"crossref","award":["12201580"],"award-info":[{"award-number":["12201580"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Key Research Project of Zhejiang Lab","award":["2022NF0AC01"],"award-info":[{"award-number":["2022NF0AC01"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2024,2]]},"DOI":"10.1007\/s10489-024-05295-3","type":"journal-article","created":{"date-parts":[[2024,2,12]],"date-time":"2024-02-12T07:02:02Z","timestamp":1707721322000},"page":"2812-2830","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Effects of single and multiple imputation strategies on addressing over-fitting issues caused by imbalanced data from various scenarios"],"prefix":"10.1007","volume":"54","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5055-8729","authenticated-orcid":false,"given":"Jiaxi","family":"Yang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2909-8526","authenticated-orcid":false,"given":"Yihan","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1165-1314","authenticated-orcid":false,"given":"Ye","family":"Yang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4534-2904","authenticated-orcid":false,"given":"Kai","family":"Ding","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2680-5774","authenticated-orcid":false,"given":"Chongning","family":"Na","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7007-9071","authenticated-orcid":false,"given":"Yao","family":"Yang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,2,12]]},"reference":[{"issue":"3","key":"5295_CR1","doi-asserted-by":"publisher","first-page":"581","DOI":"10.1093\/biomet\/63.3.581","volume":"63","author":"DB Rubin","year":"1976","unstructured":"Rubin DB (1976) Inference and missing data. Biometrika 63(3):581\u2013592","journal-title":"Biometrika"},{"key":"5295_CR2","doi-asserted-by":"crossref","unstructured":"Little RJ, Rubin DB (2019) Statistical analysis with missing data 793. John Wiley & Sons","DOI":"10.1002\/9781119482260"},{"key":"5295_CR3","doi-asserted-by":"crossref","unstructured":"Miao X, Wu Y, Chen L, Gao Y, Yin J (2022) An experimental survey of missing data imputation algorithms. IEEE Trans Knowl Data Eng","DOI":"10.1109\/TKDE.2022.3186498"},{"key":"5295_CR4","unstructured":"Josse J, Prost N, Scornet E, Varoquaux G (2019) On the consistency of supervised learning with missing values. Preprint arXiv:1902.06931"},{"issue":"1","key":"5295_CR5","first-page":"15","volume":"13","author":"DF Swayne","year":"1998","unstructured":"Swayne DF, Buja A (1998) Missing data in interactive high-dimensional data visualization. Comput Stat 13(1):15\u201326","journal-title":"Comput Stat"},{"issue":"1","key":"5295_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s12859-014-0346-6","volume":"15","author":"SG Liao","year":"2014","unstructured":"Liao SG, Lin Y, Kang DD, Chandra D, Bon J, Kaminski N, Sciurba FC, Tseng GC (2014) Missing value imputation in high-dimensional phenomic data: imputable or not, and how? BMC Bioinforma 15(1):1\u201312","journal-title":"BMC Bioinforma"},{"issue":"7","key":"5295_CR7","doi-asserted-by":"publisher","first-page":"950","DOI":"10.1016\/j.patrec.2008.01.010","volume":"29","author":"BE Twala","year":"2008","unstructured":"Twala BE, Jones M, Hand DJ (2008) Good methods for coping with missing data in decision trees. Pattern Recogn Lett 29(7):950-956","journal-title":"Pattern Recogn Lett"},{"key":"5295_CR8","doi-asserted-by":"crossref","unstructured":"Deng Y, Lumley T (2023) Multiple imputation through XGBoost. J Comput Graph Stat (just-accepted), 1\u201318","DOI":"10.1080\/10618600.2023.2252501"},{"issue":"11","key":"5295_CR9","doi-asserted-by":"publisher","first-page":"2541","DOI":"10.1016\/j.jss.2012.05.073","volume":"85","author":"S Zhang","year":"2012","unstructured":"Zhang S (2012) Nearest neighbor selection for iteratively KNN imputation. J Syst Softw 85(11):2541\u20132552","journal-title":"J Syst Softw"},{"key":"5295_CR10","doi-asserted-by":"crossref","unstructured":"Gondara L, Wang K (2018) Mida: multiple imputation using denoising autoencoders. In: Pacific-asia conference on knowledge discovery and data mining. Springer, pp 260\u2013272","DOI":"10.1007\/978-3-319-93040-4_21"},{"key":"5295_CR11","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1016\/j.patrec.2020.05.032","volume":"136","author":"MS Santos","year":"2020","unstructured":"Santos MS, Abreu PH, Wilk S, Santos J (2020) How distance metrics influence missing data imputation with k-nearest neighbours. Pattern Recogn Lett 136:111\u2013119","journal-title":"Pattern Recogn Lett"},{"issue":"2","key":"5295_CR12","doi-asserted-by":"publisher","first-page":"1487","DOI":"10.1007\/s10462-019-09709-4","volume":"53","author":"W-C Lin","year":"2020","unstructured":"Lin W-C, Tsai C-F (2020) Missing value imputation: a review and analysis of the literature (2006\u20132017). Artif Intell Rev 53(2):1487\u20131509","journal-title":"Artif Intell Rev"},{"key":"5295_CR13","unstructured":"Zhang Z (2016) Missing data imputation: focusing on single imputation. Ann Transl Med 4(1)"},{"issue":"1","key":"5295_CR14","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1111\/j.1751-5823.2010.00103.x","volume":"78","author":"RR Andridge","year":"2010","unstructured":"Andridge RR, Little RJ (2010) A review of hot deck imputation for survey non-response. Int Stat Rev 78(1):40\u201364","journal-title":"Int Stat Rev"},{"issue":"3","key":"5295_CR15","doi-asserted-by":"publisher","first-page":"329","DOI":"10.1002\/bimj.200710423","volume":"50","author":"M Taljaard","year":"2008","unstructured":"Taljaard M, Donner A, Klar N (2008) Imputation strategies for missing continuous outcomes in cluster randomized trials. Biom J 50(3):329\u2013345","journal-title":"Biom J"},{"issue":"4","key":"5295_CR16","doi-asserted-by":"publisher","first-page":"377","DOI":"10.1002\/sim.4067","volume":"30","author":"IR White","year":"2011","unstructured":"White IR, Royston P, Wood AM (2011) Multiple imputation using chained equations: issues and guidance for practice. Stat Med 30(4):377\u2013399","journal-title":"Stat Med"},{"key":"5295_CR17","unstructured":"Lee D, Seung HS (2000) Algorithms for non-negative matrix factorization. Adv Neural Inf Process Syst 13"},{"key":"5295_CR18","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1016\/j.knosys.2013.08.023","volume":"53","author":"MG Rahman","year":"2013","unstructured":"Rahman MG, Islam MZ (2013) Missing value imputation using decision trees and decision forests by splitting and merging records: two novel techniques. Knowl-Based Syst 53:51\u201365","journal-title":"Knowl-Based Syst"},{"issue":"2","key":"5295_CR19","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1016\/j.artmed.2010.05.002","volume":"50","author":"JM Jerez","year":"2010","unstructured":"Jerez JM, Molina I, Garc\u00eda-Laencina PJ, Alba E, Ribelles N, Mart\u00edn M, Franco L (2010) Missing data imputation using statistical and machine learning methods in a real breast cancer problem. Artif Intell Med 50(2):105\u2013115","journal-title":"Artif Intell Med"},{"key":"5295_CR20","doi-asserted-by":"publisher","first-page":"106","DOI":"10.1016\/j.neucom.2014.02.037","volume":"138","author":"V Ravi","year":"2014","unstructured":"Ravi V, Krishna M (2014) A new online data imputation method based on general regression auto associative neural network. Neurocomputing 138:106\u2013113","journal-title":"Neurocomputing"},{"key":"5295_CR21","doi-asserted-by":"crossref","unstructured":"Singh N, Javeed A, Chhabra S, Kumar P (2015) Missing value imputation with unsupervised Kohonen self organizing map. In: Emerging research in computing, information, communication and applications. Springer, pp 61\u201376","DOI":"10.1007\/978-81-322-2550-8_7"},{"issue":"4","key":"5295_CR22","doi-asserted-by":"publisher","first-page":"1333","DOI":"10.1016\/j.eswa.2012.08.057","volume":"40","author":"PJ Garc\u00eda-Laencina","year":"2013","unstructured":"Garc\u00eda-Laencina PJ, Sancho-G\u00f3mez J-L, Figueiras-Vidal AR (2013) Classifying patterns with missing values using multi-task learning perceptrons. Expert Syst Appl 40(4):1333\u20131341","journal-title":"Expert Syst Appl"},{"issue":"21","key":"5295_CR23","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1016\/j.ifacol.2018.09.406","volume":"51","author":"JT McCoy","year":"2018","unstructured":"McCoy JT, Kroon S, Auret L (2018) Variational autoencoders for missing data imputation with application to a simulated milling circuit. IFAC-PapersOnLine 51(21):141\u2013146","journal-title":"IFAC-PapersOnLine"},{"key":"5295_CR24","doi-asserted-by":"crossref","unstructured":"Antoniou A, Storkey A, Edwards H (2017) Data augmentation generative adversarial networks. Preprint arXiv:1711.04340","DOI":"10.1007\/978-3-030-01424-7_58"},{"key":"5295_CR25","unstructured":"Mariani G, Scheidegger F, Istrate R, Bekas C, Malossi C (2018) BAGAN: data augmentation with balancing GAN. Preprint arXiv:1803.09655"},{"issue":"1","key":"5295_CR26","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1109\/MSP.2017.2765202","volume":"35","author":"A Creswell","year":"2018","unstructured":"Creswell A, White T, Dumoulin V, Arulkumaran K, Sengupta B, Bharath AA (2018) Generative adversarial networks: an overview. IEEE Signal Proc Mag 35(1):53\u201365","journal-title":"IEEE Signal Proc Mag"},{"issue":"11","key":"5295_CR27","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"I Goodfellow","year":"2020","unstructured":"Goodfellow I, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A, Bengio Y (2020) Generative adversarial networks. Commun ACM 63(11):139\u2013144","journal-title":"Commun ACM"},{"issue":"2","key":"5295_CR28","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1016\/j.geoderma.2008.09.014","volume":"148","author":"S Lesch","year":"2008","unstructured":"Lesch S, Corwin D (2008) Prediction of spatial soil property information from ancillary sensor data using ordinary linear regression: model derivations, residual assumptions and model validation tests. Geoderma 148(2):130\u2013140","journal-title":"Geoderma"},{"issue":"12","key":"5295_CR29","doi-asserted-by":"publisher","first-page":"1049","DOI":"10.1080\/10629360600810434","volume":"76","author":"S Van Buuren","year":"2006","unstructured":"Van Buuren S, Brand JP, Groothuis-Oudshoorn CG, Rubin DB (2006) Fully conditional specification in multivariate imputation. J Stat Comput Simul 76(12):1049\u20131064","journal-title":"J Stat Comput Simul"},{"issue":"14","key":"5295_CR30","doi-asserted-by":"publisher","first-page":"2252","DOI":"10.1002\/sim.7654","volume":"37","author":"M Schomaker","year":"2018","unstructured":"Schomaker M, Heumann C (2018) Bootstrap inference when using multiple imputation. Stat Med 37(14):2252\u20132266","journal-title":"Stat Med"},{"issue":"1","key":"5295_CR31","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1007\/s10182-008-0053-6","volume":"92","author":"D Schunk","year":"2008","unstructured":"Schunk D (2008) A Markov chain Monte Carlo algorithm for multiple imputation in large surveys. AStA Adv Stat Anal 92(1):101-114","journal-title":"AStA Adv Stat Anal"},{"key":"5295_CR32","doi-asserted-by":"crossref","unstructured":"Li X, She J (2017) Collaborative variational autoencoder for recommender systems. In: Proceedings of the 23rd ACM SIGKDD international conference on knowledge discovery and data mining, pp 305\u2013314","DOI":"10.1145\/3097983.3098077"},{"key":"5295_CR33","unstructured":"Kusner MJ, Paige B, Hern\u00e1ndez-Lobato JM (2017) Grammar variational autoencoder. In: International conference on machine learning, pp 1945\u20131954. PMLR"},{"key":"5295_CR34","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s12874-021-01272-3","volume":"21","author":"W Dong","year":"2021","unstructured":"Dong W, Fong DYT, Yoon J-S, Wan EYF, Bedford LE, Tang EHM, Lam CLK (2021) Generative adversarial networks for imputing missing data for big data clinical research. BMC Med Res Methodol 21:1\u201310","journal-title":"BMC Med Res Methodol"},{"key":"5295_CR35","doi-asserted-by":"crossref","unstructured":"Chen T, Guestrin C (2016) XGBoost: a scalable tree boosting system. In: Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining, pp 785\u2013794","DOI":"10.1145\/2939672.2939785"},{"key":"5295_CR36","unstructured":"Dua D, Graff C (2017) UCI Machine learning repository. http:\/\/archive.ics.uci.edu\/ml"},{"key":"5295_CR37","unstructured":"Zheng A, Casari A (2018) Feature engineering for machine learning: principles and techniques for data scientists. \u201cO\u2019Reilly Media, Inc.\u201d"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-024-05295-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-024-05295-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-024-05295-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T20:41:31Z","timestamp":1710362491000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-024-05295-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2]]},"references-count":37,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2024,2]]}},"alternative-id":["5295"],"URL":"https:\/\/doi.org\/10.1007\/s10489-024-05295-3","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,2]]},"assertion":[{"value":"28 January 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 February 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}}]}}