{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,13]],"date-time":"2025-05-13T04:03:16Z","timestamp":1747108996830,"version":"3.40.5"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2025,5,12]],"date-time":"2025-05-12T00:00:00Z","timestamp":1747008000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,5,12]],"date-time":"2025-05-12T00:00:00Z","timestamp":1747008000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SN COMPUT. SCI."],"DOI":"10.1007\/s42979-025-04003-3","type":"journal-article","created":{"date-parts":[[2025,5,12]],"date-time":"2025-05-12T13:14:18Z","timestamp":1747055658000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Systematic Selection of a Suitable Data Imputation Technique Based on Data Characteristics"],"prefix":"10.1007","volume":"6","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2400-7691","authenticated-orcid":false,"given":"Anu Maria","family":"Sebastian","sequence":"first","affiliation":[]},{"given":"David","family":"Peter","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"issue":"4","key":"4003_CR1","doi-asserted-by":"publisher","first-page":"263","DOI":"10.1007\/s41060-020-00240-2","volume":"11","author":"V Grossi","year":"2021","unstructured":"Grossi V, Giannotti F, Pedreschi D, Manghi P, Pagano P, Assante M. Data science: a game changer for science and innovation. Int J Data Sci Anal. 2021;11(4):263\u201378. https:\/\/doi.org\/10.1007\/s41060-020-00240-2.","journal-title":"Int J Data Sci Anal"},{"issue":"1","key":"4003_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1017\/BrImp.2014.2","volume":"15","author":"C Padgett","year":"2014","unstructured":"Padgett C, Skilbeck C, Summers M. Missing data: the importance and impact of missing data from clinical research. Brain Impairment. 2014;15(1):1\u20139.","journal-title":"Brain Impairment"},{"key":"4003_CR3","doi-asserted-by":"publisher","unstructured":"Eirola E, Akusok A, Bj\u00f6rk K, Johnson H & Lendasse A (2017) Predicting huntington\u2019s disease: extreme learning machine with missing values. In: Proceedings in adaptation, learning and optimization, 195\u2013206. https:\/\/doi.org\/10.1007\/978-3-319-57421-9_16.","DOI":"10.1007\/978-3-319-57421-9_16"},{"key":"4003_CR4","doi-asserted-by":"publisher","unstructured":"Chowdhury, M., Islam, M., & Khan, S. (2017). Imputation of missing healthcare data. 2017 20Th International Conference Of Computer And Information Technology (ICCIT). https:\/\/doi.org\/10.1109\/iccitechn.2017.8281805.","DOI":"10.1109\/iccitechn.2017.8281805"},{"issue":"5","key":"4003_CR5","doi-asserted-by":"publisher","first-page":"692","DOI":"10.1109\/tsmca.2007.902631","volume":"37","author":"A Farhangfar","year":"2007","unstructured":"Farhangfar A, Kurgan L, Pedrycz W. A novel framework for imputation of missing values in databases. IEEE Trans Syst Man Cybern Part A. 2007;37(5):692\u2013709. https:\/\/doi.org\/10.1109\/tsmca.2007.902631.","journal-title":"IEEE Trans Syst Man Cybern Part A"},{"issue":"3","key":"4003_CR6","doi-asserted-by":"publisher","first-page":"568","DOI":"10.1093\/aje\/kwx348","volume":"187","author":"N Perkins","year":"2017","unstructured":"Perkins N, Cole S, Harel O, Tchetgen Tchetgen E, Sun B, Mitchell E, Schisterman E. Principled approaches to missing data in epidemiologic studies. Am J Epidemiol. 2017;187(3):568\u201375. https:\/\/doi.org\/10.1093\/aje\/kwx348.","journal-title":"Am J Epidemiol"},{"key":"4003_CR7","doi-asserted-by":"publisher","first-page":"305","DOI":"10.1016\/j.ins.2018.07.017","volume":"465","author":"F Fan","year":"2018","unstructured":"Fan F, Li Z, Chen Q, Chen L. Relational data imputation with quality guarantee. Inf Sci. 2018;465:305\u201322. https:\/\/doi.org\/10.1016\/j.ins.2018.07.017.","journal-title":"Inf Sci"},{"key":"4003_CR8","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1186\/s12874-024-02157-x","volume":"24","author":"A Remiro-Az\u00f3car","year":"2024","unstructured":"Remiro-Az\u00f3car A, Heath A, Baio G. Model-based standardization using multiple imputation. BMC Med Res Methodol. 2024;24:32. https:\/\/doi.org\/10.1186\/s12874-024-02157-x.","journal-title":"BMC Med Res Methodol"},{"key":"4003_CR9","doi-asserted-by":"publisher","first-page":"199","DOI":"10.1016\/j.cmpb.2017.12.011","volume":"155","author":"N Shukla","year":"2018","unstructured":"Shukla N, Hagenbuchner M, Win K, Yang J. Breast cancer data analysis for survivability studies and prediction. Comput Methods Programs Biomed. 2018;155:199\u2013208. https:\/\/doi.org\/10.1016\/j.cmpb.2017.12.011.","journal-title":"Comput Methods Programs Biomed"},{"key":"4003_CR10","doi-asserted-by":"publisher","first-page":"487","DOI":"10.1016\/j.compeleceng.2017.11.030","volume":"66","author":"U Yelipe","year":"2018","unstructured":"Yelipe U, Porika S, Golla M. An efficient approach for imputation and classification of medical data values using class-based clustering of medical records. Comput Electr Eng. 2018;66:487\u2013504. https:\/\/doi.org\/10.1016\/j.compeleceng.2017.11.030.","journal-title":"Comput Electr Eng"},{"key":"4003_CR11","doi-asserted-by":"publisher","DOI":"10.1186\/s40537-021-00516-9","author":"T Emmanuel","year":"2021","unstructured":"Emmanuel T, Maupong T, Mpoeleng D, Semong T, Mphago B, Tabona O. A survey on missing data in machine learning. J Big Data. 2021. https:\/\/doi.org\/10.1186\/s40537-021-00516-9.","journal-title":"J Big Data"},{"issue":"October","key":"4003_CR12","doi-asserted-by":"publisher","DOI":"10.1016\/j.dajour.2023.100341","volume":"9","author":"S Alam","year":"2023","unstructured":"Alam S, Ayub MS, Arora S, Khan MA. An investigation of the imputation techniques for missing values in ordinal data enhancing clustering and classification analysis validity. Decis Anal J. 2023;9(October): 100341. https:\/\/doi.org\/10.1016\/j.dajour.2023.100341.","journal-title":"Decis Anal J"},{"key":"4003_CR13","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.122775","volume":"242","author":"DG Kim","year":"2024","unstructured":"Kim DG, Choi JY. Efficient imputation of missing data using the information of local space defined by the geometric one-class classifier. Expert Syst Appl. 2024;242: 122775. https:\/\/doi.org\/10.1016\/j.eswa.2023.122775.","journal-title":"Expert Syst Appl"},{"key":"4003_CR14","doi-asserted-by":"publisher","first-page":"112","DOI":"10.1016\/j.jbi.2017.03.009","volume":"68","author":"Z Hu","year":"2017","unstructured":"Hu Z, Melton G, Arsoniadis E, Wang Y, Kwaan M, Simon G. Strategies for handling missing clinical data for automated surgical site infection detection from the electronic health record. J Biomed Inform. 2017;68:112\u201320. https:\/\/doi.org\/10.1016\/j.jbi.2017.03.009.","journal-title":"J Biomed Inform"},{"key":"4003_CR15","doi-asserted-by":"publisher","first-page":"274","DOI":"10.1016\/j.ins.2016.01.018","volume":"339","author":"R Deb","year":"2016","unstructured":"Deb R, Liew A. Missing value imputation for the analysis of incomplete traffic accident data. Inf Sci. 2016;339:274\u201389. https:\/\/doi.org\/10.1016\/j.ins.2016.01.018.","journal-title":"Inf Sci"},{"issue":"6","key":"4003_CR16","doi-asserted-by":"publisher","first-page":"4361","DOI":"10.1007\/s00500-019-04199-6","volume":"24","author":"P Raja","year":"2019","unstructured":"Raja P, Thangavel K. Missing value imputation using unsupervised machine learning techniques. Soft Comput. 2019;24(6):4361\u201392. https:\/\/doi.org\/10.1007\/s00500-019-04199-6.","journal-title":"Soft Comput"},{"key":"4003_CR17","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1016\/j.archger.2017.07.009","volume":"73","author":"J Tan","year":"2017","unstructured":"Tan J, Li N, Lan X, Zhang S, Cui B, Liu L, et al. The impact of methods to handle missing data on the estimated prevalence of dementia and mild cognitive impairment in a cross-sectional study including non-responders. Arch Gerontol Geriatrics. 2017;73:43\u20139. https:\/\/doi.org\/10.1016\/j.archger.2017.07.009.","journal-title":"Arch Gerontol Geriatrics"},{"issue":"1\u20132","key":"4003_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.ins.2004.02.014","volume":"169","author":"I Wasito","year":"2005","unstructured":"Wasito I, Mirkin B. Nearest neighbour approach in the least-squares data imputation algorithms. Inf Sci. 2005;169(1\u20132):1\u201325. https:\/\/doi.org\/10.1016\/j.ins.2004.02.014.","journal-title":"Inf Sci"},{"issue":"3","key":"4003_CR19","doi-asserted-by":"publisher","first-page":"581","DOI":"10.2307\/2335739","volume":"63","author":"DB Rubin","year":"1976","unstructured":"Rubin DB. Inference and missing data. Biometrika. 1976;63(3):581. https:\/\/doi.org\/10.2307\/2335739.","journal-title":"Biometrika"},{"key":"4003_CR20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-43742-2","author":"L Celi","year":"2016","unstructured":"Celi L, Charlton P, Ghassemi M, Johnson A, Komorowski M, Marshall D, et al. Secondary analysis of electronic health records. MIT Critical Data. 2016. https:\/\/doi.org\/10.1007\/978-3-319-43742-2.","journal-title":"MIT Critical Data"},{"key":"4003_CR21","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1016\/j.knosys.2016.01.048","volume":"99","author":"L Zhang","year":"2016","unstructured":"Zhang L, Lu W, Liu X, Pedrycz W, Zhong C. Fuzzy C-Means clustering of incomplete data based on probabilistic information granules of missing values. Knowl-Based Syst. 2016;99:51\u201370. https:\/\/doi.org\/10.1016\/j.knosys.2016.01.048.","journal-title":"Knowl-Based Syst"},{"key":"4003_CR22","doi-asserted-by":"publisher","DOI":"10.1002\/bimj.202200107","author":"HI Oberman","year":"2024","unstructured":"Oberman HI, Vink G. Toward a standardized evaluation of imputation methodology. Biometrical J. 2024. https:\/\/doi.org\/10.1002\/bimj.202200107.","journal-title":"Biometrical J"},{"issue":"1","key":"4003_CR23","doi-asserted-by":"publisher","first-page":"183","DOI":"10.2218\/ijdc.v13i1.620","volume":"13","author":"M Senseney","year":"2018","unstructured":"Senseney M, et al. Data mining research with in-copyright and use-limited text datasets: preliminary findings from a systematic literature review and stakeholder interviews. Int J Digit Curation. 2018;13(1):183\u201394. https:\/\/doi.org\/10.2218\/ijdc.v13i1.620.","journal-title":"Int J Digit Curation"},{"issue":"1","key":"4003_CR24","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1177\/0049124112464866","volume":"42","author":"P Von Hippel","year":"2013","unstructured":"Von Hippel P. Should a normal imputation model be modified to impute skewed variables. Sociol Methods Res. 2013;42(1):105\u201338. https:\/\/doi.org\/10.1177\/0049124112464866.","journal-title":"Sociol Methods Res"},{"key":"4003_CR25","unstructured":"UCI Machine Learning Repository: Data Sets. (2021). Retrieved 23 September 2021, from https:\/\/archive.ics.uci.edu\/ml\/datasets.php."},{"key":"4003_CR26","unstructured":"Kaggle public data repository, (2021). https:\/\/www.kaggle.com\/datasets"},{"key":"4003_CR27","doi-asserted-by":"publisher","first-page":"124","DOI":"10.1016\/j.knosys.2018.03.026","volume":"151","author":"C Tsai","year":"2018","unstructured":"Tsai C, Li M, Lin W. A class center based approach for missing value imputation. Knowl-Based Syst. 2018;151:124\u201335. https:\/\/doi.org\/10.1016\/j.knosys.2018.03.026.","journal-title":"Knowl-Based Syst"},{"key":"4003_CR28","doi-asserted-by":"crossref","unstructured":"Van Buuren, S. (2018). Flexible Imputation of Missing Data, Second (2nd ed.). Routledge.","DOI":"10.1201\/9780429492259"},{"key":"4003_CR29","doi-asserted-by":"publisher","first-page":"11651","DOI":"10.1109\/access.2019.2891360","volume":"7","author":"MS Santos","year":"2019","unstructured":"Santos MS, Pereira RC, Costa AF, Soares JP, Santos J, Abreu PH. Generating synthetic missing data: a review by missing mechanism. IEEE Access. 2019;7:11651\u201367. https:\/\/doi.org\/10.1109\/access.2019.2891360.","journal-title":"IEEE Access"},{"issue":"1","key":"4003_CR30","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1002\/mpr.329","volume":"20","author":"M Azur","year":"2011","unstructured":"Azur M, Stuart E, Frangakis C, Leaf P. Multiple imputation by chained equations: what is it and how does it work? Int J Methods Psychiatr Res. 2011;20(1):40\u20139. https:\/\/doi.org\/10.1002\/mpr.329.","journal-title":"Int J Methods Psychiatr Res"},{"issue":"3","key":"4003_CR31","doi-asserted-by":"publisher","first-page":"614","DOI":"10.1007\/s10489-015-0666-x","volume":"43","author":"R Pan","year":"2015","unstructured":"Pan R, Yang T, Cao J, Lu K, Zhang Z. Missing data imputation by K nearest neighbours based on grey relational structure and mutual information. Appl Intell. 2015;43(3):614\u201332. https:\/\/doi.org\/10.1007\/s10489-015-0666-x.","journal-title":"Appl Intell"},{"issue":"1","key":"4003_CR32","doi-asserted-by":"publisher","first-page":"112","DOI":"10.1093\/bioinformatics\/btr597","volume":"28","author":"DJ Stekhoven","year":"2011","unstructured":"Stekhoven DJ, B\u00fchlmann P. MissForest: non-parametric missing value imputation for mixed-typedata. Bioinformatics. 2011;28(1):112\u20138.","journal-title":"Bioinformatics"},{"key":"4003_CR33","doi-asserted-by":"publisher","unstructured":"Patil BM, Joshi RC and Toshniwal D. Missing value imputation based on k-mean clustering with weighted distance. 2010, pp. 600\u2013609. https:\/\/doi.org\/10.1007\/978-3-642-14834-7_56.","DOI":"10.1007\/978-3-642-14834-7_56"},{"key":"4003_CR34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-14825-5","volume-title":"Contemporary computing","author":"S Ranka","year":"2010","unstructured":"Ranka S. Contemporary computing. Berlin: Springer; 2010."},{"key":"4003_CR35","first-page":"2287","volume":"11","author":"R Mazumder","year":"2010","unstructured":"Mazumder R, Hastie T, Tibshirani R. Spectral regularization algorithms for learning large incomplete matrices. J Mach Learn Res. 2010;11:2287\u2013322.","journal-title":"J Mach Learn Res"},{"key":"4003_CR36","doi-asserted-by":"publisher","unstructured":"Salgado C, Azevedo C, Proen\u00e7a H & Vieira S (2016) Missing data. secondary analysis of electronic health records, 143\u2013162. https:\/\/doi.org\/10.1007\/978-3-319-43742-2_13.","DOI":"10.1007\/978-3-319-43742-2_13"},{"key":"4003_CR37","unstructured":"Wisconsin breast cancer data (2022). https:\/\/archive.ics.uci.edu\/dataset\/15\/breast+cancer+Wisconsin+original"},{"key":"4003_CR38","unstructured":"Mammographic data, (2022): https:\/\/archive.ics.uci.edu\/dataset\/161\/mammographic+mass"},{"key":"4003_CR39","unstructured":"Parkinson Diagnosis data, (2022): https:\/\/archive.ics.uci.edu\/dataset\/174\/parkinsons."},{"key":"4003_CR40","unstructured":"E. coli data, (2022): https:\/\/archive.ics.uci.edu\/dataset\/39\/ecoli"},{"key":"4003_CR41","volume-title":"SPSS for windows step by step","author":"D George","year":"2010","unstructured":"George D, Mallery P. SPSS for windows step by step. Boston: Allyn & Bacon; 2010."},{"key":"4003_CR42","doi-asserted-by":"publisher","DOI":"10.1186\/1471-2288-12-184","author":"J Hardt","year":"2012","unstructured":"Hardt J, Herke M, Leonhart R. Auxiliary variables in multiple imputation in regression with missing X: a warning against including too many in small sample research. BMC Med Res Methodol. 2012. https:\/\/doi.org\/10.1186\/1471-2288-12-184.","journal-title":"BMC Med Res Methodol"},{"issue":"6062","key":"4003_CR43","doi-asserted-by":"publisher","first-page":"1518","DOI":"10.1126\/science.1205438","volume":"334","author":"D Reshef","year":"2011","unstructured":"Reshef D, Reshef Y, Finucane H, Grossman S, McVean G, Turnbaugh P, et al. Detecting Novel Associations in Large Data Sets. Science. 2011;334(6062):1518\u201324. https:\/\/doi.org\/10.1126\/science.1205438.","journal-title":"Science"},{"key":"4003_CR44","unstructured":"Diabetes data (2022): https:\/\/www.kaggle.com\/code\/mathchi\/diagnostic-a-patient-has-diabetes."},{"issue":"3","key":"4003_CR45","doi-asserted-by":"publisher","first-page":"189","DOI":"10.1007\/s41060-018-0102-5","volume":"6","author":"C Weihs","year":"2018","unstructured":"Weihs C, Ickstadt K. Data Science: the impact of statistics. Int J Data Sci Anal. 2018;6(3):189\u201394. https:\/\/doi.org\/10.1007\/s41060-018-0102-5.","journal-title":"Int J Data Sci Anal"},{"key":"4003_CR46","first-page":"2017","volume":"15","author":"J Wulff","year":"2017","unstructured":"Wulff J, Linda E. Multiple imputation by chained equations in praxis: guidelines and review. Electron J Bus Res Methods. 2017;15:2017\u201358.","journal-title":"Electron J Bus Res Methods"},{"issue":"6","key":"4003_CR47","doi-asserted-by":"publisher","first-page":"363","DOI":"10.1002\/sam.11348","volume":"10","author":"F Tang","year":"2017","unstructured":"Tang F, Ishwaran H. Random forest missing data algorithms. Statist Anal Data Min. 2017;10(6):363\u201377. https:\/\/doi.org\/10.1002\/sam.11348.","journal-title":"Statist Anal Data Min"}],"container-title":["SN Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-025-04003-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42979-025-04003-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-025-04003-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,12]],"date-time":"2025-05-12T13:14:23Z","timestamp":1747055663000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42979-025-04003-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,12]]},"references-count":47,"journal-issue":{"issue":"5","published-online":{"date-parts":[[2025,6]]}},"alternative-id":["4003"],"URL":"https:\/\/doi.org\/10.1007\/s42979-025-04003-3","relation":{},"ISSN":["2661-8907"],"issn-type":[{"value":"2661-8907","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,12]]},"assertion":[{"value":"3 July 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 April 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"There is no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Research Involving Human\/Animals"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed Consent"}}],"article-number":"465"}}