{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T18:19:09Z","timestamp":1772907549751,"version":"3.50.1"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2020,12,1]],"date-time":"2020-12-01T00:00:00Z","timestamp":1606780800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2020,12,9]],"date-time":"2020-12-09T00:00:00Z","timestamp":1607472000000},"content-version":"vor","delay-in-days":8,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100013302","name":"King Abdullah International Medical Research Center","doi-asserted-by":"crossref","award":["RC20\/024\/R"],"award-info":[{"award-number":["RC20\/024\/R"]}],"id":[{"id":"10.13039\/501100013302","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Big Data"],"published-print":{"date-parts":[[2020,12]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:p>Outpatients who fail to attend their appointments have a negative impact on the healthcare outcome. Thus, healthcare organizations facing new opportunities, one of them is to improve the quality of healthcare. The main challenges is predictive analysis using techniques capable of handle the huge data generated. We propose a big data framework for identifying subject outpatients\u2019 no-show via feature engineering and machine learning (MLlib) in the Spark platform. This study evaluates the performance of five machine learning techniques, using the (2,011,813\u202c) outpatients\u2019 visits data. Conducting several experiments and using different validation methods, the Gradient Boosting (GB) performed best, resulting in an increase of accuracy and ROC to 79% and 81%, respectively. In addition, we showed that exploring and evaluating the performance of the machine learning models using various evaluation methods is critical as the accuracy of prediction can significantly differ. The aim of this paper is exploring factors that affect no-show rate and can be used to formulate predictions using big data machine learning techniques.<\/jats:p>","DOI":"10.1186\/s40537-020-00384-9","type":"journal-article","created":{"date-parts":[[2020,12,9]],"date-time":"2020-12-09T08:21:59Z","timestamp":1607502119000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":24,"title":["Predictors of outpatients\u2019 no-show: big data analytics using apache spark"],"prefix":"10.1186","volume":"7","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0268-2629","authenticated-orcid":false,"given":"Tahani","family":"Daghistani","sequence":"first","affiliation":[]},{"given":"Huda","family":"AlGhamdi","sequence":"additional","affiliation":[]},{"given":"Riyad","family":"Alshammari","sequence":"additional","affiliation":[]},{"given":"Raed H.","family":"AlHazme","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,12,9]]},"reference":[{"issue":"03","key":"384_CR1","doi-asserted-by":"publisher","first-page":"836","DOI":"10.4338\/ACI-2014-04-RA-0026","volume":"5","author":"Y Huang","year":"2014","unstructured":"Huang Y, Hanauer DA. Patient no-show predictive model development using multiple data sources for an effective overbooking approach. Appl Clin Inform. 2014;5(03):836\u201360.","journal-title":"Appl Clin Inform"},{"issue":"1","key":"384_CR2","first-page":"2","volume":"2","author":"J Denney","year":"2019","unstructured":"Denney J, Coyne S, Rafiqi S. Machine learning predictions of no-show appointments in a primary care setting. SMU Data Sci Rev. 2019;2(1):2.","journal-title":"SMU Data Sci Rev"},{"issue":"57","key":"384_CR3","doi-asserted-by":"publisher","first-page":"428","DOI":"10.1016\/j.dss.2012.10.048","volume":"1","author":"JB Norris","year":"2014","unstructured":"Norris JB, Kumar C, Chand S, Moskowitz H, Shade SA, Willis DR. An empirical investigation into factors affecting patient cancellations and no-shows at outpatient clinics. Decis Support Syst. 2014;1(57):428\u201343.","journal-title":"Decis Support Syst"},{"key":"384_CR4","doi-asserted-by":"crossref","unstructured":"Samorani M, Harris S, Blount LG, Lu H, Santoro MA. Overbooked and Overlooked: Machine Learning and Racial Bias in Medical Appointment Scheduling. Available at SSRN 3467047. 2019 Oct 9.","DOI":"10.2139\/ssrn.3467047"},{"issue":"1","key":"384_CR5","doi-asserted-by":"publisher","first-page":"245","DOI":"10.1016\/j.ejor.2014.06.034","volume":"240","author":"M Samorani","year":"2015","unstructured":"Samorani M, LaGanga LR. Outpatient appointment scheduling given individual day-dependent no-show predictions. Eur J Oper Res. 2015;240(1):245\u201357.","journal-title":"Eur J Oper Res"},{"key":"384_CR6","doi-asserted-by":"crossref","unstructured":"Elvira C, Ochoa A, Gonzalvez JC, Moch\u00f3n F. Machine-learning-based no show prediction in outpatient visits. International Journal of Interactive Multimedia & Artificial Intelligence. 2018 Mar 1;4(7).","DOI":"10.9781\/ijimai.2017.03.004"},{"key":"384_CR7","doi-asserted-by":"crossref","unstructured":"Assefi M, Behravesh E, Liu G, Tafti AP. Big data machine learning using apache spark MLlib. In2017 IEEE International Conference on Big Data (Big Data) 2017 Dec 11 (pp. 3492-3498). IEEE.","DOI":"10.1109\/BigData.2017.8258338"},{"issue":"6","key":"384_CR8","doi-asserted-by":"publisher","first-page":"724","DOI":"10.1007\/s11606-014-3165-6","volume":"30","author":"DM Blumenthal","year":"2015","unstructured":"Blumenthal DM, Singal G, Mangla SS, Macklin EA, Chung DC. Predicting non-adherence with outpatient colonoscopy using a novel electronic tool that measures prior non-adherence. J Gen Intern Med. 2015;30(6):724\u201331.","journal-title":"J Gen Intern Med"},{"issue":"3","key":"384_CR9","doi-asserted-by":"publisher","first-page":"730","DOI":"10.1177\/1932296815614866","volume":"10","author":"H Kurasawa","year":"2016","unstructured":"Kurasawa H, Hayashi K, Fujino A, Takasugi K, Haga T, Waki K, Noguchi T, Ohe K. Machine-learning-based prediction of a missed scheduled clinical appointment by patients with diabetes. J Diab Sci Technol. 2016;10(3):730\u20136.","journal-title":"J Diab Sci Technol"},{"key":"384_CR10","doi-asserted-by":"crossref","unstructured":"Devasahay SR, Karpagam S, Ma NL. Predicting appointment misses in hospitals using data analytics. Mhealth. 2017;3.","DOI":"10.21037\/mhealth.2017.03.03"},{"issue":"5\u20136","key":"384_CR11","doi-asserted-by":"publisher","first-page":"e1708","DOI":"10.7205\/MILMED-D-16-00345","volume":"182","author":"RM Goffman","year":"2017","unstructured":"Goffman RM, Harris SL, May JH, Milicevic AS, Monte RJ, Myaskovsky L, Rodriguez KL, Tjader YC, Vargas DL. Modeling patient no-show history and predicting future outpatient appointment behavior in the Veterans Health Administration. Mil Med. 2017;182(5\u20136):e1708\u201314.","journal-title":"Mil Med"},{"issue":"10","key":"384_CR12","doi-asserted-by":"publisher","first-page":"1303","DOI":"10.1016\/j.jacr.2017.05.007","volume":"14","author":"HB Harvey","year":"2017","unstructured":"Harvey HB, Liu C, Ai J, Jaworsky C, Guerrier CE, Flores E, Pianykh O. Predicting no-shows in radiology using regression modeling of data available in the electronic medical record. J American College Radiol. 2017;14(10):1303\u20139.","journal-title":"J American College Radiol"},{"issue":"102","key":"384_CR13","doi-asserted-by":"publisher","first-page":"245","DOI":"10.1016\/j.eswa.2018.02.022","volume":"15","author":"S Srinivas","year":"2018","unstructured":"Srinivas S, Ravindran AR. Optimizing outpatient appointment system using machine learning algorithms and scheduling rules: a prescriptive analytics framework. Expert Syst Appl. 2018;15(102):245\u201361.","journal-title":"Expert Syst Appl"},{"key":"384_CR14","doi-asserted-by":"publisher","first-page":"215013271881169","DOI":"10.1177\/2150132718811692","volume":"9","author":"I Mohammadi","year":"2018","unstructured":"Mohammadi I, Wu H, Turkcan A, Toscos T, Doebbeling BN. Data analytics and modeling for appointment no-show in community health centers. J Primary Care Community Health. 2018;9:2150132718811692.","journal-title":"J Primary Care Community Health"},{"issue":"1","key":"384_CR15","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1007\/s11695-018-3480-9","volume":"29","author":"LF Dantas","year":"2019","unstructured":"Dantas LF, Hamacher S, Oliveira FL, Barbosa SD, Viegas F. Predicting patient no-show behavior: a study in a bariatric clinic. Obes Surg. 2019;29(1):40\u20137.","journal-title":"Obes Surg"},{"issue":"1","key":"384_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/s41746-019-0103-3","volume":"2","author":"A Nelson","year":"2019","unstructured":"Nelson A, Herron D, Rees G, Nachev P. Predicting scheduled hospital attendance with artificial intelligence. NPJ digital Med. 2019;2(1):1\u20137.","journal-title":"NPJ digital Med"},{"issue":"6","key":"384_CR17","doi-asserted-by":"publisher","first-page":"373","DOI":"10.5144\/0256-4947.2019.373","volume":"39","author":"S AlMuhaideb","year":"2019","unstructured":"AlMuhaideb S, Alswailem O, Alsubaie N, Ferwana I, Alnajem A. Prediction of hospital no-show appointments through artificial intelligence algorithms. Ann Saudi Med. 2019;39(6):373\u201381.","journal-title":"Ann Saudi Med"},{"issue":"3","key":"384_CR18","doi-asserted-by":"publisher","first-page":"272","DOI":"10.1080\/24725579.2019.1649764","volume":"9","author":"E Ahmadi","year":"2019","unstructured":"Ahmadi E, Garcia-Arce A, Masel DT, Reich E, Puckey J, Maff R. A metaheuristic-based stacking model for predicting the risk of patient no-show and late cancellation for neurology appointments. IISE Transact Healthcare Syst Engineering. 2019;9(3):272\u201391.","journal-title":"IISE Transact Healthcare Syst Engineering"},{"key":"384_CR19","unstructured":"Dashtban M, Li W. Deep learning for predicting non-attendance in hospital outpatient appointments."},{"key":"384_CR20","doi-asserted-by":"crossref","unstructured":"Hung PD, Hanh TD, Diep VT. Breast cancer prediction using spark MLlib and ML packages. InProceedings of the 2018 5th International Conference on Bioinformatics Research and Applications 2018 Dec 27 (pp. 52-59).","DOI":"10.1145\/3309129.3309133"},{"issue":"6","key":"384_CR21","first-page":"239484","volume":"2","author":"S Nikhar","year":"2016","unstructured":"Nikhar S, Karandikar AM. Prediction of heart disease using machine learning algorithms. Int J Advanced Engineering, Management Sci. 2016;2(6):239484.","journal-title":"Int J Advanced Engineering, Management Sci"},{"issue":"3\u20134","key":"384_CR22","doi-asserted-by":"publisher","first-page":"145","DOI":"10.1007\/s41060-016-0027-9","volume":"1","author":"S Salloum","year":"2016","unstructured":"Salloum S, Dautov R, Chen X, Peng PX, Huang JZ. Big data analytics on Apache Spark. Int J Data Sci Anal. 2016;1(3\u20134):145\u201364.","journal-title":"Int J Data Sci Anal"},{"issue":"1","key":"384_CR23","doi-asserted-by":"publisher","first-page":"602","DOI":"10.1080\/21642583.2014.956265","volume":"2","author":"K Fawagreh","year":"2014","unstructured":"Fawagreh K, Gaber MM, Elyan E. Random forests: from early developments to recent advancements. Systems Sci Control Engineering Open Access J. 2014;2(1):602\u20139.","journal-title":"Systems Sci Control Engineering Open Access J"},{"issue":"67","key":"384_CR24","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1016\/j.isprsjprs.2011.11.002","volume":"1","author":"VF Rodriguez-Galiano","year":"2012","unstructured":"Rodriguez-Galiano VF, Ghimire B, Rogan J, Chica-Olmo M, Rigol-Sanchez JP. An assessment of the effectiveness of a random forest classifier for land-cover classification. ISPRS J Photogrammetry Remote Sensing. 2012;1(67):93\u2013104.","journal-title":"ISPRS J Photogrammetry Remote Sensing"},{"issue":"1","key":"384_CR25","doi-asserted-by":"publisher","first-page":"s13742-016","DOI":"10.1186\/s13742-016-0117-6","volume":"5","author":"ID Dinov","year":"2016","unstructured":"Dinov ID. Methodological challenges and analytic opportunities for modeling and interpreting Big Healthcare Data. Gigascience. 2016;5(1):s13742-016.","journal-title":"Gigascience."},{"issue":"4","key":"384_CR26","doi-asserted-by":"publisher","first-page":"367","DOI":"10.1016\/S0167-9473(01)00065-2","volume":"38","author":"JH Friedman","year":"2002","unstructured":"Friedman JH. Stochastic gradient boosting. Comput Stat Data Anal. 2002;38(4):367\u201378.","journal-title":"Comput Stat Data Anal"},{"key":"384_CR27","doi-asserted-by":"crossref","unstructured":"Jiang H, Mo L, Xun X. Idle construction land prediction with Gradient Boosting Machine. In2016 International Conference on Progress in Informatics and Computing (PIC) 2016 Dec 23 (pp. 295-299). IEEE.","DOI":"10.1109\/PIC.2016.7949513"},{"key":"384_CR28","doi-asserted-by":"crossref","unstructured":"Mishra AK, Keserwani PK, Samaddar SG, Lamichaney HB, Mishra AK. A decision support system in healthcare prediction. InAdvanced Computational and Communication Paradigms 2018 (pp. 156-167). Springer, Singapore.","DOI":"10.1007\/978-981-10-8240-5_18"},{"key":"384_CR29","unstructured":"Cramer JS. The origins of logistic regression."},{"issue":"72","key":"384_CR30","doi-asserted-by":"publisher","first-page":"306","DOI":"10.1016\/j.procs.2015.12.145","volume":"1","author":"N Jothi","year":"2015","unstructured":"Jothi N, Husain W. Data mining in healthcare\u2013a review. Procedia Computer Sci. 2015;1(72):306\u201313.","journal-title":"Procedia Computer Sci"},{"issue":"1\u20133","key":"384_CR31","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1016\/j.neucom.2010.02.019","volume":"74","author":"GB Huang","year":"2010","unstructured":"Huang GB, Ding X, Zhou H. Optimization method based extreme learning machine for classification. Neurocomputing. 2010;74(1\u20133):155\u201363.","journal-title":"Neurocomputing."},{"key":"384_CR32","doi-asserted-by":"crossref","unstructured":"Ahmad P, Qamar S, Rizvi SQ. Techniques of data mining in healthcare: a review. International Journal of Computer Applications. 2015 Jan 1;120(15).","DOI":"10.5120\/21307-4126"},{"issue":"85","key":"384_CR33","doi-asserted-by":"publisher","first-page":"862","DOI":"10.1016\/j.procs.2016.05.276","volume":"1","author":"TR Baitharu","year":"2016","unstructured":"Baitharu TR, Pani SK. Analysis of data mining techniques for healthcare decision support system using liver disorder dataset. Procedia Computer Sci. 2016;1(85):862\u201370.","journal-title":"Procedia Computer Sci"},{"key":"384_CR34","doi-asserted-by":"crossref","unstructured":"Esugasini S, Mashor MY, Isa NA, Othman NH. Performance comparison for MLP networks using various back propagation algorithms for breast cancer diagnosis. In International Conference on Knowledge-Based and Intelligent Information and Engineering Systems 2005 Sep 14 (pp. 123-130). Springer, Berlin, Heidelberg.","DOI":"10.1007\/11552451_17"},{"issue":"4","key":"384_CR35","doi-asserted-by":"publisher","first-page":"e0195344","DOI":"10.1371\/journal.pone.0195344","volume":"13","author":"S Sakr","year":"2018","unstructured":"Sakr S, Elshawi R, Ahmed A, Qureshi WT, Brawner C, Keteyian S, Blaha MJ, Al-Mallah MH. Using machine learning on cardiorespiratory fitness data for predicting hypertension: the Henry Ford ExercIse Testing (FIT) Project. PLoS ONE. 2018;13(4):e0195344.","journal-title":"PLoS ONE"},{"key":"384_CR36","doi-asserted-by":"crossref","unstructured":"Patel AC, Markey MK. Comparison of three-class classification performance metrics: a case study in breast cancer CAD. InMedical imaging 2005: Image perception, observer performance, and technology assessment 2005 Apr 6 (Vol. 5749, pp. 581-589). International Society for Optics and Photonics.","DOI":"10.1117\/12.595763"},{"key":"384_CR37","doi-asserted-by":"crossref","unstructured":"Elgendy, N. and Elragal, A., 2014, July. Big data analytics: a literature review paper. In\u00a0Industrial conference on data mining\u00a0(pp. 214-227). Springer, Cham.","DOI":"10.1007\/978-3-319-08976-8_16"},{"issue":"1","key":"384_CR38","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1186\/1471-2296-6-47","volume":"6","author":"RD Neal","year":"2005","unstructured":"Neal RD, Hussain-Gambles M, Allgar VL, Lawlor DA, Dempsey O. Reasons for and consequences of missed appointments in general practice in the UK: questionnaire survey and prospective review of medical records. BMC Family Practice. 2005;6(1):47.","journal-title":"BMC Family Practice"},{"issue":"2","key":"384_CR39","doi-asserted-by":"publisher","first-page":"258","DOI":"10.5455\/ijmsph.2013.2.256-265","volume":"2","author":"Z Alhamad","year":"2013","unstructured":"Alhamad Z. Reasons for missing appointments in general clinics of primary health care center in Riyadh Military Hospital, Saudi Arabia. International Journal of Medical Science and Public Health. 2013;2(2):258\u201368.","journal-title":"International Journal of Medical Science and Public Health."},{"issue":"4","key":"384_CR40","doi-asserted-by":"publisher","first-page":"554","DOI":"10.1016\/j.jacr.2018.12.046","volume":"16","author":"RJ Mieloszyk","year":"2019","unstructured":"Mieloszyk RJ, Rosenbaum JI, Hall CS, Hippe DS, Gunn ML, Bhargava P. Environmental factors predictive of no-show visits in radiology: observations of three million outpatient imaging visits over 16 years. J American College Radiol. 2019;16(4):554\u20139.","journal-title":"J American College Radiol"},{"issue":"1","key":"384_CR41","first-page":"u209266-w3789","volume":"5","author":"K Mohamed","year":"2016","unstructured":"Mohamed K, Mustafa A, Tahtamouni S, Taha E, Hassan R. A quality improvement project to reduce the \u2018No Show\u2019rate in a paediatric neurology clinic. BMJ Open Quality. 2016;5(1):u209266-w3789.","journal-title":"BMJ Open Quality"},{"issue":"1","key":"384_CR42","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1111\/acem.12503","volume":"22","author":"S Arora","year":"2015","unstructured":"Arora S, Burner E, Terp S, Nok Lam C, Nercisian A, Bhatt V, Menchine M. Improving attendance at post\u2013emergency department follow-up via automated text message appointment reminders: a randomized controlled trial. Acad Emerg Med. 2015;22(1):31\u20137.","journal-title":"Acad Emerg Med"},{"issue":"5","key":"384_CR43","doi-asserted-by":"publisher","first-page":"605","DOI":"10.1057\/jors.2013.1","volume":"65","author":"Y Huang","year":"2014","unstructured":"Huang Y, Zuniga P. Effective cancellation policy to reduce the negative impact of patient no-show. Journal of the Operational Research Society. 2014;65(5):605\u201315.","journal-title":"Journal of the Operational Research Society."},{"issue":"1","key":"384_CR44","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1007\/BF03262661","volume":"10","author":"J Foley","year":"2009","unstructured":"Foley J, O\u2019Neill M. Use of mobile telephone short message service (SMS) as a reminder: the effect on patient attendance. European Archives Paediatric Dentistry. 2009;10(1):15\u20138.","journal-title":"European Archives Paediatric Dentistry"},{"issue":"6","key":"384_CR45","doi-asserted-by":"publisher","first-page":"542","DOI":"10.1016\/j.amjmed.2009.11.022","volume":"123","author":"A Parikh","year":"2010","unstructured":"Parikh A, Gupta K, Wilson AC, Fields K, Cosgrove NM, Kostis JB. The effectiveness of outpatient appointment reminder systems in reducing no-show rates. Am J Med. 2010;123(6):542\u20138.","journal-title":"Am J Med"}],"container-title":["Journal of Big Data"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s40537-020-00384-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s40537-020-00384-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s40537-020-00384-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,12,9]],"date-time":"2020-12-09T08:43:59Z","timestamp":1607503439000},"score":1,"resource":{"primary":{"URL":"https:\/\/journalofbigdata.springeropen.com\/articles\/10.1186\/s40537-020-00384-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,12]]},"references-count":45,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2020,12]]}},"alternative-id":["384"],"URL":"https:\/\/doi.org\/10.1186\/s40537-020-00384-9","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-33216\/v3","asserted-by":"object"},{"id-type":"doi","id":"10.21203\/rs.3.rs-33216\/v4","asserted-by":"object"},{"id-type":"doi","id":"10.21203\/rs.3.rs-33216\/v1","asserted-by":"object"},{"id-type":"doi","id":"10.21203\/rs.3.rs-33216\/v2","asserted-by":"object"}]},"ISSN":["2196-1115"],"issn-type":[{"value":"2196-1115","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,12]]},"assertion":[{"value":"2 June 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 November 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 December 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare that they have no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"108"}}