{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T11:57:06Z","timestamp":1777377426408,"version":"3.51.4"},"reference-count":51,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T00:00:00Z","timestamp":1772582400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"DOI":"10.13039\/100018693","name":"Horizon Europe","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100018693","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100014440","name":"Espa\u00f1a Ministerio de Ciencia e Innovaci\u00f3n","doi-asserted-by":"publisher","award":["PID2022-140612OB-I00"],"award-info":[{"award-number":["PID2022-140612OB-I00"]}],"id":[{"id":"10.13039\/100014440","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Array"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.array.2026.100743","type":"journal-article","created":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T07:35:08Z","timestamp":1774683308000},"page":"100743","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Studying the impact of data preprocessing, hyperparameter tuning and machine learning algorithms in crash prediction explainability"],"prefix":"10.1016","volume":"30","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-7648-0115","authenticated-orcid":false,"given":"Jon","family":"D\u00edaz-Aparicio","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8761-1626","authenticated-orcid":false,"given":"Erick","family":"Rodr\u00edguez-Esparza","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9355-3610","authenticated-orcid":false,"given":"Jenny","family":"Fajardo-Calder\u00edn","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9581-1823","authenticated-orcid":false,"given":"Enrique","family":"Onieva","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.array.2026.100743_b1","series-title":"Road traffic injuries report","author":"WHO","year":"2023"},{"key":"10.1016\/j.array.2026.100743_b2","series-title":"Recent advances in traffic accident analysis and prediction: A comprehensive review of machine learning techniques","author":"Behboudi","year":"2024"},{"key":"10.1016\/j.array.2026.100743_b3","doi-asserted-by":"crossref","DOI":"10.1016\/j.aap.2023.107378","article-title":"Advances, challenges, and future research needs in machine learning-based crash prediction models: A systematic review","volume":"194","author":"Ali","year":"2024","journal-title":"Accid Anal Prev"},{"issue":"5","key":"10.1016\/j.array.2026.100743_b4","doi-asserted-by":"crossref","first-page":"1019","DOI":"10.1016\/j.aap.2006.04.009","article-title":"Analysis of traffic injury severity: An application of non-parametric classification tree techniques","volume":"38","author":"Chang","year":"2006","journal-title":"Accid Anal Prev"},{"issue":"1","key":"10.1016\/j.array.2026.100743_b5","doi-asserted-by":"crossref","first-page":"20024","DOI":"10.1038\/s41598-022-24476-z","article-title":"Identifying high crash risk segments in rural roads using ensemble decision tree-based models","volume":"12","author":"Iranmanesh","year":"2022","journal-title":"Sci Rep"},{"key":"10.1016\/j.array.2026.100743_b6","doi-asserted-by":"crossref","first-page":"254","DOI":"10.1016\/j.jsr.2021.12.007","article-title":"A literature review of machine learning algorithms for crash injury severity prediction","volume":"80","author":"Santos","year":"2022","journal-title":"J Saf Res"},{"key":"10.1016\/j.array.2026.100743_b7","series-title":"2019 IEEE Jordan international joint conference on electrical engineering and information technology","first-page":"272","article-title":"Comparison of machine learning algorithms for predicting traffic accident severity","author":"AlMamlook","year":"2019"},{"key":"10.1016\/j.array.2026.100743_b8","doi-asserted-by":"crossref","DOI":"10.1016\/j.aap.2022.106937","article-title":"Factors affecting injury severity at pedestrian crossing locations with Rectangular RAPID Flashing Beacons (RRFB) using XGBoost and random parameters discrete outcome models","volume":"181","author":"Goswamy","year":"2023","journal-title":"Accid Anal Prev"},{"key":"10.1016\/j.array.2026.100743_b9","series-title":"The nature of statistical learning theory","author":"Vapnik","year":"2013"},{"key":"10.1016\/j.array.2026.100743_b10","doi-asserted-by":"crossref","first-page":"252","DOI":"10.1016\/j.aap.2012.11.027","article-title":"Utilizing support vector machine in real-time crash risk evaluation","volume":"51","author":"Yu","year":"2013","journal-title":"Accid Anal Prev"},{"key":"10.1016\/j.array.2026.100743_b11","doi-asserted-by":"crossref","DOI":"10.1016\/j.aap.2020.105665","article-title":"Efficient mapping of crash risk at intersections with connected vehicle data and deep learning models","volume":"144","author":"Hu","year":"2020","journal-title":"Accid Anal Prev"},{"key":"10.1016\/j.array.2026.100743_b12","doi-asserted-by":"crossref","DOI":"10.1016\/j.aap.2020.105520","article-title":"A long short-term memory-based framework for crash detection on freeways with traffic data of different temporal resolutions","volume":"141","author":"Jiang","year":"2020","journal-title":"Accid Anal Prev"},{"key":"10.1016\/j.array.2026.100743_b13","first-page":"79","article-title":"Hyperparameter tuning for machine learning algorithms used for arabic sentiment analysis","volume":"vol. 8","author":"Elgeldawi","year":"2021"},{"key":"10.1016\/j.array.2026.100743_b14","doi-asserted-by":"crossref","first-page":"295","DOI":"10.1016\/j.neucom.2020.07.061","article-title":"On hyperparameter optimization of machine learning algorithms: Theory and practice","volume":"415","author":"Yang","year":"2020","journal-title":"Neurocomputing"},{"issue":"6","key":"10.1016\/j.array.2026.100743_b15","doi-asserted-by":"crossref","first-page":"388","DOI":"10.9734\/jerr\/2024\/v26i61188","article-title":"Hyperparameter tuning in machine learning: A comprehensive review","volume":"26","author":"a Ilemobayo","year":"2024","journal-title":"J Eng Res Rep"},{"issue":"6","key":"10.1016\/j.array.2026.100743_b16","doi-asserted-by":"crossref","first-page":"765","DOI":"10.1080\/01441647.2018.1442888","article-title":"In search of surrogate safety indicators for vulnerable road users: a review of surrogate safety indicators","volume":"38","author":"Johnsson","year":"2018","journal-title":"Transp Rev"},{"key":"10.1016\/j.array.2026.100743_b17","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1016\/j.jsr.2018.12.001","article-title":"Investigating safety of vulnerable road users in selected eu countries","volume":"68","author":"Olszewski","year":"2019","journal-title":"J Saf Res"},{"key":"10.1016\/j.array.2026.100743_b18","article-title":"Real-time crash potential prediction on freeways using connected vehicle data","volume":"36","author":"Zhang","year":"2022","journal-title":"Anal Methods Accid Res"},{"key":"10.1016\/j.array.2026.100743_b19","article-title":"A real-time crash prediction fusion framework: An imbalance-aware strategy for collision avoidance systems","volume":"118","author":"Abou Elassad","year":"2020","journal-title":"Transp Res Part C: Emerg Technol"},{"issue":"6","key":"10.1016\/j.array.2026.100743_b20","doi-asserted-by":"crossref","first-page":"775","DOI":"10.1016\/j.jtte.2020.07.004","article-title":"Machine learning applied to road safety modeling: A systematic literature review","volume":"7","author":"Silva","year":"2020","journal-title":"J Traffic Transp Eng (English Edition)"},{"issue":"5","key":"10.1016\/j.array.2026.100743_b21","doi-asserted-by":"crossref","first-page":"128","DOI":"10.1007\/s41062-024-01426-4","article-title":"Bridging conventional and proactive approaches for road safety analytic modeling and future perspectives","volume":"9","author":"Singh","year":"2024","journal-title":"Innov Infrastruct Solut."},{"issue":"1","key":"10.1016\/j.array.2026.100743_b22","doi-asserted-by":"crossref","DOI":"10.1080\/23311916.2022.2124637","article-title":"Road safety performance index: A tool for crash prediction","volume":"9","author":"Shbeeb","year":"2022","journal-title":"Cogent Eng"},{"issue":"2","key":"10.1016\/j.array.2026.100743_b23","doi-asserted-by":"crossref","first-page":"99","DOI":"10.59796\/jcst.V15N2.2025.99","article-title":"Effect of resampling techniques on machine learning models for classifying road accident severity in Thailand","volume":"15","author":"Simmachan","year":"2025","journal-title":"J Curr Sci Technol"},{"key":"10.1016\/j.array.2026.100743_b24","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1613\/jair.953","article-title":"SMOTE: synthetic minority over-sampling technique","volume":"16","author":"Chawla","year":"2002","journal-title":"J Artificial Intelligence Res"},{"key":"10.1016\/j.array.2026.100743_b25","doi-asserted-by":"crossref","DOI":"10.1016\/j.aap.2021.106240","article-title":"Effectiveness of resampling methods in coping with imbalanced crash data: Crash type analysis and predictive modeling","volume":"159","author":"Morris","year":"2021","journal-title":"Accid Anal Prev"},{"issue":"1","key":"10.1016\/j.array.2026.100743_b26","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1186\/s40537-025-01119-4","article-title":"Resampling approaches to handle class imbalance: a review from a data perspective","volume":"12","author":"Carvalho","year":"2025","journal-title":"J Big Data"},{"key":"10.1016\/j.array.2026.100743_b27","series-title":"Interpretable machine learning","author":"Molnar","year":"2020"},{"key":"10.1016\/j.array.2026.100743_b28","doi-asserted-by":"crossref","DOI":"10.1016\/j.aap.2022.106617","article-title":"On the interpretability of machine learning methods in crash frequency modeling and crash modification factor development","volume":"168","author":"Wen","year":"2022","journal-title":"Accid Anal Prev"},{"issue":"8\u20139","key":"10.1016\/j.array.2026.100743_b29","doi-asserted-by":"crossref","first-page":"1156","DOI":"10.1016\/j.ssci.2011.03.007","article-title":"A combined frequency\u2013severity approach for the analysis of rear-end crashes on urban arterials","volume":"49","author":"Das","year":"2011","journal-title":"Saf Sci"},{"key":"10.1016\/j.array.2026.100743_b30","doi-asserted-by":"crossref","first-page":"27","DOI":"10.1016\/j.aap.2017.08.008","article-title":"Comparison of four statistical and machine learning methods for crash severity prediction","volume":"108","author":"Iranitalab","year":"2017","journal-title":"Accid Anal Prev"},{"key":"10.1016\/j.array.2026.100743_b31","series-title":"International conference on machine learning","first-page":"448","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","author":"Ioffe","year":"2015"},{"issue":"302","key":"10.1016\/j.array.2026.100743_b32","doi-asserted-by":"crossref","first-page":"415","DOI":"10.1080\/01621459.1963.10500855","article-title":"Problems in the analysis of survey data, and a proposal","volume":"58","author":"Morgan","year":"1963","journal-title":"J Amer Statist Assoc"},{"key":"10.1016\/j.array.2026.100743_b33","first-page":"278","article-title":"Random decision forests","volume":"vol. 1","author":"Ho","year":"1995"},{"issue":"1","key":"10.1016\/j.array.2026.100743_b34","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1007\/s10994-006-6226-1","article-title":"Extremely randomized trees","volume":"63","author":"Geurts","year":"2006","journal-title":"Mach Learn"},{"issue":"1","key":"10.1016\/j.array.2026.100743_b35","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1006\/jcss.1997.1504","article-title":"A decision-theoretic generalization of on-line learning and an application to boosting","volume":"55","author":"Freund","year":"1997","journal-title":"J Comput System Sci"},{"key":"10.1016\/j.array.2026.100743_b36","first-page":"1189","article-title":"Greedy function approximation: a gradient boosting machine","author":"Friedman","year":"2001","journal-title":"Ann Stat"},{"key":"10.1016\/j.array.2026.100743_b37","series-title":"Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining","first-page":"785","article-title":"XGBoost: A scalable tree boosting system","author":"Chen","year":"2016"},{"issue":"3","key":"10.1016\/j.array.2026.100743_b38","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1023\/A:1022627411411","article-title":"Support-vector networks","volume":"20","author":"Cortes","year":"1995","journal-title":"Mach Learn"},{"issue":"1","key":"10.1016\/j.array.2026.100743_b39","first-page":"1","article-title":"Verification of forecasts expressed in terms of probability","volume":"78","author":"Glenn","year":"1950","journal-title":"Mon Weather Rev"},{"key":"10.1016\/j.array.2026.100743_b40","article-title":"Obtaining well calibrated probabilities using bayesian binning","volume":"vol. 29","author":"Naeini","year":"2015"},{"issue":"1\/2","key":"10.1016\/j.array.2026.100743_b41","doi-asserted-by":"crossref","first-page":"17","DOI":"10.2307\/2332142","article-title":"Notes on continuous stochastic phenomena","volume":"37","author":"Moran","year":"1950","journal-title":"Biometrika"},{"key":"10.1016\/j.array.2026.100743_b42","series-title":"A tutorial on Bayesian optimization of expensive cost functions, with application to active user modeling and hierarchical reinforcement learning","author":"Brochu","year":"2010"},{"key":"10.1016\/j.array.2026.100743_b43","series-title":"A value for n-person games","author":"Shapley","year":"1953"},{"key":"10.1016\/j.array.2026.100743_b44","series-title":"A unified approach to interpreting model predictions","author":"Lundberg","year":"2017"},{"key":"10.1016\/j.array.2026.100743_b45","first-page":"2825","article-title":"Scikit-learn: Machine learning in Python","volume":"12","author":"Pedregosa","year":"2011","journal-title":"J Mach Learn Res"},{"key":"10.1016\/j.array.2026.100743_b46","article-title":"Algorithms for hyper-parameter optimization","volume":"24","author":"Bergstra","year":"2011","journal-title":"Adv Neural Inf Process Syst"},{"key":"10.1016\/j.array.2026.100743_b47","doi-asserted-by":"crossref","unstructured":"Akiba T, Sano S, Yanase T, Ohta T, Koyama M. Optuna: A Next-generation Hyperparameter Optimization Framework. In: Proceedings of the 25th ACM SIGKDD international conference on knowledge discovery and data mining. 2019.","DOI":"10.1145\/3292500.3330701"},{"issue":"1","key":"10.1016\/j.array.2026.100743_b48","doi-asserted-by":"crossref","first-page":"2522","DOI":"10.1038\/s42256-019-0138-9","article-title":"From local explanations to global understanding with explainable AI for trees","volume":"2","author":"Lundberg","year":"2020","journal-title":"Nat Mach Intell"},{"key":"10.1016\/j.array.2026.100743_b49","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.trc.2016.01.019","article-title":"Assessing public opinions of and interest in new vehicle technologies: An austin perspective","volume":"67","author":"Bansal","year":"2016","journal-title":"Transp Res Part C: Emerg Technol"},{"key":"10.1016\/j.array.2026.100743_b50","doi-asserted-by":"crossref","first-page":"283","DOI":"10.1016\/j.trd.2019.01.014","article-title":"Describing the users: Understanding adoption of and interest in shared, electrified, and automated transportation in the San Francisco Bay Area","volume":"71","author":"Spurlock","year":"2019","journal-title":"Transp Res Part D: Transp Environ"},{"key":"10.1016\/j.array.2026.100743_b51","article-title":"Traffic conflict prediction using connected vehicle data","volume":"39","author":"Islam","year":"2023","journal-title":"Anal Methods Accid Res"}],"container-title":["Array"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S2590005626000664?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S2590005626000664?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T09:07:34Z","timestamp":1777367254000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S2590005626000664"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":51,"alternative-id":["S2590005626000664"],"URL":"https:\/\/doi.org\/10.1016\/j.array.2026.100743","relation":{},"ISSN":["2590-0056"],"issn-type":[{"value":"2590-0056","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Studying the impact of data preprocessing, hyperparameter tuning and machine learning algorithms in crash prediction explainability","name":"articletitle","label":"Article Title"},{"value":"Array","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.array.2026.100743","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Author(s). Published by Elsevier Inc.","name":"copyright","label":"Copyright"}],"article-number":"100743"}}