{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T12:00:22Z","timestamp":1769774422100,"version":"3.49.0"},"publisher-location":"Cham","reference-count":14,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032159830","type":"print"},{"value":"9783032159847","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-15984-7_12","type":"book-chapter","created":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T20:33:50Z","timestamp":1769718830000},"page":"166-180","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Mind the\u00a0Gap: Investigating the\u00a0Impact of\u00a0Data Leakage on\u00a0Machine Learning Predictive Models"],"prefix":"10.1007","author":[{"given":"Augusto Exenberger","family":"Becker","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mariana","family":"Recamonde-Mendoza","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,1,30]]},"reference":[{"key":"12_CR1","doi-asserted-by":"crossref","unstructured":"Apicella, A., Isgr\u00f2, F., Prevete, R.: Don\u2019t push the button! exploring data leakage risks in machine learning and transfer learning. arXiv preprint arXiv:2401.13796 (2024)","DOI":"10.2139\/ssrn.4733889"},{"issue":"8","key":"12_CR2","doi-asserted-by":"publisher","first-page":"1444","DOI":"10.1038\/s41592-024-02362-y","volume":"21","author":"J Bernett","year":"2024","unstructured":"Bernett, J., Blumenthal, D.B., Grimm, D.G., Haselbeck, F., Joeres, R., Kalinina, O.V., List, M.: Guiding questions to avoid data leakage in biological machine learning applications. Nat. Methods 21(8), 1444\u20131453 (2024)","journal-title":"Nat. Methods"},{"key":"12_CR3","doi-asserted-by":"crossref","unstructured":"Bussola, N., Marcolini, A., Maggio, V., Jurman, G., Furlanello, C.: AI slipping on tiles: data leakage in digital pathology. In: Pattern Recognition. ICPR International Workshops and Challenges: Virtual Event, 10\u201315 January 2021, Proceedings, Part I, pp. 167\u2013182. Springer (2021)","DOI":"10.1007\/978-3-030-68763-2_13"},{"key":"12_CR4","first-page":"2079","volume":"11","author":"GC Cawley","year":"2010","unstructured":"Cawley, G.C., Talbot, N.L.C.: On over-fitting in model selection and subsequent selection bias in performance evaluation. J. Mach. Learn. Res. 11, 2079\u20132107 (2010)","journal-title":"J. Mach. Learn. Res."},{"key":"12_CR5","unstructured":"Drobnjakovi\u0107, F., Suboti\u0107, P., Urban, C.: Abstract interpretation-based data leakage static analysis. arXiv preprint arXiv:2211.16073 (2022)"},{"key":"12_CR6","doi-asserted-by":"crossref","unstructured":"Farrow, E., Moore, J., Ga\u0161evi\u0107, D.: Analysing discussion forum data: a replication study avoiding data contamination. In: Proceedings of the 9th International Conference on Learning Analytics & Knowledge, LAK19, pp. 170\u2013179. ACM, New York, NY, USA (2019)","DOI":"10.1145\/3303772.3303779"},{"key":"12_CR7","doi-asserted-by":"crossref","unstructured":"Kapoor, S., Narayanan, A.: Leakage and the reproducibility crisis in machine-learning-based science. Patterns 4(9) (2023)","DOI":"10.1016\/j.patter.2023.100804"},{"issue":"4","key":"12_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2382577.2382579","volume":"6","author":"S Kaufman","year":"2012","unstructured":"Kaufman, S., Rosset, S., Perlich, C., Stitelman, O.: Leakage in data mining: formulation, detection, and avoidance. ACM Trans. Knowl. Disc. Data (TKDD) 6(4), 1\u201321 (2012)","journal-title":"ACM Trans. Knowl. Disc. Data (TKDD)"},{"key":"12_CR9","unstructured":"Mayer, I., Sportisse, A., Josse, J., Tierney, N., Vialaneix, N.: R-miss-tastic: a unified platform for missing values methods and workflows. arXiv preprint arXiv:1908.04822 (2019)"},{"issue":"3","key":"12_CR10","doi-asserted-by":"publisher","first-page":"878","DOI":"10.1093\/bioinformatics\/btab727","volume":"38","author":"JD Romano","year":"2022","unstructured":"Romano, J.D., et al.: Pmlb v1. 0: an open-source dataset collection for benchmarking machine learning methods. Bioinformatics 38(3), 878\u2013880 (2022)","journal-title":"Bioinformatics"},{"key":"12_CR11","doi-asserted-by":"crossref","unstructured":"Samala, R.K., Chan, H.P., Hadjiiski, L., Koneru, S.: Hazards of data leakage in machine learning: a study on classification of breast cancer using deep neural networks. In: Medical Imaging 2020: Computer-Aided Diagnosis, vol. 11314, pp. 279\u2013284. SPIE (2020)","DOI":"10.1117\/12.2549313"},{"issue":"1","key":"12_CR12","doi-asserted-by":"publisher","first-page":"7980","DOI":"10.1038\/s41598-021-87157-3","volume":"11","author":"M Shim","year":"2021","unstructured":"Shim, M., Lee, S.H., Hwang, H.J.: Inflated prediction accuracy of neuropsychiatric biomarkers caused by data leakage in feature selection. Sci. Rep. 11(1), 7980 (2021)","journal-title":"Sci. Rep."},{"issue":"1","key":"12_CR13","doi-asserted-by":"publisher","first-page":"22544","DOI":"10.1038\/s41598-021-01681-w","volume":"11","author":"E Yagis","year":"2021","unstructured":"Yagis, E., et al.: Effect of data leakage in brain MRI classification using 2D convolutional neural networks. Sci. Rep. 11(1), 22544 (2021)","journal-title":"Sci. Rep."},{"key":"12_CR14","doi-asserted-by":"crossref","unstructured":"Yang, C., Brower-Sinning, R.A., Lewis, G., K\u00e4stner, C.: Data leakage in notebooks: static detection and better processes. In: Proceedings of the 37th IEEE\/ACM International Conference on Automated Software Engineering, pp. 1\u201312 (2022)","DOI":"10.1145\/3551349.3556918"}],"container-title":["Lecture Notes in Computer Science","Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-15984-7_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T20:33:53Z","timestamp":1769718833000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-15984-7_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9783032159830","9783032159847"],"references-count":14,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-15984-7_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"30 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"BRACIS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Brazilian Conference on Intelligent Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Fortaleza-CE","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Brazil","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"bracis2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/bracis.sbc.org.br\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}