{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,28]],"date-time":"2025-09-28T13:10:08Z","timestamp":1759065008075,"version":"3.44.0"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032065926","type":"print"},{"value":"9783032065933","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,9,29]],"date-time":"2025-09-29T00:00:00Z","timestamp":1759104000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,29]],"date-time":"2025-09-29T00:00:00Z","timestamp":1759104000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-06593-3_8","type":"book-chapter","created":{"date-parts":[[2025,9,28]],"date-time":"2025-09-28T12:37:45Z","timestamp":1759063065000},"page":"81-90","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Label-Free Estimation of\u00a0Clinically Relevant Performance Metrics Under Distribution Shifts"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-0382-4675","authenticated-orcid":false,"given":"Tim","family":"Fl\u00fchmann","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2293-6160","authenticated-orcid":false,"given":"Alceu","family":"Bissoto","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0005-4743-3246","authenticated-orcid":false,"given":"Trung-Dung","family":"Hoang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4377-7074","authenticated-orcid":false,"given":"Lisa\u00a0M.","family":"Koch","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,29]]},"reference":[{"issue":"6","key":"8_CR1","doi-asserted-by":"publisher","first-page":"e271","DOI":"10.1016\/S2589-7500(19)30123-2","volume":"1","author":"X Liu","year":"2019","unstructured":"Liu, X., et al.: A comparison of deep learning performance against health-care professionals in detecting diseases from medical imaging: a systematic review and meta-analysis. Lancet Digit. Health 1(6), e271\u2013e297 (2019)","journal-title":"Lancet Digit. Health"},{"issue":"1","key":"8_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/s41746-022-00592-y","volume":"5","author":"G Varoquaux","year":"2022","unstructured":"Varoquaux, G., Cheplygina, V.: Machine learning for medical imaging: methodological failures and recommendations for the future. NPJ Digit. Med. 5(1), 1\u20138 (2022)","journal-title":"NPJ Digit. Med."},{"key":"8_CR3","doi-asserted-by":"crossref","unstructured":"Oakden-Rayner, L., Dunnmon, J., Carneiro, G., R\u00e9, C.: Hidden stratification causes clinically meaningful failures in machine learning for medical imaging. In: ACM Conference on Health, Inference, and Learning (CHIL), pp. 151\u2013159 (2020)","DOI":"10.1145\/3368555.3384468"},{"key":"8_CR4","doi-asserted-by":"crossref","unstructured":"Guillory, D., Shankar, V., Ebrahimi, S., Darrell, T., Schmidt, L.: Predicting with confidence on unseen distributions. In: International Conference on Computer Vision (ICCV), pp. 1134\u20131144 (2021)","DOI":"10.1109\/ICCV48922.2021.00117"},{"key":"8_CR5","doi-asserted-by":"crossref","unstructured":"Fan, W., Davidson, I.: Reverse testing: an efficient framework to select amongst classifiers under sample selection bias. In: ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 147\u2013156 (2006)","DOI":"10.1145\/1150402.1150422"},{"key":"8_CR6","unstructured":"Bia\u0142ek, J., Kuberski, W., Perrakis, N., Bifet, A.: Estimating model performance under covariate shift without labels. arXiv preprint arXiv:2401.08348 (2024)"},{"key":"8_CR7","unstructured":"Garg, S., Balakrishnan, S., Lipton, Z.C., Neyshabur, B., Sedghi, H.: Leveraging unlabeled data to predict out-of-distribution performance. In: NeurIPS Workshop on Distribution Shifts: Connecting Methods and Applications (2021)"},{"key":"8_CR8","doi-asserted-by":"crossref","unstructured":"Li, Z., Kamnitsas, K., Islam, M., Chen, C., Glocker, B.: Estimating model performance under domain shifts with class-specific confidence scores. In: International Conference on Medical Image Computing and Computer-Assisted Intervention (MICCAI), pp. 693\u2013703. Springer (2022)","DOI":"10.1007\/978-3-031-16449-1_66"},{"key":"8_CR9","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1613\/jair.1.16709","volume":"82","author":"J Kivim\u00e4ki","year":"2025","unstructured":"Kivim\u00e4ki, J., Nurminen, J.K., Bia\u0142ek, J., Kuberski, W.: Confidence-based estimators for predictive performance in model monitoring. J. Artif. Intell. Res. 82, 209\u2013240 (2025)","journal-title":"J. Artif. Intell. Res."},{"key":"8_CR10","unstructured":"Kivim\u00e4ki, J., Bia\u0142ek, J., Kuberski, W., Nurminen, J.K.: Performance estimation in binary classification using calibrated confidence. arXiv preprint arXiv:2505.05295 (2025)"},{"key":"8_CR11","doi-asserted-by":"crossref","unstructured":"Elsahar, H., Gall\u00e9, M.: To annotate or not? predicting performance drop under domain shift. In: Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 2163\u20132173 (2019)","DOI":"10.18653\/v1\/D19-1222"},{"key":"8_CR12","unstructured":"Baek, C., Jiang, Y., Raghunathan, A., Kolter, J.Z.: Agreement-on-the-line: predicting the performance of neural networks under distribution shift. In: Advances in Neural Information Processing Systems (NeurIPS), vol. 35, pp. 19274\u201319289 (2022)"},{"issue":"2","key":"8_CR13","doi-asserted-by":"publisher","first-page":"195","DOI":"10.1038\/s41592-023-02151-z","volume":"21","author":"L Maier-Hein","year":"2024","unstructured":"Maier-Hein, L., et al.: Metrics reloaded: recommendations for image analysis validation. Nat. Methods 21(2), 195\u2013212 (2024)","journal-title":"Nat. Methods"},{"key":"8_CR14","unstructured":"Guo, C., Pleiss, G., Sun, Y., Weinberger, K.Q.: On calibration of modern neural networks. In: International Conference on Machine Learning (ICML), pp. 1321\u20131330 (2017)"},{"key":"8_CR15","unstructured":"Hendrycks, D., Gimpel, K.: A baseline for detecting misclassified and out-of-distribution examples in neural networks. In: International Conference on Learning Representations (ICLR) (2017)"},{"key":"8_CR16","unstructured":"Chambon, P., et al.: Chexpert plus: augmenting a large chest x-ray dataset with text radiology reports, patient demographics and additional image formats. arXiv preprint arXiv:2405.19538 (2024)"},{"key":"8_CR17","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2020.101797","volume":"66","author":"A Bustos","year":"2020","unstructured":"Bustos, A., Pertusa, A., Salinas, J.M., De La Iglesia-Vaya, M.: Padchest: a large chest x-ray image dataset with multi-label annotated reports. Med. Image Anal. 66, 101797 (2020)","journal-title":"Med. Image Anal."},{"key":"8_CR18","doi-asserted-by":"crossref","unstructured":"Wang, X., Peng, Y., Lu, L., Lu, Z., Bagheri, M., Summers, R.M.: Chestx-ray8: hospital-scale chest x-ray database and benchmarks on weakly-supervised classification and localization of common thorax diseases. In: Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2097\u20132106 (2017)","DOI":"10.1109\/CVPR.2017.369"},{"key":"8_CR19","doi-asserted-by":"crossref","unstructured":"Sun, S., Koch, L.M., Baumgartner, C.F.: Right for the wrong reason: can interpretable ml techniques detect spurious correlations? In: Medical Image Computing and Computer Assisted Interventions (MICCAI) (2023)","DOI":"10.1007\/978-3-031-43895-0_40"},{"key":"8_CR20","unstructured":"Cohen, J.P., Hashir, M., Brooks, R., Bertrand, H.: On the limits of cross-domain generalization in automated x-ray prediction. In: Medical Imaging with Deep Learning (MIDL), pp. 136\u2013155 (2020)"},{"key":"8_CR21","doi-asserted-by":"crossref","unstructured":"Koch, L.M., Baumgartner, C.F., Berens, P.: Distribution shift detection for the postmarket surveillance of medical AI algorithms: A retrospective simulation study. NPJ Digit. Med. (2024)","DOI":"10.1038\/s41746-024-01085-w"},{"key":"8_CR22","doi-asserted-by":"crossref","unstructured":"Roschewitz, M., Mehta, R., Jones, C., Glocker, B.: Automatic dataset shift identification to support safe deployment of medical imaging AI. arXiv preprint arXiv:2411.07940 (2024)","DOI":"10.1007\/978-3-032-04981-0_7"},{"key":"8_CR23","unstructured":"Alexandari, A., Kundaje, A., Shrikumar, A.: Maximum likelihood with bias-corrected calibration is hard-to-beat at label shift adaptation. In: International Conference on Machine Learning (ICML), pp. 222\u2013232 (2020)"}],"container-title":["Lecture Notes in Computer Science","Uncertainty for Safe Utilization of Machine Learning in Medical Imaging"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-06593-3_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,28]],"date-time":"2025-09-28T12:38:00Z","timestamp":1759063080000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-06593-3_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,29]]},"ISBN":["9783032065926","9783032065933"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-06593-3_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,29]]},"assertion":[{"value":"29 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"UNSURE","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Uncertainty for Safe Utilization of Machine Learning in Medical Imaging","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Daejon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"unsure2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/unsuremiccai.github.io","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}