{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,18]],"date-time":"2026-04-18T01:48:41Z","timestamp":1776476921626,"version":"3.51.2"},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2022,11,17]],"date-time":"2022-11-17T00:00:00Z","timestamp":1668643200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,11,17]],"date-time":"2022-11-17T00:00:00Z","timestamp":1668643200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100000038","name":"Natural Sciences and Engineering Research Council of Canada","doi-asserted-by":"publisher","award":["RGPIN-2022-04698"],"award-info":[{"award-number":["RGPIN-2022-04698"]}],"id":[{"id":"10.13039\/501100000038","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Adv Data Anal Classif"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1007\/s11634-022-00528-0","type":"journal-article","created":{"date-parts":[[2022,11,17]],"date-time":"2022-11-17T08:02:56Z","timestamp":1668672176000},"page":"927-949","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["A power-controlled reliability assessment for multi-class probabilistic classifiers"],"prefix":"10.1007","volume":"17","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9035-984X","authenticated-orcid":false,"given":"Hyukjun","family":"Gweon","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,11,17]]},"reference":[{"key":"528_CR1","volume-title":"Classification and regression trees","author":"L Breiman","year":"1984","unstructured":"Breiman L (1984) Classification and regression trees. Taylor & Francis, LLC, Boca Raton, FL"},{"issue":"2","key":"528_CR2","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1023\/A:1018054314350","volume":"24","author":"L Breiman","year":"1996","unstructured":"Breiman L (1996) Bagging predictors. Mach Learn 24(2):123\u2013140. https:\/\/doi.org\/10.1023\/A:1018054314350","journal-title":"Mach Learn"},{"issue":"3","key":"528_CR3","doi-asserted-by":"publisher","first-page":"651","DOI":"10.1175\/WAF993.1","volume":"22","author":"J Br\u00f6cker","year":"2007","unstructured":"Br\u00f6cker J, Smith LA (2007) Increasing the reliability of reliability diagrams. Weather Forecast 22(3):651\u2013661","journal-title":"Weather Forecast"},{"issue":"2","key":"528_CR4","doi-asserted-by":"publisher","first-page":"298","DOI":"10.1016\/j.csda.2009.09.024","volume":"54","author":"D Cheng","year":"2010","unstructured":"Cheng D, Branscum AJ, Stamey JD (2010) A Bayesian approach to sample size determination for studies designed to evaluate continuous medical tests. Comput Stat Data Anal 54(2):298\u2013307. https:\/\/doi.org\/10.1016\/j.csda.2009.09.024","journal-title":"Comput Stat Data Anal"},{"issue":"4","key":"528_CR5","doi-asserted-by":"publisher","first-page":"507","DOI":"10.1016\/j.cct.2007.12.001","volume":"29","author":"T Daimon","year":"2008","unstructured":"Daimon T (2008) Bayesian sample size calculations for a non-inferiority test of two proportions in clinical trials. Contemp Clin Trials 29(4):507\u2013516. https:\/\/doi.org\/10.1016\/j.cct.2007.12.001","journal-title":"Contemp Clin Trials"},{"key":"528_CR6","unstructured":"Dua D, Graff C (2017) UCI machine learning repository. http:\/\/archive.ics.uci.edu\/ml"},{"issue":"21","key":"528_CR7","doi-asserted-by":"publisher","first-page":"4238","DOI":"10.1002\/sim.3202","volume":"27","author":"MW Fagerland","year":"2008","unstructured":"Fagerland MW, Hosmer DW, Bofin AM (2008) Multinomial goodness-of-fit tests for logistic regression models. Stat Med 27(21):4238\u20134253. https:\/\/doi.org\/10.1002\/sim.3202","journal-title":"Stat Med"},{"key":"528_CR8","unstructured":"Fix E, Hodges J (1951) Discriminatory analysis, nonparametric discrimination: Consistency properties. Technical report, USAF School of Aviation Medivine, Randolph Field, Texas, project 21-49-004, Rept. 4, Contract AF41(128)-31, February 1951"},{"key":"528_CR9","unstructured":"Gerrard DJ (1969) Competition quotient: A new measure of the competition affecting individual forest trees. Research Bulletin No. 20, Agricultural Experimental Station, Michigan State University"},{"key":"528_CR10","doi-asserted-by":"publisher","first-page":"687","DOI":"10.1016\/j.patrec.2019.07.012","volume":"125","author":"H Gweon","year":"2019","unstructured":"Gweon H, Yu H (2019) How reliable is your reliability diagram? Pattern Recogn Lett 125:687\u2013693. https:\/\/doi.org\/10.1016\/j.patrec.2019.07.012","journal-title":"Pattern Recogn Lett"},{"issue":"4","key":"528_CR11","doi-asserted-by":"publisher","first-page":"1039","DOI":"10.1080\/03610918.2017.1303727","volume":"47","author":"HA Hamid","year":"2018","unstructured":"Hamid HA, Wah Y, Xie X et al (2018) Investigating the power of goodness-of-fit tests for multinomial logistic regression. Commun Stat Simul Comput 47(4):1039\u20131055. https:\/\/doi.org\/10.1080\/03610918.2017.1303727","journal-title":"Commun Stat Simul Comput"},{"issue":"1","key":"528_CR12","doi-asserted-by":"publisher","first-page":"100","DOI":"10.2307\/2346830","volume":"28","author":"JA Hartigan","year":"1979","unstructured":"Hartigan JA, Wong MA (1979) A k-means clustering algorithm. J Roy Stat Soc: Ser C (Appl Stat) 28(1):100\u2013108. https:\/\/doi.org\/10.2307\/2346830","journal-title":"J Roy Stat Soc: Ser C (Appl Stat)"},{"issue":"10","key":"528_CR13","doi-asserted-by":"publisher","first-page":"1043","DOI":"10.1080\/03610928008827941","volume":"9","author":"DW Hosmer","year":"1980","unstructured":"Hosmer DW, Lemeshow S (1980) Goodness of fit tests for the multiple logistic regression model. Commun Stat Theory Methods 9(10):1043\u20131069. https:\/\/doi.org\/10.1080\/03610928008827941","journal-title":"Commun Stat Theory Methods"},{"key":"528_CR14","doi-asserted-by":"publisher","first-page":"263","DOI":"10.1136\/amiajnl-2011-000291","volume":"19","author":"X Jiang","year":"2012","unstructured":"Jiang X, Osl M, Kim J et al (2012) Calibrating predictive model estimates to support personalized medicine. J Am Med Inform Assoc 19:263\u2013274","journal-title":"J Am Med Inform Assoc"},{"key":"528_CR15","unstructured":"Kumar A, Sarawagi S, Jain U (2018) Trainable calibration measures for neural networks from kernel mean embeddings. In: Dy J, Krause A (eds) Proceedings of the 35th international conference on machine learning, Stockholmsm\u00e4ssan, Stockholm Sweden, pp 2805\u20132814"},{"key":"528_CR16","unstructured":"Lloyd S (1957) Least squares quantization in pcm. Technical report RR-5497, Bell Lab"},{"issue":"1","key":"528_CR17","first-page":"41","volume":"26","author":"AH Murphy","year":"1977","unstructured":"Murphy AH, Winkler RL (1977) Reliability of subjective probability forecasts of precipitation and temperature. J Roy Stat Soc: Ser C (Appl Stat) 26(1):41\u201347","journal-title":"J Roy Stat Soc: Ser C (Appl Stat)"},{"key":"528_CR18","doi-asserted-by":"crossref","unstructured":"Naeini MP, Cooper GF, Hauskrecht M (2015) Obtaining well calibrated probabilities using bayesian binning. In: Proceedings of the 29th AAAI conference on artificial intelligence, pp 2901\u20142907","DOI":"10.1609\/aaai.v29i1.9602"},{"key":"528_CR19","doi-asserted-by":"crossref","unstructured":"Niculescu-Mizil A, Caruana R (2005) Predicting good probabilities with supervised learning. In: Proceedings of the 22nd international conference on machine learning. ACM, New York, NY, USA, pp 625\u2013632","DOI":"10.1145\/1102351.1102430"},{"issue":"1","key":"528_CR20","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1002\/sim.5525","volume":"32","author":"P Paul","year":"2013","unstructured":"Paul P, Pennell ML, Lemeshow S (2013) Standardizing the power of the Hosmer\u2013Lemeshow goodness of fit test in large data sets. Stat Med 32(1):67\u201380. https:\/\/doi.org\/10.1002\/sim.5525","journal-title":"Stat Med"},{"issue":"2","key":"528_CR21","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1111\/1467-9884.00347","volume":"52","author":"T Pham-Gia","year":"2003","unstructured":"Pham-Gia T, Turkkan N (2003) Determination of exact sample sizes in the Bayesian estimation of the difference of two proportions. J Royal Stat Soc Ser D (The Statistician) 52(2):131\u2013150. https:\/\/doi.org\/10.1111\/1467-9884.00347","journal-title":"J Royal Stat Soc Ser D (The Statistician)"},{"issue":"1","key":"528_CR22","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1002\/(SICI)1521-4036(199903)41:1<71::AID-BIMJ71>3.0.CO;2-O","volume":"41","author":"JG Pigeon","year":"1999","unstructured":"Pigeon JG, Heyse JF (1999) An improved goodness of fit statistic for probability prediction models. Biom J 41(1):71\u201382","journal-title":"Biom J"},{"key":"528_CR23","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1016\/j.csda.2012.11.001","volume":"60","author":"G Rauch","year":"2013","unstructured":"Rauch G, Kieser M (2013) An expected power approach for the assessment of composite endpoints and their components. Comput Stat Data Anal 60:111\u2013122. https:\/\/doi.org\/10.1016\/j.csda.2012.11.001","journal-title":"Comput Stat Data Anal"},{"key":"528_CR24","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-4578-0","volume-title":"Goodness-of-fit statistics for discrete multivariate data","author":"T Read","year":"1988","unstructured":"Read T, Cressie N (1988) Goodness-of-fit statistics for discrete multivariate data. Springer, New York"},{"issue":"1","key":"528_CR25","first-page":"1","volume":"6","author":"B Settles","year":"2012","unstructured":"Settles B (2012) Active learning. Synth Lect Artif Intell Mach Learn 6(1):1\u2013114","journal-title":"Synth Lect Artif Intell Mach Learn"},{"key":"528_CR26","unstructured":"Vaicenavicius J, Widmann D, Andersson C, et\u00a0al (2019) Evaluating model calibration in classification. In: Proceedings of the 22nd international conference on artificial intelligence and statistics, pp 3459\u20133467"},{"key":"528_CR27","unstructured":"Widmann D, Lindsten F, Zachariah D (2019) Calibration tests in multi-class classification: A unifying framework. In: Advances in neural information processing systems, pp 12,236 \u2013 12,246"},{"key":"528_CR28","unstructured":"Widmann D, Lindsten F, Zachariah D (2021) Calibration tests beyond classification. In: Proceedings of the 9th international conference on learning representations"}],"container-title":["Advances in Data Analysis and Classification"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11634-022-00528-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11634-022-00528-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11634-022-00528-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,19]],"date-time":"2023-10-19T15:21:44Z","timestamp":1697728904000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11634-022-00528-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,11,17]]},"references-count":28,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2023,12]]}},"alternative-id":["528"],"URL":"https:\/\/doi.org\/10.1007\/s11634-022-00528-0","relation":{},"ISSN":["1862-5347","1862-5355"],"issn-type":[{"value":"1862-5347","type":"print"},{"value":"1862-5355","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,11,17]]},"assertion":[{"value":"1 January 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 November 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 November 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 November 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}