{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T07:20:00Z","timestamp":1771658400097,"version":"3.50.1"},"reference-count":55,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2018,8,3]],"date-time":"2018-08-03T00:00:00Z","timestamp":1533254400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Data Sci Anal"],"published-print":{"date-parts":[[2019,6]]},"DOI":"10.1007\/s41060-018-0148-4","type":"journal-article","created":{"date-parts":[[2018,8,3]],"date-time":"2018-08-03T06:09:40Z","timestamp":1533276580000},"page":"247-257","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Should significance testing be abandoned in machine learning?"],"prefix":"10.1007","volume":"7","author":[{"given":"Daniel","family":"Berrar","sequence":"first","affiliation":[]},{"given":"Werner","family":"Dubitzky","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,8,3]]},"reference":[{"key":"148_CR1","doi-asserted-by":"publisher","first-page":"452","DOI":"10.1038\/533452a","volume":"533","author":"M Baker","year":"2016","unstructured":"Baker, M.: Is there a reproducibility crisis? Nature 533, 452\u2013454 (2016)","journal-title":"Nature"},{"key":"148_CR2","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1093\/biomet\/44.3-4.533","volume":"44","author":"M Bartlett","year":"1957","unstructured":"Bartlett, M.: A comment on D.V. Lindley\u2019s statistical paradox. Biometrika 44, 533\u2013534 (1957)","journal-title":"Biometrika"},{"issue":"452","key":"148_CR3","first-page":"1127","volume":"95","author":"M Bayarri","year":"2000","unstructured":"Bayarri, M., Berger, J.: $$P$$ P values for composite null models. J. Am. Stat. Assoc. 95(452), 1127\u20131142 (2000)","journal-title":"J. Am. Stat. Assoc."},{"issue":"1","key":"148_CR4","doi-asserted-by":"publisher","first-page":"116","DOI":"10.1161\/CIRCRESAHA.114.303819","volume":"116","author":"C Begley","year":"2015","unstructured":"Begley, C., Ioannidis, J.: Reproducibility in science: improving the standard for basic and preclinical research. Circ. Res. 116(1), 116\u2013126 (2015)","journal-title":"Circ. Res."},{"issue":"77","key":"148_CR5","first-page":"1","volume":"18","author":"A Benavoli","year":"2017","unstructured":"Benavoli, A., Corani, G., Dem\u0161ar, J., Zaffalon, M.: Time for a change: a tutorial for comparing multiple classifiers through Bayesian analysis. J. Mach. Learn. Res. 18(77), 1\u201336 (2017)","journal-title":"J. Mach. Learn. Res."},{"issue":"5","key":"148_CR6","first-page":"1","volume":"17","author":"A Benavoli","year":"2016","unstructured":"Benavoli, A., Corani, G., Mangili, F.: Should we really use post-hoc tests based on mean-ranks? J. Mach. Learn. Res. 17(5), 1\u201310 (2016)","journal-title":"J. Mach. Learn. Res."},{"key":"148_CR7","first-page":"159","volume":"76","author":"J Berger","year":"1988","unstructured":"Berger, J., Berry, D.: Statistical analysis and the illusion of objectivity. Am. Sci. 76, 159\u2013165 (1988)","journal-title":"Am. Sci."},{"issue":"3","key":"148_CR8","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1214\/ss\/1177013238","volume":"2","author":"J Berger","year":"1987","unstructured":"Berger, J., Delampady, M.: Testing precise hypotheses. Stat. Sci. 2(3), 317\u2013352 (1987)","journal-title":"Stat. Sci."},{"issue":"6","key":"148_CR9","doi-asserted-by":"publisher","first-page":"911","DOI":"10.1007\/s10994-016-5612-6","volume":"106","author":"D Berrar","year":"2017","unstructured":"Berrar, D.: Confidence curves: an alternative to null hypothesis significance testing for the comparison of classifiers. Mach. Learn. 106(6), 911\u2013949 (2017)","journal-title":"Mach. Learn."},{"key":"148_CR10","unstructured":"Berrar, D., Dubitzky, W.: Jeffreys\u2013Lindley Paradox in Machine Learning (2017). http:\/\/doi.org\/10.17605\/OSF.IO\/SNXWJ . Accessed 23 July 2018"},{"key":"148_CR11","doi-asserted-by":"crossref","unstructured":"Berrar, D., Dubitzky, W.: On the Jeffreys\u2013Lindley paradox and the looming reproducibility crisis in machine learning. In: Proceedings of the 2017 IEEE International Conference on Data Science and Advanced Analytics, pp. 334\u2013340 (2017)","DOI":"10.1109\/DSAA.2017.3"},{"issue":"2","key":"148_CR12","doi-asserted-by":"publisher","first-page":"143","DOI":"10.1007\/s41060-017-0057-y","volume":"4","author":"D Berrar","year":"2017","unstructured":"Berrar, D., Lopes, P., Dubitzky, W.: Caveats and pitfalls in crowdsourcing research: the case of soccer referee bias. Int. J. Data Sci. Anal. 4(2), 143\u2013151 (2017)","journal-title":"Int. J. Data Sci. Anal."},{"issue":"1","key":"148_CR13","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman, L.: Random forests. Mach. Learn. 45(1), 5\u201332 (2001)","journal-title":"Mach. Learn."},{"issue":"12","key":"148_CR14","doi-asserted-by":"publisher","first-page":"997","DOI":"10.1037\/0003-066X.49.12.997","volume":"49","author":"J Cohen","year":"1994","unstructured":"Cohen, J.: The earth is round ( $$p <$$ p < .05). Am. Psychol. 49(12), 997\u20131003 (1994)","journal-title":"Am. Psychol."},{"issue":"2","key":"148_CR15","doi-asserted-by":"publisher","first-page":"395","DOI":"10.1007\/s11229-014-0525-z","volume":"194","author":"RD Cousins","year":"2017","unstructured":"Cousins, R.D.: The Jeffreys\u2013Lindley paradox and discovery criteria in high energy physics. Synthese 194(2), 395\u2013432 (2017)","journal-title":"Synthese"},{"key":"148_CR16","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4899-2887-0","volume-title":"Theoretical Statistics","author":"D Cox","year":"1974","unstructured":"Cox, D., Hinkley, D.: Theoretical Statistics. Chapman and Hall\/CR, London (1974)"},{"key":"148_CR17","volume-title":"Understanding the New Statistics: Effect Sizes, Confidence Intervals, and Meta-Analysis","author":"G Cummings","year":"2012","unstructured":"Cummings, G.: Understanding the New Statistics: Effect Sizes, Confidence Intervals, and Meta-Analysis. Routledge, New York (2012)"},{"key":"148_CR18","first-page":"1","volume":"7","author":"J Dem\u0161ar","year":"2006","unstructured":"Dem\u0161ar, J.: Statistical comparisons of classifiers over multiple data sets. J. Mach. Learn. Res. 7, 1\u201330 (2006)","journal-title":"J. Mach. Learn. Res."},{"issue":"1","key":"148_CR19","doi-asserted-by":"crossref","first-page":"69","DOI":"10.1111\/j.2517-6161.1955.tb00180.x","volume":"17","author":"R Fisher","year":"1955","unstructured":"Fisher, R.: Statistical methods and scientific induction. J. R. Stat. Soc. Ser. B 17(1), 69\u201378 (1955)","journal-title":"J. R. Stat. Soc. Ser. B"},{"key":"148_CR20","doi-asserted-by":"publisher","unstructured":"Foster, E., Deardorff, A.: Open Science Framework (OSF). J. Med. Libr. Assoc. JMLA 105(2), 203\u2013206 (2017). https:\/\/doi.org\/10.5195\/jmla.2017.88 . Accessed 23 July 2018","DOI":"10.5195\/jmla.2017.88"},{"key":"148_CR21","unstructured":"Gelman, A., Loken, E.: The garden of forking paths: why multiple comparisons can be a problem, even when there is no \u201cfishing expedition\u201d or \u201cp-hacking\u201d and the research hypothesis was posited ahead of time (2013). http:\/\/www.stat.columbia.edu\/~gelman\/research\/unpublished\/p_hacking.pdf . Accessed 23 July 2018"},{"key":"148_CR22","doi-asserted-by":"publisher","first-page":"587","DOI":"10.1016\/j.socec.2004.09.033","volume":"33","author":"G Gigerenzer","year":"2004","unstructured":"Gigerenzer, G.: Mindless statistics. J. Socio-Econ. 33, 587\u2013606 (2004)","journal-title":"J. Socio-Econ."},{"issue":"12","key":"148_CR23","doi-asserted-by":"publisher","first-page":"995","DOI":"10.7326\/0003-4819-130-12-199906150-00008","volume":"130","author":"S Goodman","year":"1999","unstructured":"Goodman, S.: Toward evidence-based medical statistics. 1: the $$P$$ P value fallacy. Ann. Intern. Med. 130(12), 995\u20131004 (1999)","journal-title":"Ann. Intern. Med."},{"issue":"3","key":"148_CR24","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1053\/j.seminhematol.2008.04.003","volume":"45","author":"S Goodman","year":"2008","unstructured":"Goodman, S.: A dirty dozen: twelve $$P$$ P -value misconceptions. Semin. Hematol. 45(3), 135\u2013140 (2008)","journal-title":"Semin. Hematol."},{"issue":"12","key":"148_CR25","doi-asserted-by":"publisher","first-page":"1568","DOI":"10.2105\/AJPH.78.12.1568","volume":"78","author":"S Goodman","year":"1988","unstructured":"Goodman, S., Royall, R.: Evidence and scientific research. Am. J. Public Health 78(12), 1568\u20131574 (1988)","journal-title":"Am. J. Public Health"},{"issue":"4","key":"148_CR26","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1007\/s10654-016-0149-3","volume":"31","author":"S Greenland","year":"2016","unstructured":"Greenland, S., Senn, S.J., Rothman, K.J., Carlin, J.B., Poole, C., Goodman, S.N., Altman, D.G.: Statistical tests, $$p$$ p values, confidence intervals, and power: a guide to misinterpretations. Eur. J. Epidemiol. 31(4), 337\u2013350 (2016)","journal-title":"Eur. J. Epidemiol."},{"key":"148_CR27","volume-title":"Statistics for the Social Sciences","author":"W Hays","year":"1973","unstructured":"Hays, W.: Statistics for the Social Sciences. Holt, Rinehart & Winston, New York (1973)"},{"issue":"3","key":"148_CR28","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1177\/0959354304043638","volume":"14","author":"R Hubbard","year":"2004","unstructured":"Hubbard, R.: Alphabet soup\u2014blurring the distinctions between $$p$$ p \u2019s and $$\\alpha $$ \u03b1 \u2019s in psychological research. Theory Psychol. 14(3), 295\u2013327 (2004)","journal-title":"Theory Psychol."},{"issue":"2","key":"148_CR29","doi-asserted-by":"publisher","first-page":"114","DOI":"10.1177\/0273475306288399","volume":"28","author":"R Hubbard","year":"2006","unstructured":"Hubbard, R., Armstrong, J.: Why we don\u2019t really know what \u201cstatistical significance\u201d means: a major educational failure. J. Mark. Edu. 28(2), 114\u2013120 (2006)","journal-title":"J. Mark. Edu."},{"issue":"1","key":"148_CR30","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1177\/0959354307086923","volume":"18","author":"R Hubbard","year":"2008","unstructured":"Hubbard, R., Lindsay, R.: Why $$p$$ p values are not a useful measure of evidence in statistical significance testing. Theory Psychol. 18(1), 69\u201388 (2008)","journal-title":"Theory Psychol."},{"issue":"8","key":"148_CR31","doi-asserted-by":"publisher","first-page":"e124","DOI":"10.1371\/journal.pmed.0020124","volume":"2","author":"J Ioannidis","year":"2005","unstructured":"Ioannidis, J.: Why most published research findings are false. PLoS Med. 2(8), e124 (2005)","journal-title":"PLoS Med."},{"key":"148_CR32","volume-title":"Theory of Probability","author":"H Jeffreys","year":"1961","unstructured":"Jeffreys, H.: Theory of Probability, 3rd edn. Clarendon Press, Oxford (1961). (Reprinted 2003)","edition":"3"},{"key":"148_CR33","doi-asserted-by":"publisher","first-page":"557","DOI":"10.1038\/d41586-017-07522-z","volume":"551","author":"J Leek","year":"2017","unstructured":"Leek, J., McShane, B., Gelman, A., Colquhoun, D., Nuijten, M., Goodman, S.: Five ways to fix statistics. Nature 551, 557\u2013559 (2017)","journal-title":"Nature"},{"issue":"2","key":"148_CR34","first-page":"43","volume":"5","author":"J Levin","year":"1998","unstructured":"Levin, J.: What if there were no more bickering about statistical significance tests? Res. Sch. 5(2), 43\u201353 (1998)","journal-title":"Res. Sch."},{"key":"148_CR35","unstructured":"Liaw, A., Wiener, M.: Classification and regression by randomforest. R News 2(3), 18\u201322 (2002). http:\/\/CRAN.R-project.org\/doc\/Rnews\/ . Accessed 23 July 2018"},{"key":"148_CR36","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1093\/biomet\/44.1-2.187","volume":"44","author":"D Lindley","year":"1957","unstructured":"Lindley, D.: A statistical paradox. Biometrika 44, 187\u2013192 (1957)","journal-title":"Biometrika"},{"issue":"3","key":"148_CR37","doi-asserted-by":"publisher","first-page":"1130","DOI":"10.1016\/j.jtcvs.2017.08.056","volume":"155","author":"M Lu","year":"2018","unstructured":"Lu, M., Ishwaran, H.: A prediction-based alternative to $$P$$ P values in regression models. J. Thoracic Cardiovasc. Surg. 155(3), 1130\u20131136.e4 (2018)","journal-title":"J. Thoracic Cardiovasc. Surg."},{"issue":"2","key":"148_CR38","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1111\/j.1740-9713.2017.01021.x","volume":"14","author":"R Matthews","year":"2017","unstructured":"Matthews, R., Wasserstein, R., Spiegelhalter, D.: The ASA\u2019s $$p$$ p -value statement, one year on. Significance 14(2), 38\u201341 (2017)","journal-title":"Significance"},{"key":"148_CR39","unstructured":"McShane, B.B., Gal, D., Gelman, A., Robert, C., Tackett, J.L.: Abandon Statistical Significance (2017). ArXiv e-prints 1709.07588"},{"key":"148_CR40","doi-asserted-by":"publisher","first-page":"150","DOI":"10.1038\/506150a","volume":"506","author":"R Nuzzo","year":"2014","unstructured":"Nuzzo, R.: Statistical errors. Nature 506, 150\u2013152 (2014)","journal-title":"Nature"},{"issue":"77","key":"148_CR41","doi-asserted-by":"publisher","first-page":"195","DOI":"10.2105\/AJPH.77.2.195","volume":"2","author":"C Poole","year":"1987","unstructured":"Poole, C.: Beyond the confidence interval. Am. J. Public Health 2(77), 195\u2013199 (1987)","journal-title":"Am. J. Public Health"},{"key":"148_CR42","unstructured":"R Core Team: R: a language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria (2017). https:\/\/www.R-project.org\/ . Accessed 23 July 2018"},{"issue":"3","key":"148_CR43","doi-asserted-by":"publisher","first-page":"638","DOI":"10.1037\/0033-2909.86.3.638","volume":"86","author":"R Rosenthal","year":"1979","unstructured":"Rosenthal, R.: The file drawer problem and tolerance for null results. Psychol. Bull. 86(3), 638\u2013641 (1979)","journal-title":"Psychol. Bull."},{"issue":"3","key":"148_CR44","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1097\/00001648-199805000-00019","volume":"9","author":"K Rothman","year":"1998","unstructured":"Rothman, K.: Writing for epidemiology. Epidemiology 9(3), 333\u2013337 (1998)","journal-title":"Epidemiology"},{"key":"148_CR45","volume-title":"Modern Epidemiology","author":"K Rothman","year":"2008","unstructured":"Rothman, K., Greenland, S., Lash, T.: Modern Epidemiology, 3rd edn. Wolters Kluwer, Alphen aan den Rijn (2008)","edition":"3"},{"key":"148_CR46","doi-asserted-by":"crossref","unstructured":"Savalei, V., Dunn, E.: Is the call to abandon $$p$$ p -values the red herring of the replicability crisis? Front. Psychol. Artic. 6, 1\u20134, Article 245 (2015)","DOI":"10.3389\/fpsyg.2015.00245"},{"issue":"3","key":"148_CR47","first-page":"203","volume":"50","author":"M Schervish","year":"1996","unstructured":"Schervish, M.: $$P$$ P values: what they are and what they are not. Am. Stat. 50(3), 203\u2013206 (1996)","journal-title":"Am. Stat."},{"issue":"2","key":"148_CR48","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1037\/1082-989X.1.2.115","volume":"1","author":"F Schmidt","year":"1996","unstructured":"Schmidt, F.: Statistical significance testing and cumulative knowledge in psychology: implications for training of researchers. Psychol. Methods 1(2), 115\u2013129 (1996)","journal-title":"Psychol. Methods"},{"key":"148_CR49","unstructured":"Schmidt, F., Hunter, J.: Eight common but false objections to the discontinuation of significance testing in the analysis of research data. In: Harlow, L., Mulaik, S., Steiger, J. (eds.) What If There were No Significance Tests?, pp. 37\u201364. Psychology Press, Hove (1997)"},{"issue":"1","key":"148_CR50","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1198\/000313001300339950","volume":"55","author":"T Sellke","year":"2001","unstructured":"Sellke, T., Bayarri, M., Berger, J.: Calibration of $$p$$ p values for testing precise null hypotheses. Am. Stat. 55(1), 62\u201371 (2001)","journal-title":"Am. Stat."},{"key":"148_CR51","unstructured":"Senn, S.: Two cheers for $$p$$ p -values? J. Epidemiol. Biostat. 6, 193\u2013204 (2001)"},{"issue":"11","key":"148_CR52","doi-asserted-by":"publisher","first-page":"1359","DOI":"10.1177\/0956797611417632","volume":"22","author":"J Simmons","year":"2011","unstructured":"Simmons, J., Nelson, L., Simonsohn, U.: False-positive psychology: undisclosed flexibility in data collection and analysis allows presenting anything as significant. Psychol. Sci. 22(11), 1359\u20131366 (2011)","journal-title":"Psychol. Sci."},{"key":"148_CR53","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1080\/01973533.2015.1012991","volume":"37","author":"D Trafimow","year":"2015","unstructured":"Trafimow, D., Marks, M.: Editorial. Basic Appl. Soc. Psychol. 37, 1\u20132 (2015)","journal-title":"Basic Appl. Soc. Psychol."},{"issue":"2","key":"148_CR54","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1080\/00031305.2016.1154108","volume":"70","author":"R Wasserstein","year":"2016","unstructured":"Wasserstein, R., Lazar, N.: The ASA\u2019s statement on $$p$$ p -values: context, process, and purpose (editorial). Am. Stat. 70(2), 129\u2013133 (2016)","journal-title":"Am. Stat."},{"issue":"2","key":"148_CR55","doi-asserted-by":"publisher","first-page":"233","DOI":"10.1007\/s10994-011-5263-6","volume":"86","author":"GI Webb","year":"2012","unstructured":"Webb, G.I., Boughton, J.R., Zheng, F., Ting, K.M., Salem, H.: Learning by extrapolation from marginal to full-multivariate probability distributions: decreasingly naive Bayesian classification. Mach. Learn. 86(2), 233\u2013272 (2012)","journal-title":"Mach. Learn."}],"container-title":["International Journal of Data Science and Analytics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s41060-018-0148-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s41060-018-0148-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s41060-018-0148-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,8]],"date-time":"2024-07-08T22:26:05Z","timestamp":1720477565000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s41060-018-0148-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,8,3]]},"references-count":55,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2019,6]]}},"alternative-id":["148"],"URL":"https:\/\/doi.org\/10.1007\/s41060-018-0148-4","relation":{},"ISSN":["2364-415X","2364-4168"],"issn-type":[{"value":"2364-415X","type":"print"},{"value":"2364-4168","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,8,3]]},"assertion":[{"value":"12 April 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 July 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 August 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}