{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T17:35:25Z","timestamp":1770140125140,"version":"3.49.0"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2020,5,29]],"date-time":"2020-05-29T00:00:00Z","timestamp":1590710400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,5,29]],"date-time":"2020-05-29T00:00:00Z","timestamp":1590710400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100006785","name":"Google","doi-asserted-by":"publisher","award":["Google Research Award Axiometrics: Foundations of Evaluation Metrics in IR"],"award-info":[{"award-number":["Google Research Award Axiometrics: Foundations of Evaluation Metrics in IR"]}],"id":[{"id":"10.13039\/100006785","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Inf Retrieval J"],"published-print":{"date-parts":[[2020,6]]},"DOI":"10.1007\/s10791-020-09374-0","type":"journal-article","created":{"date-parts":[[2020,5,29]],"date-time":"2020-05-29T06:02:31Z","timestamp":1590732151000},"page":"318-386","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["On the nature of information access evaluation metrics: a unifying framework"],"prefix":"10.1007","volume":"23","author":[{"given":"Enrique","family":"Amig\u00f3","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2852-168X","authenticated-orcid":false,"given":"Stefano","family":"Mizzaro","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,5,29]]},"reference":[{"key":"9374_CR1","doi-asserted-by":"crossref","unstructured":"Agirre, E., Banea, C., Cardie, C., Cer, D., Diab, M., Gonzalez-Agirre, A., et al. (2015). SemEval-2015 Task 2: Semantic Textual Similarity, English, Spanish and Pilot on interpretability. In Proceedings of SemEval, 2015 (pp. 252\u2013263).","DOI":"10.18653\/v1\/S15-2045"},{"key":"9374_CR2","doi-asserted-by":"crossref","unstructured":"Agirre, E., Banea, C., Cer, D., Diab, M., Gonzalez-Agirre, A., Mihalcea, R., et al. (2016). Semeval-2016 task 1: Semantic textual similarity, monolingual and cross-lingual evaluation. In Proceedings of SemEval, 2016 (pp. 509\u2013523).","DOI":"10.18653\/v1\/S16-1081"},{"issue":"6","key":"9374_CR3","doi-asserted-by":"publisher","first-page":"581","DOI":"10.1007\/s10791-019-09355-y","volume":"22","author":"E Amigo","year":"2019","unstructured":"Amigo, E., Gonzalo, J., Verdejo, F., & Spina, D. (2019). A comparison of filtering evaluation metrics based on formal constraints. Information Retrieval Journal, 22(6), 581\u2013619.","journal-title":"Information Retrieval Journal"},{"issue":"4","key":"9374_CR4","doi-asserted-by":"publisher","first-page":"461","DOI":"10.1007\/s10791-008-9066-8","volume":"12","author":"E Amig\u00f3","year":"2009","unstructured":"Amig\u00f3, E., Gonzalo, J., Artiles, J., & Verdejo, F. (2009). A comparison of extrinsic clustering evaluation metrics based on formal constraints. Information Retrieval, 12(4), 461\u2013486.","journal-title":"Information Retrieval"},{"key":"9374_CR5","doi-asserted-by":"crossref","unstructured":"Amig\u00f3, E., Gonzalo, J., & Mizzaro, S. (2014). A general account of effectiveness metrics for information tasks: retrieval, filtering, and clustering. In Proceedings of SIGIR (pp. 1289\u20131289). ACM.","DOI":"10.1145\/2600428.2602296"},{"key":"9374_CR6","doi-asserted-by":"crossref","unstructured":"Amig\u00f3, E., Gonzalo, J., & Mizzaro, S. (2015). A formal approach to effectiveness metrics for information access: Retrieval, filtering, and clustering. In: ECIR 2015: Advances in information retrieval (pp. 817\u2013821).","DOI":"10.1007\/978-3-319-16354-3_93"},{"key":"9374_CR7","doi-asserted-by":"crossref","unstructured":"Amig\u00f3, E., Gonzalo, J., & Verdejo, F. (2011). A comparison of evaluation metrics for document filtering. In CLEF, LNCS (Vol. 6941, pp. 38\u201349). Springer.","DOI":"10.1007\/978-3-642-23708-9_6"},{"key":"9374_CR8","doi-asserted-by":"crossref","unstructured":"Amig\u00f3, E., Gonzalo, J., & Verdejo, F. (2013). A general evaluation measure for document organization tasks. In Proceedings of SIGIR (pp. 643\u2013652). ACM, New York, NY, USA.","DOI":"10.1145\/2484028.2484081"},{"key":"9374_CR9","doi-asserted-by":"crossref","unstructured":"Amig\u00f3, E., Gonzalo, J., Spina, D., & Verdejo, F. (2018). A comparison of filtering evaluation metrics based on formal constraints. Information Retrieval. (in press).","DOI":"10.1007\/s10791-019-09355-y"},{"key":"9374_CR10","doi-asserted-by":"crossref","unstructured":"Baccianella, S., Esuli, A., & Sebastiani, F. (2009). Evaluation measures for ordinal regression. In Proceedings of the 2009 ninth international conference on intelligent systems design and applications, ISDA \u201909 (pp. 283\u2013287).","DOI":"10.1109\/ISDA.2009.230"},{"key":"9374_CR11","doi-asserted-by":"crossref","unstructured":"Barbieri, F., Basile, V., Croce, D., Nissim, M., Novielli, N., & Patti, V. (2016). Overview of the Evalita 2016 SENTIment POLarity classification task. In Proceedings of third Italian conference on computational linguistics (CLiC-it 2016).","DOI":"10.4000\/books.aaccademia.1992"},{"key":"9374_CR12","unstructured":"Bollmann, P. (1984). Two axioms for evaluation measures in information retrieval. In SIGIR \u201984 (pp. 233\u2013245). Swinton: British Computer Society."},{"key":"9374_CR13","doi-asserted-by":"crossref","unstructured":"Busin, L., & Mizzaro, S. (2013). Axiometrics: An axiomatic approach to information retrieval effectiveness metrics. In Proceedings of ICTIR 2013 (pp. 22\u201329). ACM.","DOI":"10.1145\/2499178.2499182"},{"issue":"08","key":"9374_CR14","doi-asserted-by":"publisher","first-page":"1173","DOI":"10.1142\/S0218001411009093","volume":"25","author":"JS Cardoso","year":"2011","unstructured":"Cardoso, J. S., & Sousa, R. (2011). Measuring the performance of ordinal classification. International Journal of Pattern Recognition and Artificial Intelligence, 25(08), 1173\u20131195. https:\/\/doi.org\/10.1142\/S0218001411009093.","journal-title":"International Journal of Pattern Recognition and Artificial Intelligence"},{"issue":"6","key":"9374_CR15","doi-asserted-by":"publisher","first-page":"530","DOI":"10.1002\/asi.10408","volume":"55","author":"V Della Mea","year":"2004","unstructured":"Della Mea, V., & Mizzaro, S. (2004). Measuring retrieval effectiveness: A new proposal and a first experimental validation. Journal of the American Society for Information Science, 55(6), 530\u2013543.","journal-title":"Journal of the American Society for Information Science"},{"key":"9374_CR16","unstructured":"Dom, B. (2001). An information-theoretic external cluster-validity measure. IBM Research Report. http:\/\/citeseer.ist.psu.edu\/dom01informationtheoretic.html."},{"key":"9374_CR17","doi-asserted-by":"crossref","unstructured":"Ferrante, M., Ferro, N., & Maistro, M. (2015). Towards a formal framework for utility-oriented measurements of retrieval effectiveness. In Proceedings of ICTIR (pp. 21\u201330).","DOI":"10.1145\/2808194.2809452"},{"key":"9374_CR18","doi-asserted-by":"crossref","unstructured":"Ferrante, M., Ferro, N., & Pontarollo, S. (2017). Are IR evaluation measures on an interval scale? In Proceedings of ACM ICTIR. (pp. 67\u201374). ACM.","DOI":"10.1145\/3121050.3121058"},{"issue":"3","key":"9374_CR19","doi-asserted-by":"publisher","first-page":"409","DOI":"10.1109\/TKDE.2018.2840708","volume":"31","author":"M Ferrante","year":"2019","unstructured":"Ferrante, M., Ferro, N., & Pontarollo, S. (2019). A general theory of IR evaluation measures. IEEE Transactions on Knowledge & Data Engineering, 31(3), 409\u2013422.","journal-title":"IEEE Transactions on Knowledge & Data Engineering"},{"issue":"1","key":"9374_CR20","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1016\/j.patrec.2008.08.010","volume":"30","author":"C Ferri","year":"2009","unstructured":"Ferri, C., Hern\u00e1ndez-Orallo, J., & Modroiu, R. (2009). An experimental comparison of performance measures for classification. Pattern Recognition Letters, 30(1), 27\u201338.","journal-title":"Pattern Recognition Letters"},{"issue":"3","key":"9374_CR21","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1145\/3190580.3190586","volume":"51","author":"N Fuhr","year":"2018","unstructured":"Fuhr, N. (2018). Some common mistakes in IR evaluation, and how they can be avoided. SIGIR Forum, 51(3), 32\u201341.","journal-title":"SIGIR Forum"},{"key":"9374_CR22","volume-title":"Advances in Artificial Intelligence. Canadian AI 2009. Lecture Notes in Computer Science","author":"L Gaudette","year":"2009","unstructured":"Gaudette, L., & Japkowicz, N. (2009). Evaluation methods for ordinal classification. In Y. Gao & N. Japkowicz (Eds.), Advances in Artificial Intelligence. Canadian AI 2009. Lecture Notes in Computer Science (Vol. 5549). Berlin, Heidelberg: Springer."},{"issue":"5","key":"9374_CR23","doi-asserted-by":"publisher","first-page":"74:1","DOI":"10.1145\/3117807","volume":"50","author":"P Gonz\u00e1lez","year":"2017","unstructured":"Gonz\u00e1lez, P., Casta\u00f1o, A., Chawla, N. V., & Coz, J. J. D. (2017). A review on quantification learning. ACM Computing Surveys, 50(5), 74:1\u201374:40.","journal-title":"ACM Computing Surveys"},{"issue":"2\u20133","key":"9374_CR24","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1023\/A:1012801612483","volume":"17","author":"M Halkidi","year":"2001","unstructured":"Halkidi, M., Batistakis, Y., & Vazirgiannis, M. (2001). On clustering validation techniques. Journal of Intelligent Information Systems, 17(2\u20133), 107\u2013145.","journal-title":"Journal of Intelligent Information Systems"},{"key":"9374_CR25","unstructured":"Maddalena, E., & Mizzaro, S. (2014). Axiometrics: Axioms of information retrieval effectiveness metrics. In Proceedings of the sixth EVIA workshop (pp. 17\u201324)."},{"issue":"3","key":"9374_CR26","doi-asserted-by":"publisher","first-page":"19:1","DOI":"10.1145\/3002172","volume":"35","author":"E Maddalena","year":"2017","unstructured":"Maddalena, E., Mizzaro, S., Scholer, F., & Turpin, A. (2017). On crowdsourcing relevance magnitudes for information retrieval evaluation. ACM Transactions on Information Systems (TOIS), 35(3), 19:1\u201319:32.","journal-title":"ACM Transactions on Information Systems (TOIS)"},{"key":"9374_CR27","unstructured":"Meila, M. (2003). Comparing clusterings. In Proceedings of COLT 03."},{"key":"9374_CR28","doi-asserted-by":"crossref","unstructured":"Moffat, A. (2013). Seven numeric properties of effectiveness metrics. In AIRS\u201913 (pp. 1\u201312).","DOI":"10.1007\/978-3-642-45068-6_1"},{"key":"9374_CR29","volume-title":"Measurement, design, and analysis: an integrated approach","author":"EJ Pedhazur","year":"1991","unstructured":"Pedhazur, E. J., & Schmelkin, L. P. (1991). Measurement, design, and analysis: an integrated approach. Newark: Lawrence Erlbaum Associates."},{"key":"9374_CR30","doi-asserted-by":"crossref","unstructured":"Qi, H., Yang, M., He, X., & Li, S. (2010). Re-examination on Lam% in spam filtering. In Proceedings of SIGIR.","DOI":"10.1145\/1835449.1835601"},{"key":"9374_CR31","unstructured":"Reimers, N., Beyer, P., & Gurevych, I. (2016). Task-oriented intrinsic evaluation of semantic textual similarity. In Proceedings of COLING, 2016 (pp. 87\u201396)."},{"key":"9374_CR32","volume-title":"Measurement theory: volume 7: with applications to decisionmaking, utility, and the social sciences. Encyclopedia of mathematics and its applications","author":"F Roberts","year":"1984","unstructured":"Roberts, F. (1984). Measurement theory: volume 7: with applications to decisionmaking, utility, and the social sciences. Encyclopedia of mathematics and its applications. Cambridge: Cambridge University Press."},{"key":"9374_CR33","unstructured":"Rosenberg, A., & Hirschberg, J. (2007). V-measure: A conditional entropy-based external cluster evaluation measure. In Proceedings of EMNLP-CoNLL (pp. 410\u2013420)."},{"key":"9374_CR34","doi-asserted-by":"crossref","unstructured":"Rosenthal, S., Farra, N., & Nakov, P. (2017). SemEval-2017 task 4: Sentiment analysis in Twitter. In Proceedings of SemEval \u201917. ACL.","DOI":"10.18653\/v1\/S17-2088"},{"key":"9374_CR35","doi-asserted-by":"crossref","unstructured":"Sebastiani, F. (2015). An axiomatically derived measure for the evaluation of classification algorithms. In Proceedings of ICTIR 2015 (pp 11\u201320). ACM.","DOI":"10.1145\/2808194.2809449"},{"key":"9374_CR36","unstructured":"Sokolova, M. (2006). Assessing invariance properties of evaluation measures. In Proceedings of NIPS\u201906 workshop on testing deployable learning and decision systems"},{"issue":"2684","key":"9374_CR37","doi-asserted-by":"publisher","first-page":"677","DOI":"10.1126\/science.103.2684.677","volume":"103","author":"SS Stevens","year":"1946","unstructured":"Stevens, S. S. (1946). On the theory of scales of measurement. Science, 103(2684), 677\u201380.","journal-title":"Science"},{"key":"9374_CR38","first-page":"3","volume-title":"Basic measurement theory. Handbook of mathematical psychology","author":"P Suppes","year":"1963","unstructured":"Suppes, P., & Zinnes, J. L. (1963). Basic measurement theory. Handbook of mathematical psychology (Vol. 1, pp. 3\u201376). New York: Wiley."},{"issue":"4","key":"9374_CR39","doi-asserted-by":"publisher","first-page":"365","DOI":"10.1108\/eb026584","volume":"30","author":"K Van Rijsbergen","year":"1974","unstructured":"Van Rijsbergen, K. (1974). Foundation of evaluation. Journal of Documentation, 30(4), 365\u2013373.","journal-title":"Journal of Documentation"},{"key":"9374_CR40","first-page":"32","volume-title":"Retrieval effectiveness","author":"K van Rijsbergen","year":"1981","unstructured":"van Rijsbergen, K. (1981). Retrieval effectiveness (pp. 32\u201343). London: Butterworths. (chap 3)."},{"key":"9374_CR41","volume-title":"Clustering and information retrieval","year":"2003","unstructured":"Wu, W., Xiong, H., & Shekhar, S. (Eds.). (2003). Clustering and information retrieval. Alphen aan den Rijn: Kluwer."},{"key":"9374_CR42","doi-asserted-by":"crossref","unstructured":"Xu, W., Liu, X., & Gong, Y. (2003). Document clustering based on non-negative matrix factorization. In Proceedings of SIGIR (pp. 267\u2013273). ACM.","DOI":"10.1145\/860435.860485"},{"key":"9374_CR43","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1002\/(SICI)1097-4571(199503)46:2<133::AID-ASI6>3.0.CO;2-Z","volume":"46","author":"Y Yao","year":"1995","unstructured":"Yao, Y. (1995). Measuring retrieval effectiveness based on user preference of documents. JASIS, 46, 133\u2013145.","journal-title":"JASIS"}],"container-title":["Information Retrieval Journal"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10791-020-09374-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10791-020-09374-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10791-020-09374-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,2]],"date-time":"2024-01-02T14:51:01Z","timestamp":1704207061000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10791-020-09374-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,5,29]]},"references-count":43,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2020,6]]}},"alternative-id":["9374"],"URL":"https:\/\/doi.org\/10.1007\/s10791-020-09374-0","relation":{},"ISSN":["1386-4564","1573-7659"],"issn-type":[{"value":"1386-4564","type":"print"},{"value":"1573-7659","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,5,29]]},"assertion":[{"value":"21 June 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 May 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 May 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}