{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T02:00:59Z","timestamp":1768269659907,"version":"3.49.0"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2016,6,21]],"date-time":"2016-06-21T00:00:00Z","timestamp":1466467200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2016,6,21]],"date-time":"2016-06-21T00:00:00Z","timestamp":1466467200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100000923","name":"Australian Research Council","doi-asserted-by":"crossref","award":["DP140101587"],"award-info":[{"award-number":["DP140101587"]}],"id":[{"id":"10.13039\/501100000923","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Inf Retrieval J"],"published-print":{"date-parts":[[2016,8]]},"DOI":"10.1007\/s10791-016-9282-6","type":"journal-article","created":{"date-parts":[[2016,6,21]],"date-time":"2016-06-21T10:15:07Z","timestamp":1466504107000},"page":"416-445","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":50,"title":["The effect of pooling and evaluation depth on IR metrics"],"prefix":"10.1007","volume":"19","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2092-5460","authenticated-orcid":false,"given":"Xiaolu","family":"Lu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6638-0232","authenticated-orcid":false,"given":"Alistair","family":"Moffat","sequence":"additional","affiliation":[]},{"given":"J. Shane","family":"Culpepper","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,6,21]]},"reference":[{"key":"9282_CR1","doi-asserted-by":"crossref","unstructured":"Aslam, J. A., Pavlu, V., & Yilmaz, E. (2006). A statistical method for system evaluation using incomplete judgments. In Proceedings of the ACM-SIGIR International Conference on Research and Development in Information Retrieval (pp. 541\u2013548).","DOI":"10.1145\/1148170.1148263"},{"key":"9282_CR2","doi-asserted-by":"crossref","unstructured":"Aslam, J. A., Yilmaz, E., & Pavlu, V. (2005). The maximum entropy method for analyzing retrieval measures. In Proceedings of the ACM-SIGIR International Conference on Research and Development in Information Retrieval (pp. 27\u201334).","DOI":"10.1145\/1076034.1076042"},{"key":"9282_CR3","doi-asserted-by":"crossref","unstructured":"Bailey, P., Moffat, A., Scholer, F., & Thomas, P. (2015). User variability and IR system evaluation. In Proceedings of the ACM-SIGIR International Conference on Research and Development in Information Retrieval (pp. 625\u2013634).","DOI":"10.1145\/2766462.2767728"},{"key":"9282_CR4","doi-asserted-by":"crossref","unstructured":"Buckley, C., & Voorhees, E. M. (2000). Evaluating evaluation measure stability. In Proceedings of the ACM-SIGIR International Conference on Research and Development in Information Retrieval (pp. 33\u201340).","DOI":"10.1145\/345508.345543"},{"key":"9282_CR5","doi-asserted-by":"crossref","unstructured":"Buckley, C., & Voorhees, E. M. (2004). Retrieval evaluation with incomplete information. In Proceedings of the ACM-SIGIR International Conference on Research and Development in Information Retrieval (pp. 25\u201332).","DOI":"10.1145\/1008992.1009000"},{"key":"9282_CR6","doi-asserted-by":"publisher","first-page":"491","DOI":"10.1007\/s10791-007-9032-x","volume":"10","author":"C Buckley","year":"2007","unstructured":"Buckley, C., Dimmick, D., Soboroff, I., & Voorhees, E. M. (2007). Bias and the limits of pooling for large collections. Information Retrieval Journal, 10, 491\u2013508.","journal-title":"Information Retrieval Journal"},{"key":"9282_CR7","doi-asserted-by":"crossref","unstructured":"B\u00fcttcher, S., Clarke, C. L. A., Yeung, P. C. K., & Soboroff, I. (2007). Reliable information retrieval evaluation with incomplete and biased judgements. In Proceedings of the ACM-SIGIR International Conference on Research and Development in Information Retrieval (pp. 63\u201370).","DOI":"10.1145\/1277741.1277755"},{"key":"9282_CR8","doi-asserted-by":"crossref","unstructured":"Carterette, B., Kanoulas, E., & Yilmaz, E. (2010). Low cost evaluation in information retrieval. In Proceedings of the ACM-SIGIR International Conference on Research and Development in Information Retrieval (p. 903).","DOI":"10.1145\/1835449.1835675"},{"key":"9282_CR9","doi-asserted-by":"crossref","unstructured":"Chapelle, O., Metlzer, D., Zhang, Y., & Grinspan, P. (2009). Expected reciprocal rank for graded relevance. In Proceedings of the Conference on Information and Knowledge Management (pp. 621\u2013630). ACM.","DOI":"10.1145\/1645953.1646033"},{"key":"9282_CR10","unstructured":"Clarke, C. L. A., Craswell, N., Soboroff, I., & Cormack, G. V. (2010). Overview of the TREC 2010 Web track. In Proceedings of TREC."},{"key":"9282_CR11","doi-asserted-by":"crossref","unstructured":"Demartini, G., & Mizzaro, S. (2006). A classification of IR effectiveness metrics. In Proceedings of the European Conference on IR Research (pp. 488\u2013491). Berlin, Heidelberg: Springer.","DOI":"10.1007\/11735106_48"},{"issue":"4","key":"9282_CR12","doi-asserted-by":"publisher","first-page":"422","DOI":"10.1145\/582415.582418","volume":"20","author":"K J\u00e4rvelin","year":"2002","unstructured":"J\u00e4rvelin, K., & Kek\u00e4l\u00e4inen, J. (2002). Cumulated gain-based evaluation of IR techniques. ACM Transactions on Information Systems, 20(4), 422\u2013446.","journal-title":"ACM Transactions on Information Systems"},{"key":"9282_CR13","doi-asserted-by":"crossref","unstructured":"Kanoulas, E., & Aslam, J. A. (2009). Empirical justification of the gain and discount function for NDCG. In Proceedings of Conference on Information and Knowledge Management (pp. 611\u2013620). ACM.","DOI":"10.1145\/1645953.1646032"},{"key":"9282_CR14","doi-asserted-by":"crossref","unstructured":"Moffat, A. (2013). Seven numeric properties of effectiveness metrics. In Proceedings of Asian Information Retrieval Societies Conference (pp. 1\u201312).","DOI":"10.1007\/978-3-642-45068-6_1"},{"key":"9282_CR15","doi-asserted-by":"crossref","unstructured":"Moffat, A., Bailey, P., Scholer, F., & Thomas, P. (2015). INST: An adaptive metric for information retrieval evaluation. In Proceedings of the Australasian Document Computing Symposium (pp. 5:1\u20135:4).","DOI":"10.1145\/2838931.2838938"},{"key":"9282_CR16","doi-asserted-by":"crossref","unstructured":"Moffat, A., Thomas, P., & Scholer, F. (2013). Users versus models: What observation tells us about effectiveness metrics. In Proceedings of Conference on Information and Knowledge Management (pp. 659\u2013668).","DOI":"10.1145\/2505515.2507665"},{"key":"9282_CR17","doi-asserted-by":"crossref","unstructured":"Moffat, A., Webber, W., & Zobel, J. (2007). Strategic system comparisons via targeted relevance judgments. In Proceedings of the ACM-SIGIR International Conference on Research and Development in Information Retrieval (pp. 375\u2013382).","DOI":"10.1145\/1277741.1277806"},{"issue":"1","key":"9282_CR18","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1145\/1416950.1416952","volume":"27","author":"A Moffat","year":"2008","unstructured":"Moffat, A., & Zobel, J. (2008). Rank-biased precision for measurement of retrieval effectiveness. ACM Transactions on Information Systems, 27(1), 2.","journal-title":"ACM Transactions on Information Systems"},{"key":"9282_CR19","doi-asserted-by":"crossref","unstructured":"Ravana, S.\u00a0D., & Moffat, A. (2010). Score estimation, incomplete judgments, and significance testing in IR evaluation. In Proceedings of the Asian Information Retrieval Societies Conference (pp. 97\u2013109).","DOI":"10.1007\/978-3-642-17187-1_9"},{"key":"9282_CR20","doi-asserted-by":"crossref","unstructured":"Roberston, S. E., Kanoulas, E., & Yilmaz, E. (2010). Extending average precision to graded relevance judgments. In Proceedings of the ACM-SIGIR Interenational Conference on Research and Development in Information Retrieval (pp. 603\u2013610).","DOI":"10.1145\/1835449.1835550"},{"key":"9282_CR21","unstructured":"Sakai, T. (2004). New performance metrics based on multigrade relevance: Their application to question answering. In Proceedings of the NII Testbeds and Communities for Information Access and Research."},{"key":"9282_CR22","doi-asserted-by":"crossref","unstructured":"Sakai, T. (2006). Evaluating evaluation metrics based on the bootstrap. In Proceedings of the ACM-SIGIR International Conference on Research and Development in Information Retrieval (pp. 525\u2013532). New York, NY: ACM Press.","DOI":"10.1145\/1148170.1148261"},{"key":"9282_CR23","doi-asserted-by":"crossref","unstructured":"Sakai, T. (2007). Alternatives to BPref. In Proceedings of the ACM-SIGIR International Conference on Research and Development in Information Retrieval (pp. 71\u201378).","DOI":"10.1145\/1277741.1277756"},{"key":"9282_CR24","doi-asserted-by":"crossref","unstructured":"Sakai, T. (2014). Metrics, statistics, tests. In Bridging Between Information Retrieval and Databases: PROMISE Winter School 2013, Bressanone, Italy, February 4\u20138, 2013. Revised Tutorial Lectures (pp. 116\u2013163). Berlin, Heidelberg: Springer.","DOI":"10.1007\/978-3-642-54798-0_6"},{"issue":"5","key":"9282_CR25","doi-asserted-by":"publisher","first-page":"447","DOI":"10.1007\/s10791-008-9059-7","volume":"11","author":"T Sakai","year":"2008","unstructured":"Sakai, T., & Kando, N. (2008). On information retrieval metrics designed for evaluation with incomplete relevance assessments. Information Retrieval Journal, 11(5), 447\u2013470.","journal-title":"Information Retrieval Journal"},{"issue":"4","key":"9282_CR26","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1561\/1500000009","volume":"4","author":"M Sanderson","year":"2010","unstructured":"Sanderson, M. (2010). Test collection based evaluation of information retrieval systems. Foundations and Trends in Information Retrieval, 4(4), 247\u2013375.","journal-title":"Foundations and Trends in Information Retrieval"},{"key":"9282_CR27","doi-asserted-by":"crossref","unstructured":"Voorhees, E.\u00a0M. (2001). Evaluation by highly relevant documents. In Proceedings of the ACM-SIGIR International Conference on Research and Development in Information Retrieval (pp. 74\u201382). ACM.","DOI":"10.1145\/383952.383963"},{"key":"9282_CR28","doi-asserted-by":"crossref","unstructured":"Voorhees, E.\u00a0M. (2002). The philosophy of information retrieval evaluation. In Evaluation of Cross-Language Information Retrieval Systems: Second Workshop of the Cross-Language Evaluation Forum (pp. 355\u2013370). Berlin, Heidelberg: Springer.","DOI":"10.1007\/3-540-45691-0_34"},{"key":"9282_CR29","volume-title":"TREC: Experiment and evaluation in information retrieval","author":"EM Voorhees","year":"2005","unstructured":"Voorhees, E. M., & Harman, D. K. (2005). TREC: Experiment and evaluation in information retrieval. Cambridge: The MIT Press."},{"key":"9282_CR30","unstructured":"Webber, W., Moffat, A., & Zobel, J. (2010). The effect of pooling and evaluation depth on metric stability. In Proceedings of the Workshop Evaluation Information Access (pp. 7\u201315)."},{"issue":"4","key":"9282_CR31","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1145\/1852102.1852106","volume":"28","author":"W Webber","year":"2010","unstructured":"Webber, W., Moffat, A., & Zobel, J. (2010). A similarity measure for indefinite rankings. ACM Transactions on Information Systems, 28(4), 20.","journal-title":"ACM Transactions on Information Systems"},{"key":"9282_CR32","doi-asserted-by":"crossref","unstructured":"Yilmaz, E., & Aslam, J. A. (2006). Estimating average precision with incomplete and imperfect judgments. In Proceedings of the Conference on Information and Knowledge Management (pp. 102\u2013111).","DOI":"10.1145\/1183614.1183633"},{"key":"9282_CR33","doi-asserted-by":"crossref","unstructured":"Yilmaz, E., Aslam, J. A., & Robertson, S. (2008). A new rank correlation coefficient for information retrieval. In Proceedings of the ACM-SIGIR International Conference on Research and Development in Information Retrieval (pp. 587\u2013594). ACM.","DOI":"10.1145\/1390334.1390435"},{"key":"9282_CR34","doi-asserted-by":"crossref","unstructured":"Yilmaz, E., Kanoulas, E., & Aslam, J. A. (2008). A simple and efficient sampling method for estimating AP and NDCG. In Proceedings of the ACM-SIGIR International Conference on Research and Development in Information Retrieval (pp. 603\u2013610).","DOI":"10.1145\/1390334.1390437"},{"issue":"3","key":"9282_CR35","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1007\/s10791-009-9116-x","volume":"13","author":"E Yilmaz","year":"2010","unstructured":"Yilmaz, E., & Robertson, S. (2010). On the choice of effectiveness measures for learning to rank. Information Retrieval Journal, 13(3), 271\u2013290.","journal-title":"Information Retrieval Journal"},{"key":"9282_CR36","doi-asserted-by":"crossref","unstructured":"Zobel, J. (1998). How reliable are the results of large-scale information retrieval experiments? In Proceedings of the ACM-SIGIR International Conference on Research and Development in Information Retrieval (pp. 307\u2013314).","DOI":"10.1145\/290941.291014"}],"container-title":["Information Retrieval Journal"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10791-016-9282-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10791-016-9282-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10791-016-9282-6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10791-016-9282-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,2]],"date-time":"2024-01-02T14:24:13Z","timestamp":1704205453000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10791-016-9282-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,6,21]]},"references-count":36,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2016,8]]}},"alternative-id":["9282"],"URL":"https:\/\/doi.org\/10.1007\/s10791-016-9282-6","relation":{},"ISSN":["1386-4564","1573-7659"],"issn-type":[{"value":"1386-4564","type":"print"},{"value":"1573-7659","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,6,21]]},"assertion":[{"value":"15 February 2016","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 June 2016","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 June 2016","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}