{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T08:36:34Z","timestamp":1771576594136,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,8,14]],"date-time":"2021-08-14T00:00:00Z","timestamp":1628899200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100000923","name":"Australian Research Council","doi-asserted-by":"publisher","award":["DP150103710"],"award-info":[{"award-number":["DP150103710"]}],"id":[{"id":"10.13039\/501100000923","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,8,14]]},"DOI":"10.1145\/3447548.3467435","type":"proceedings-article","created":{"date-parts":[[2021,8,13]],"date-time":"2021-08-13T18:21:39Z","timestamp":1628878899000},"page":"1180-1190","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Needle in a Haystack"],"prefix":"10.1145","author":[{"given":"Neil G.","family":"Marchant","sequence":"first","affiliation":[{"name":"University of Melbourne, Melbourne, Australia"}]},{"given":"Benjamin I. P.","family":"Rubinstein","sequence":"additional","affiliation":[{"name":"University of Melbourne, Melbourne, Australia"}]}],"member":"320","published-online":{"date-parts":[[2021,8,14]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Carvalho","author":"Bennett Paul N.","year":"2010","unstructured":"Paul N. Bennett and Vitor R . Carvalho . 2010 . Online Stratified Sampling: Evaluating Classifiers at Web-scale. In CIKM. 1581--1584. https:\/\/doi.org\/10.1145\/ 1871437.1871677 Paul N. Bennett and Vitor R. Carvalho. 2010. Online Stratified Sampling: Evaluating Classifiers at Web-scale. In CIKM. 1581--1584. https:\/\/doi.org\/10.1145\/ 1871437.1871677"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/361002.361007"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2699226"},{"key":"e_1_3_2_2_4_1","volume-title":"Robert","author":"Capp\u00e9 Olivier","year":"2008","unstructured":"Olivier Capp\u00e9 , Randal Douc , Arnaud Guillin , Jean-Michel Marin , and Christian P . Robert . 2008 . Adaptive importance sampling in general mixture classes. Statistics and Computing 18, 4 (01 Dec. 2008), 447--459. https:\/\/doi.org\/10.1007\/s11222-008-9059-x 10.1007\/s11222-008-9059-x Olivier Capp\u00e9, Randal Douc, Arnaud Guillin, Jean-Michel Marin, and Christian P. Robert. 2008. Adaptive importance sampling in general mixture classes. Statistics and Computing 18, 4 (01 Dec. 2008), 447--459. https:\/\/doi.org\/10.1007\/s11222-008-9059-x"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939785"},{"key":"e_1_3_2_2_6_1","volume-title":"Sampling Techniques","author":"Cochran William G.","unstructured":"William G. Cochran . 1977. Sampling Techniques ( 3 rd ed.). Wiley , New York . William G. Cochran. 1977. Sampling Techniques (3rd ed.). Wiley, New York.","edition":"3"},{"key":"e_1_3_2_2_7_1","volume-title":"Proceedings of the 21st Annual International ACM SIGIR Conference on Research and Development in Information Retrieval","author":"Cormack Gordon V.","unstructured":"Gordon V. Cormack , Christopher R. Palmer , and Charles L. A. Clarke . 1998. Efficient Construction of Large Test Collections . In Proceedings of the 21st Annual International ACM SIGIR Conference on Research and Development in Information Retrieval ( Melbourne, Australia) (SIGIR '98). Association for Computing Machinery, New York, NY, USA, 282--289. https:\/\/doi.org\/10.1145\/290941.291009 10.1145\/290941.291009 Gordon V. Cormack, Christopher R. Palmer, and Charles L. A. Clarke. 1998. Efficient Construction of Large Test Collections. In Proceedings of the 21st Annual International ACM SIGIR Conference on Research and Development in Information Retrieval (Melbourne, Australia) (SIGIR '98). Association for Computing Machinery, New York, NY, USA, 282--289. https:\/\/doi.org\/10.1145\/290941.291009"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1467-9469.2011.00756.x"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1959.10501501"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/0378-3758(95)00112-3"},{"key":"e_1_3_2_2_11_1","volume-title":"CIKM (New York","author":"Druck Gregory","year":"2063","unstructured":"Gregory Druck and Andrew McCallum . 2011. Toward Interactive Training and Evaluation . In CIKM (New York , NY , USA) . 947--956. https:\/\/doi.org\/10.1145\/ 2063 576.2063712 Gregory Druck and Andrew McCallum. 2011. Toward Interactive Training and Evaluation. In CIKM (New York, NY, USA). 947--956. https:\/\/doi.org\/10.1145\/ 2063576.2063712"},{"key":"e_1_3_2_2_12_1","volume-title":"An Introduction to Probability Theory and Its Applications","author":"Feller W.","unstructured":"W. Feller . 1968. An Introduction to Probability Theory and Its Applications , Volume 1 ( 3 rd ed.). Wiley . W. Feller. 1968. An Introduction to Probability Theory and Its Applications, Volume 1 (3rd ed.). Wiley.","edition":"3"},{"key":"e_1_3_2_2_13_1","volume-title":"An Introduction to Probability Theory and Its Applications","author":"Feller W.","unstructured":"W. Feller . 1971. An Introduction to Probability Theory and Its Applications , Volume 2 ( 2 nd ed.). Wiley . W. Feller. 1971. An Introduction to Probability Theory and Its Applications, Volume 2 (2nd ed.). Wiley.","edition":"2"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.14778\/3342263.3342642"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1134\/S0361768818060142"},{"key":"e_1_3_2_2_16_1","first-page":"1","article-title":"Ignorability in Statistical and Probabilistic Inference","volume":"24","author":"Jaeger Manfred","year":"2005","unstructured":"Manfred Jaeger . 2005 . Ignorability in Statistical and Probabilistic Inference . J. Artif. Int. Res. 24 , 1 (Dec. 2005), 889--917. Manfred Jaeger. 2005. Ignorability in Statistical and Probabilistic Inference. J. Artif. Int. Res. 24, 1 (Dec. 2005), 889--917.","journal-title":"J. Artif. Int. Res."},{"key":"e_1_3_2_2_17_1","volume-title":"A Short Survey on Online and Offline Methods for Search Quality Evaluation","author":"Kanoulas Evangelos","unstructured":"Evangelos Kanoulas . 2016. A Short Survey on Online and Offline Methods for Search Quality Evaluation . Springer International Publishing , Cham , 38--87. https:\/\/doi.org\/10.1007\/978-3-319-41718-9_3 10.1007\/978-3-319-41718-9_3 Evangelos Kanoulas. 2016. A Short Survey on Online and Offline Methods for Search Quality Evaluation. Springer International Publishing, Cham, 38--87. https:\/\/doi.org\/10.1007\/978-3-319-41718-9_3"},{"key":"e_1_3_2_2_18_1","volume-title":"Predicting disease risks from highly imbalanced data using random forest. BMC Medical Informatics and Decision Making 11, 1","author":"Khalilia Mohammed","year":"2011","unstructured":"Mohammed Khalilia , Sounak Chakraborty , and Mihail Popescu . 2011. Predicting disease risks from highly imbalanced data using random forest. BMC Medical Informatics and Decision Making 11, 1 ( 2011 ), 13 pages. https:\/\/doi.org\/10.1186\/1472-6947-11-51 10.1186\/1472-6947-11-51 Mohammed Khalilia, Sounak Chakraborty, and Mihail Popescu. 2011. Predicting disease risks from highly imbalanced data using random forest. BMC Medical Informatics and Decision Making 11, 1 (2011), 13 pages. https:\/\/doi.org\/10.1186\/1472-6947-11-51"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.14778\/1920841.1920904"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3132847.3133015"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.14778\/3137628.3137642"},{"key":"e_1_3_2_2_22_1","volume-title":"Rubinstein","author":"Marchant Neil G.","year":"2021","unstructured":"Neil G. Marchant and Benjamin I. P . Rubinstein . 2021 . Needle in a Haystack : Label-Efficient Evaluation under Extreme Class Imbalance . arXiv:2006.06963 [cs.LG] Neil G. Marchant and Benjamin I. P. Rubinstein. 2021. Needle in a Haystack: Label-Efficient Evaluation under Extreme Class Imbalance. arXiv:2006.06963 [cs.LG]"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.3150\/18-BEJ1042"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.14778\/2735471.2735474"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1080\/00949659208810398"},{"key":"e_1_3_2_2_27_1","volume-title":"Advances in Neural Information Processing Systems 31","author":"Portier Fran\u00e7ois","unstructured":"Fran\u00e7ois Portier and Bernard Delyon . 2018. Asymptotic optimality of adaptive importance sampling . In Advances in Neural Information Processing Systems 31 , S. Bengio, H. Wallach, H. Larochelle, K. Grauman, N. Cesa-Bianchi, and R. Garnett (Eds.). Curran Associates, Inc. , 3138--3148. Fran\u00e7ois Portier and Bernard Delyon. 2018. Asymptotic optimality of adaptive importance sampling. In Advances in Neural Information Processing Systems 31, S. Bengio, H. Wallach, H. Larochelle, K. Grauman, N. Cesa-Bianchi, and R. Garnett (Eds.). Curran Associates, Inc., 3138--3148."},{"key":"e_1_3_2_2_28_1","volume-title":"Porto Seguro's Safe Driver Prediction. https:\/\/www.kaggle. com\/c\/porto-seguro-safe-driver-prediction","author":"Seguro Porto","year":"2019","unstructured":"Porto Seguro . 2017. Porto Seguro's Safe Driver Prediction. https:\/\/www.kaggle. com\/c\/porto-seguro-safe-driver-prediction . Accessed : Dec 2019 . Porto Seguro. 2017. Porto Seguro's Safe Driver Prediction. https:\/\/www.kaggle. com\/c\/porto-seguro-safe-driver-prediction. Accessed: Dec 2019."},{"key":"e_1_3_2_2_29_1","volume-title":"Calibrating Probability with Undersampling for Unbalanced Classification. In 2015 IEEE Symposium Series on Computational Intelligence. 159--166","author":"Pozzolo A. D.","year":"2015","unstructured":"A. D. Pozzolo , O. Caelen , R. A. Johnson , and G. Bontempi . 2015 . Calibrating Probability with Undersampling for Unbalanced Classification. In 2015 IEEE Symposium Series on Computational Intelligence. 159--166 . https:\/\/doi.org\/10. 1109\/SSCI. 2015 .33 A. D. Pozzolo, O. Caelen, R. A. Johnson, and G. Bontempi. 2015. Calibrating Probability with Undersampling for Unbalanced Classification. In 2015 IEEE Symposium Series on Computational Intelligence. 159--166. https:\/\/doi.org\/10. 1109\/SSCI.2015.33"},{"key":"e_1_3_2_2_30_1","volume-title":"Reddy and Bhanukiran Vinzamuri","author":"Chandan","year":"2014","unstructured":"Chandan K. Reddy and Bhanukiran Vinzamuri . 2014 . A Survey of Partitional and Hierarchical Clustering Algorithms (1st ed.). Chapman & Hall\/CRC , 87--110. https:\/\/doi.org\/10.1201\/9781315373515-4 10.1201\/9781315373515-4 Chandan K. Reddy and Bhanukiran Vinzamuri. 2014. A Survey of Partitional and Hierarchical Clustering Algorithms (1st ed.). Chapman & Hall\/CRC, 87--110. https:\/\/doi.org\/10.1201\/9781315373515-4"},{"key":"e_1_3_2_2_31_1","unstructured":"RIDDLE 2003. Duplicate Detection Record Linkage and Identity Uncertainty: Datasets. http:\/\/www.cs.utexas.edu\/users\/ml\/riddle\/data.html. Accessed: Dec 2016.  RIDDLE 2003. Duplicate Detection Record Linkage and Identity Uncertainty: Datasets. http:\/\/www.cs.utexas.edu\/users\/ml\/riddle\/data.html. Accessed: Dec 2016."},{"key":"e_1_3_2_2_32_1","volume-title":"Kroese","author":"Rubinstein Reuven Y.","year":"2016","unstructured":"Reuven Y. Rubinstein and Dirk P . Kroese . 2016 . Simulation and the Monte Carlo Method. John Wiley & Sons, Ltd . https:\/\/doi.org\/10.1002\/9781118631980 10.1002\/9781118631980 Reuven Y. Rubinstein and Dirk P. Kroese. 2016. Simulation and the Monte Carlo Method. John Wiley & Sons, Ltd. https:\/\/doi.org\/10.1002\/9781118631980"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.5555\/3104322.3104443"},{"key":"e_1_3_2_2_34_1","volume-title":"Advances in Neural Information Processing Systems 23","author":"Sawade Christoph","year":"2083","unstructured":"Christoph Sawade , Niels Landwehr , and Tobias Scheffer . 2010. Active Estimation of F-Measures . In Advances in Neural Information Processing Systems 23 , J. D. Lafferty, C. K. I. Williams, J. Shawe-Taylor, R. S. Zemel, and A. Culotta (Eds.). Curran Associates, Inc. , 2083 --2091. Christoph Sawade, Niels Landwehr, and Tobias Scheffer. 2010. Active Estimation of F-Measures. In Advances in Neural Information Processing Systems 23, J. D. Lafferty, C. K. I. Williams, J. Shawe-Taylor, R. S. Zemel, and A. Culotta (Eds.). Curran Associates, Inc., 2083--2091."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/2970398.2970410"},{"key":"e_1_3_2_2_36_1","unstructured":"Erik Schultheis Mohammadreza Qaraei Priyanshu Gupta and Rohit Babbar. 2020. Unbiased Loss Functions for Extreme Classification With Missing Labels. arXiv:2007.00237 [stat.ML]  Erik Schultheis Mohammadreza Qaraei Priyanshu Gupta and Rohit Babbar. 2020. Unbiased Loss Functions for Extreme Classification With Missing Labels. arXiv:2007.00237 [stat.ML]"},{"key":"e_1_3_2_2_38_1","volume-title":"Delta Method","author":"van der Vaart A. W.","year":"1802","unstructured":"A. W. van der Vaart . 1998. Delta Method . Cambridge University Press , 25--34. https:\/\/doi.org\/10.1017\/CBO978051 1802 256.004 10.1017\/CBO9780511802256.004 A. W. van der Vaart. 1998. Delta Method. Cambridge University Press, 25--34. https:\/\/doi.org\/10.1017\/CBO9780511802256.004"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11280-012-0178-0"},{"key":"e_1_3_2_2_40_1","volume-title":"Bench-marking: Semisupervised Classifier Evaluation and Recalibration. In CVPR. 3262--3269. https:\/\/doi.org\/10.1109\/CVPR.2013.41","author":"Welinder P.","year":"2013","unstructured":"P. Welinder , M. Welling , and P. Perona . 2013 . A Lazy Man's Approach to Bench-marking: Semisupervised Classifier Evaluation and Recalibration. In CVPR. 3262--3269. https:\/\/doi.org\/10.1109\/CVPR.2013.41 10.1109\/CVPR.2013.41 P. Welinder, M. Welling, and P. Perona. 2013. A Lazy Man's Approach to Bench-marking: Semisupervised Classifier Evaluation and Recalibration. In CVPR. 3262--3269. https:\/\/doi.org\/10.1109\/CVPR.2013.41"}],"event":{"name":"KDD '21: The 27th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Virtual Event Singapore","acronym":"KDD '21","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery &amp; Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3447548.3467435","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3447548.3467435","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:18:37Z","timestamp":1750191517000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3447548.3467435"}},"subtitle":["Label-Efficient Evaluation under Extreme Class Imbalance"],"short-title":[],"issued":{"date-parts":[[2021,8,14]]},"references-count":38,"alternative-id":["10.1145\/3447548.3467435","10.1145\/3447548"],"URL":"https:\/\/doi.org\/10.1145\/3447548.3467435","relation":{},"subject":[],"published":{"date-parts":[[2021,8,14]]},"assertion":[{"value":"2021-08-14","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}