{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:18:02Z","timestamp":1750220282636,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,2,11]],"date-time":"2022-02-11T00:00:00Z","timestamp":1644537600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,2,11]]},"DOI":"10.1145\/3488560.3498482","type":"proceedings-article","created":{"date-parts":[[2022,2,15]],"date-time":"2022-02-15T21:42:57Z","timestamp":1644961377000},"page":"526-534","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Efficient Two-stage Label Noise Reduction for Retrieval-based Tasks"],"prefix":"10.1145","author":[{"given":"Mengmeng","family":"Kuang","sequence":"first","affiliation":[{"name":"Tencent Holdings Ltd., Guangzhou, China"}]},{"given":"Weiyan","family":"Wang","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology, Hong Kong, China"}]},{"given":"Zhenhong","family":"Chen","sequence":"additional","affiliation":[{"name":"Tencent Holdings Ltd., Guangzhou, China"}]},{"given":"Lie","family":"Kang","sequence":"additional","affiliation":[{"name":"Tencent Holdings Ltd., Guangzhou, China"}]},{"given":"Qiang","family":"Yan","sequence":"additional","affiliation":[{"name":"Tencent Holdings Ltd., Guangzhou, China"}]}],"member":"320","published-online":{"date-parts":[[2022,2,15]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022873112823"},{"key":"e_1_3_2_2_2_1","volume-title":"Mixmatch: A holistic approach to semi-supervised learning. arXiv preprint arXiv:1905.02249","author":"Berthelot David","year":"2019","unstructured":"David Berthelot, Nicholas Carlini, Ian Goodfellow, Nicolas Papernot, Avital Oliver, and Colin Raffel. 2019. Mixmatch: A holistic approach to semi-supervised learning. arXiv preprint arXiv:1905.02249 (2019)."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.5555\/1756006.1859921"},{"key":"e_1_3_2_2_4_1","volume-title":"International Conference on Machine Learning. PMLR, 1062--1070","author":"Chen Pengfei","year":"2019","unstructured":"Pengfei Chen, Ben Ben Liao, Guangyong Chen, and Shengyu Zhang. 2019. Understanding and utilizing deep neural networks trained with noisy labels. In International Conference on Machine Learning. PMLR, 1062--1070."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.58"},{"key":"e_1_3_2_2_6_1","volume-title":"Classification in the presence of label noise: a survey","author":"Michel Verleysen Fr\u00e9nay","year":"2013","unstructured":"Beno^it Fr\u00e9nay and Michel Verleysen. 2013. Classification in the presence of label noise: a survey. IEEE transactions on neural networks and learning systems, Vol. 25, 5 (2013), 845--869."},{"key":"e_1_3_2_2_7_1","unstructured":"Jacob Goldberger and Ehud Ben-Reuven. 2016. Training deep neural-networks using a noise adaptation layer. (2016)."},{"key":"e_1_3_2_2_8_1","volume-title":"Anit Kumar Sahu, and Wan-Yi Lin","author":"Gupta Gaurav","year":"2019","unstructured":"Gaurav Gupta, Anit Kumar Sahu, and Wan-Yi Lin. 2019. Learning in Confusion: Batch Active Learning with Noisy Oracle. arXiv preprint arXiv:1909.12473 (2019)."},{"key":"e_1_3_2_2_9_1","volume-title":"Co-teaching: Robust training of deep neural networks with extremely noisy labels. arXiv preprint arXiv:1804.06872","author":"Han Bo","year":"2018","unstructured":"Bo Han, Quanming Yao, Xingrui Yu, Gang Niu, Miao Xu, Weihua Hu, Ivor Tsang, and Masashi Sugiyama. 2018. Co-teaching: Robust training of deep neural networks with extremely noisy labels. arXiv preprint arXiv:1804.06872 (2018)."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403305"},{"key":"e_1_3_2_2_11_1","volume-title":"International Conference on Machine Learning. PMLR, 4804--4815","author":"Jiang Lu","year":"2020","unstructured":"Lu Jiang, Di Huang, Mason Liu, and Weilong Yang. 2020. Beyond synthetic noise: Deep learning on controlled noisy labels. In International Conference on Machine Learning. PMLR, 4804--4815."},{"key":"e_1_3_2_2_12_1","volume-title":"International Conference on Machine Learning. PMLR, 2304--2313","author":"Jiang Lu","year":"2018","unstructured":"Lu Jiang, Zhengyuan Zhou, Thomas Leung, Li-Jia Li, and Li Fei-Fei. 2018. Mentornet: Learning data-driven curriculum for very deep neural networks on corrupted labels. In International Conference on Machine Learning. PMLR, 2304--2313."},{"key":"e_1_3_2_2_13_1","volume-title":"An effective label noise model for dnn text classification. arXiv preprint arXiv:1903.07507","author":"Jindal Ishan","year":"2019","unstructured":"Ishan Jindal, Daniel Pressel, Brian Lester, and Matthew Nokleby. 2019. An effective label noise model for dnn text classification. arXiv preprint arXiv:1903.07507 (2019)."},{"key":"e_1_3_2_2_14_1","volume-title":"Dividemix: Learning with noisy labels as semi-supervised learning. arXiv preprint arXiv:2002.07394","author":"Li Junnan","year":"2020","unstructured":"Junnan Li, Richard Socher, and Steven CH Hoi. 2020. Dividemix: Learning with noisy labels as semi-supervised learning. arXiv preprint arXiv:2002.07394 (2020)."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3--642--14267--3"},{"key":"e_1_3_2_2_16_1","volume-title":"International Conference on Machine Learning. PMLR, 3355--3364","author":"Ma Xingjun","year":"2018","unstructured":"Xingjun Ma, Yisen Wang, Michael E Houle, Shuo Zhou, Sarah Erfani, Shutao Xia, Sudanthi Wijewickrema, and James Bailey. 2018. Dimensionality-driven learning with noisy labels. In International Conference on Machine Learning. PMLR, 3355--3364."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.5555\/2002472.2002491"},{"key":"e_1_3_2_2_18_1","volume-title":"Thi Phuong Nhung Ngo, Thi Hoai Phuong Nguyen, Laura Beggel, and Thomas Brox.","author":"Nguyen Duc Tam","year":"2019","unstructured":"Duc Tam Nguyen, Chaithanya Kumar Mummadi, Thi Phuong Nhung Ngo, Thi Hoai Phuong Nguyen, Laura Beggel, and Thomas Brox. 2019. Self: Learning to filter noisy labels with self-ensembling. arXiv preprint arXiv:1910.01842 (2019)."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.12125"},{"key":"e_1_3_2_2_20_1","volume-title":"2021 a. Pervasive label errors in test sets destabilize machine learning benchmarks. arXiv preprint arXiv:2103.14749","author":"Northcutt Curtis G","year":"2021","unstructured":"Curtis G Northcutt, Anish Athalye, and Jonas Mueller. 2021 a. Pervasive label errors in test sets destabilize machine learning benchmarks. arXiv preprint arXiv:2103.14749 (2021)."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.240"},{"key":"e_1_3_2_2_22_1","volume-title":"Know what you don't know: Unanswerable questions for SQuAD. arXiv preprint arXiv:1806.03822","author":"Rajpurkar Pranav","year":"2018","unstructured":"Pranav Rajpurkar, Robin Jia, and Percy Liang. 2018. Know what you don't know: Unanswerable questions for SQuAD. arXiv preprint arXiv:1806.03822 (2018)."},{"key":"e_1_3_2_2_23_1","unstructured":"Pranav Rajpurkar Jian Zhang Konstantin Lopyrev and Percy Liang. 2016. Squad: 100 000"},{"volume-title":"arXiv preprint arXiv:1606.05250","year":"2016","key":"e_1_3_2_2_24_1","unstructured":"questions for machine comprehension of text. arXiv preprint arXiv:1606.05250 (2016)."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000009"},{"key":"e_1_3_2_2_26_1","volume-title":"Estimating the support of a high-dimensional distribution. Neural computation","author":"Sch\u00f6lkopf Bernhard","year":"2001","unstructured":"Bernhard Sch\u00f6lkopf, John C Platt, John Shawe-Taylor, Alex J Smola, and Robert C Williamson. 2001. Estimating the support of a high-dimensional distribution. Neural computation, Vol. 13, 7 (2001), 1443--1471."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D13-1170"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2954547"},{"key":"e_1_3_2_2_29_1","volume-title":"Adam Trischler, Yoshua Bengio, and Geoffrey J Gordon.","author":"Toneva Mariya","year":"2018","unstructured":"Mariya Toneva, Alessandro Sordoni, Remi Tachet des Combes, Adam Trischler, Yoshua Bengio, and Geoffrey J Gordon. 2018. An empirical study of example forgetting during deep neural network learning. arXiv preprint arXiv:1812.05159 (2018)."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00041"},{"key":"e_1_3_2_2_31_1","volume-title":"Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations. Association for Computational Linguistics, Online, 38--45","author":"Wolf Thomas","year":"2020","unstructured":"Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Clement Delangue, Anthony Moi, Pierric Cistac, Tim Rault, R\u00e9mi Louf, Morgan Funtowicz, Joe Davison, Sam Shleifer, Patrick von Platen, Clara Ma, Yacine Jernite, Julien Plu, Canwen Xu, Teven Le Scao, Sylvain Gugger, Mariama Drame, Quentin Lhoest, and Alexander M. Rush. 2020. Transformers: State-of-the-Art Natural Language Processing. In Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations. Association for Computational Linguistics, Online, 38--45. https:\/\/www.aclweb.org\/anthology\/2020.emnlp-demos.6"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.177"},{"key":"e_1_3_2_2_33_1","volume-title":"mixup: Beyond empirical risk minimization. arXiv preprint arXiv:1710.09412","author":"Zhang Hongyi","year":"2017","unstructured":"Hongyi Zhang, Moustapha Cisse, Yann N Dauphin, and David Lopez-Paz. 2017. mixup: Beyond empirical risk minimization. arXiv preprint arXiv:1710.09412 (2017)."},{"key":"e_1_3_2_2_34_1","volume-title":"Character-level convolutional networks for text classification. Advances in neural information processing systems","author":"Zhang Xiang","year":"2015","unstructured":"Xiang Zhang, Junbo Zhao, and Yann LeCun. 2015. Character-level convolutional networks for text classification. Advances in neural information processing systems, Vol. 28 (2015), 649--657."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.5555\/3327546.3327555"}],"event":{"name":"WSDM '22: The Fifteenth ACM International Conference on Web Search and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Virtual Event AZ USA","acronym":"WSDM '22"},"container-title":["Proceedings of the Fifteenth ACM International Conference on Web Search and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3488560.3498482","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3488560.3498482","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:31:19Z","timestamp":1750188679000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3488560.3498482"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,2,11]]},"references-count":35,"alternative-id":["10.1145\/3488560.3498482","10.1145\/3488560"],"URL":"https:\/\/doi.org\/10.1145\/3488560.3498482","relation":{},"subject":[],"published":{"date-parts":[[2022,2,11]]},"assertion":[{"value":"2022-02-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}