{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T19:26:18Z","timestamp":1752002778905,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-sa\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,21]]},"DOI":"10.1145\/3627673.3679591","type":"proceedings-article","created":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T19:34:21Z","timestamp":1729452861000},"page":"1143-1152","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["FastSimiFeat: A Fast and Generalized Approach Utilizing\n            <i>k<\/i>\n            -NN for Noisy Data Handling"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0496-5518","authenticated-orcid":false,"given":"Jungi","family":"Lee","sequence":"first","affiliation":[{"name":"ELROILAB Inc., Seoul, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7132-9653","authenticated-orcid":false,"given":"Hwiwoo","family":"Park","sequence":"additional","affiliation":[{"name":"ELROILAB Inc., Seoul, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-7135-0883","authenticated-orcid":false,"given":"Myounghwan","family":"Kim","sequence":"additional","affiliation":[{"name":"ELROILAB Inc., Seoul, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-2886-5893","authenticated-orcid":false,"given":"Jiseong","family":"Yoon","sequence":"additional","affiliation":[{"name":"ELROILAB Inc., Seoul, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0366-567X","authenticated-orcid":false,"given":"Kwangsun","family":"Yoo","sequence":"additional","affiliation":[{"name":"ELROILAB Inc., Seoul, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6110-2543","authenticated-orcid":false,"given":"Seok-Joo","family":"Byun","sequence":"additional","affiliation":[{"name":"ELROILAB Inc., Seoul, Republic of Korea"}]}],"member":"320","published-online":{"date-parts":[[2024,10,21]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/293347.293348"},{"key":"e_1_3_2_1_2_1","volume-title":"International Conference on Machine Learning. PMLR, 540--550","author":"Bahri Dara","year":"2020","unstructured":"Dara Bahri, Heinrich Jiang, and Maya Gupta. 2020. Deep k-nn for noisy labels. In International Conference on Machine Learning. PMLR, 540--550."},{"key":"e_1_3_2_1_3_1","first-page":"24392","article-title":"Understanding and improving early stopping for learning with noisy labels","volume":"34","author":"Bai Yingbin","year":"2021","unstructured":"Yingbin Bai, Erkun Yang, Bo Han, Yanhua Yang, Jiatong Li, Yinian Mao, Gang Niu, and Tongliang Liu. 2021. Understanding and improving early stopping for learning with noisy labels. Advances in Neural Information Processing Systems, Vol. 34 (2021), 24392--24403.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_4_1","volume-title":"International Conference on Machine Learning. PMLR, 1062--1070","author":"Chen Pengfei","year":"2019","unstructured":"Pengfei Chen, Ben Ben Liao, Guangyong Chen, and Shengyu Zhang. 2019. Understanding and utilizing deep neural networks trained with noisy labels. In International Conference on Machine Learning. PMLR, 1062--1070."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.109013"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_7_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_8_1","first-page":"30284","article-title":"Generalized jensen-shannon divergence loss for learning with noisy labels","volume":"34","author":"Englesson Erik","year":"2021","unstructured":"Erik Englesson and Hossein Azizpour. 2021. Generalized jensen-shannon divergence loss for learning with noisy labels. Advances in Neural Information Processing Systems, Vol. 34 (2021), 30284--30297.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_9_1","volume-title":"Noisy-label Learning with Sample Selection based on Noise Rate Estimate. arXiv preprint arXiv:2305.19486","author":"Garg Arpit","year":"2023","unstructured":"Arpit Garg, Cuong Nguyen, Rafael Felix, Thanh-Toan Do, and Gustavo Carneiro. 2023. Noisy-label Learning with Sample Selection based on Noise Rate Estimate. arXiv preprint arXiv:2305.19486 (2023)."},{"key":"e_1_3_2_1_10_1","first-page":"518","article-title":"Similarity search in high dimensions via hashing","volume":"99","author":"Gionis Aristides","year":"1999","unstructured":"Aristides Gionis, Piotr Indyk, Rajeev Motwani, et al. 1999. Similarity search in high dimensions via hashing. In Vldb, Vol. 99. 518--529.","journal-title":"Vldb"},{"key":"e_1_3_2_1_11_1","volume-title":"A survey of label-noise representation learning: Past, present and future. arXiv preprint arXiv:2011.04406","author":"Han Bo","year":"2020","unstructured":"Bo Han, Quanming Yao, Tongliang Liu, Gang Niu, Ivor W Tsang, James T Kwok, and Masashi Sugiyama. 2020. A survey of label-noise representation learning: Past, present and future. arXiv preprint arXiv:2011.04406 (2020)."},{"key":"e_1_3_2_1_12_1","volume-title":"Co-teaching: Robust training of deep neural networks with extremely noisy labels. Advances in neural information processing systems","author":"Han Bo","year":"2018","unstructured":"Bo Han, Quanming Yao, Xingrui Yu, Gang Niu, Miao Xu, Weihua Hu, Ivor Tsang, and Masashi Sugiyama. 2018. Co-teaching: Robust training of deep neural networks with extremely noisy labels. Advances in neural information processing systems, Vol. 31 (2018)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00463"},{"key":"e_1_3_2_1_15_1","volume-title":"International conference on machine learning. PMLR, 4804--4815","author":"Jiang Lu","year":"2020","unstructured":"Lu Jiang, Di Huang, Mason Liu, and Weilong Yang. 2020. Beyond synthetic noise: Deep learning on controlled noisy labels. In International conference on machine learning. PMLR, 4804--4815."},{"key":"e_1_3_2_1_16_1","volume-title":"International conference on machine learning. PMLR, 2304--2313","author":"Jiang Lu","year":"2018","unstructured":"Lu Jiang, Zhengyuan Zhou, Thomas Leung, Li-Jia Li, and Li Fei-Fei. 2018. Mentornet: Learning data-driven curriculum for very deep neural networks on corrupted labels. In International conference on machine learning. PMLR, 2304--2313."},{"key":"e_1_3_2_1_17_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_18_1","volume-title":"KNN-enhanced Deep Learning Against Noisy Labels. arXiv preprint arXiv:2012.04224","author":"Kong Shuyu","year":"2020","unstructured":"Shuyu Kong, You Li, Jia Wang, Amin Rezaei, and Hai Zhou. 2020. KNN-enhanced Deep Learning Against Noisy Labels. arXiv preprint arXiv:2012.04224 (2020)."},{"key":"e_1_3_2_1_19_1","unstructured":"Alex Krizhevsky Geoffrey Hinton et al. 2009. Learning multiple layers of features from tiny images. (2009)."},{"key":"e_1_3_2_1_20_1","volume-title":"Temporal ensembling for semi-supervised learning. arXiv preprint arXiv:1610.02242","author":"Laine Samuli","year":"2016","unstructured":"Samuli Laine and Timo Aila. 2016. Temporal ensembling for semi-supervised learning. arXiv preprint arXiv:1610.02242 (2016)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.3233\/IDA-184024"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20053-3_8"},{"key":"e_1_3_2_1_23_1","volume-title":"Dividemix: Learning with noisy labels as semi-supervised learning. arXiv preprint arXiv:2002.07394","author":"Li Junnan","year":"2020","unstructured":"Junnan Li, Richard Socher, and Steven CH Hoi. 2020. Dividemix: Learning with noisy labels as semi-supervised learning. arXiv preprint arXiv:2002.07394 (2020)."},{"key":"e_1_3_2_1_24_1","volume-title":"Webvision database: Visual learning and understanding from web data. arXiv preprint arXiv:1708.02862","author":"Li Wen","year":"2017","unstructured":"Wen Li, Limin Wang, Wei Li, Eirikur Agustsson, and Luc Van Gool. 2017. Webvision database: Visual learning and understanding from web data. arXiv preprint arXiv:1708.02862 (2017)."},{"key":"e_1_3_2_1_25_1","volume-title":"Early-learning regularization prevents memorization of noisy labels. Advances in neural information processing systems","author":"Liu Sheng","year":"2020","unstructured":"Sheng Liu, Jonathan Niles-Weed, Narges Razavian, and Carlos Fernandez-Granda. 2020. Early-learning regularization prevents memorization of noisy labels. Advances in neural information processing systems, Vol. 33 (2020), 20331--20342."},{"key":"e_1_3_2_1_26_1","volume-title":"Virtual adversarial training: a regularization method for supervised and semi-supervised learning","author":"Miyato Takeru","year":"2018","unstructured":"Takeru Miyato, Shin-ichi Maeda, Masanori Koyama, and Shin Ishii. 2018. Virtual adversarial training: a regularization method for supervised and semi-supervised learning. IEEE transactions on pattern analysis and machine intelligence, Vol. 41, 8 (2018), 1979--1993."},{"key":"e_1_3_2_1_27_1","volume-title":"GPT-4 technical report. arXiv","author":"R","year":"2023","unstructured":"R OpenAI. 2023. GPT-4 technical report. arXiv (2023), 2303--08774."},{"key":"e_1_3_2_1_28_1","first-page":"17044","article-title":"Identifying mislabeled data using the area under the margin ranking","volume":"33","author":"Pleiss Geoff","year":"2020","unstructured":"Geoff Pleiss, Tianyi Zhang, Ethan Elenberg, and Kilian Q Weinberger. 2020. Identifying mislabeled data using the area under the margin ranking. Advances in Neural Information Processing Systems, Vol. 33 (2020), 17044--17056.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"e_1_3_2_1_30_1","volume-title":"Metrics for evaluating 3D medical image segmentation: analysis, selection, and tool. BMC medical imaging","author":"Taha Abdel Aziz","year":"2015","unstructured":"Abdel Aziz Taha and Allan Hanbury. 2015. Metrics for evaluating 3D medical image segmentation: analysis, selection, and tool. BMC medical imaging, Vol. 15, 1 (2015), 1--28."},{"volume-title":"The 2011 international joint conference on neural networks","author":"Wang Xueyi","key":"e_1_3_2_1_31_1","unstructured":"Xueyi Wang. 2011. A fast exact k-nearest neighbors algorithm for high dimensional search using k-means clustering and triangle inequality. In The 2011 international joint conference on neural networks. IEEE, 1293--1299."},{"key":"e_1_3_2_1_32_1","volume-title":"Sample selection with uncertainty of losses for learning with noisy labels. arXiv preprint arXiv:2106.00445","author":"Xia Xiaobo","year":"2021","unstructured":"Xiaobo Xia, Tongliang Liu, Bo Han, Mingming Gong, Jun Yu, Gang Niu, and Masashi Sugiyama. 2021. Sample selection with uncertainty of losses for learning with noisy labels. arXiv preprint arXiv:2106.00445 (2021)."},{"key":"e_1_3_2_1_33_1","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition. 2691--2699","author":"Xiao Tong","year":"2015","unstructured":"Tong Xiao, Tian Xia, Yi Yang, Chang Huang, and Xiaogang Wang. 2015. Learning from massive noisy labeled data for image classification. In Proceedings of the IEEE conference on computer vision and pattern recognition. 2691--2699."},{"key":"e_1_3_2_1_34_1","volume-title":"An information-theoretic noise-robust loss function. arXiv preprint arXiv:1909.03388","author":"Xu Yilun","year":"2019","unstructured":"Yilun Xu, Peng Cao, Yuqing Kong, and Yizhou Wang. 2019. L_dmi: An information-theoretic noise-robust loss function. arXiv preprint arXiv:1909.03388 (2019)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3446776"},{"key":"e_1_3_2_1_36_1","volume-title":"mixup: Beyond empirical risk minimization. arXiv preprint arXiv:1710.09412","author":"Zhang Hongyi","year":"2017","unstructured":"Hongyi Zhang, Moustapha Cisse, Yann N Dauphin, and David Lopez-Paz. 2017. mixup: Beyond empirical risk minimization. arXiv preprint arXiv:1710.09412 (2017)."},{"key":"e_1_3_2_1_37_1","volume-title":"Improving crowdsourced label quality using noise correction","author":"Zhang Jing","year":"2017","unstructured":"Jing Zhang, Victor S Sheng, Tao Li, and Xindong Wu. 2017. Improving crowdsourced label quality using noise correction. IEEE transactions on neural networks and learning systems, Vol. 29, 5 (2017), 1675--1688."},{"key":"e_1_3_2_1_38_1","volume-title":"Learning with Feature-Dependent Label Noise: A Progressive Approach. CoRR","author":"Zhang Yikai","year":"2021","unstructured":"Yikai Zhang, Songzhu Zheng, Pengxiang Wu, Mayank Goswami, and Chao Chen. 2021. Learning with Feature-Dependent Label Noise: A Progressive Approach. CoRR, Vol. abs\/2103.07756 (2021). showeprint[arXiv]2103.07756 https:\/\/arxiv.org\/abs\/2103.07756"},{"volume-title":"Advances in Neural Information Processing Systems","author":"Zhang Zhilu","key":"e_1_3_2_1_39_1","unstructured":"Zhilu Zhang and Mert Sabuncu. 2018. Generalized Cross Entropy Loss for Training Deep Neural Networks with Noisy Labels. In Advances in Neural Information Processing Systems, S. Bengio, H. Wallach, H. Larochelle, K. Grauman, N. Cesa-Bianchi, and R. Garnett (Eds.), Vol. 31. Curran Associates, Inc."},{"key":"e_1_3_2_1_40_1","volume-title":"International Conference on Machine Learning. PMLR, 27412--27427","author":"Zhu Zhaowei","year":"2022","unstructured":"Zhaowei Zhu, Zihao Dong, and Yang Liu. 2022. Detecting corrupted labels without training a model to predict. In International Conference on Machine Learning. PMLR, 27412--27427."},{"key":"e_1_3_2_1_41_1","volume-title":"International Conference on Machine Learning. PMLR, 12912--12923","author":"Zhu Zhaowei","year":"2021","unstructured":"Zhaowei Zhu, Yiwen Song, and Yang Liu. 2021. Clusterability as an alternative to anchor points when learning with noisy labels. In International Conference on Machine Learning. PMLR, 12912--12923."},{"key":"e_1_3_2_1_42_1","volume-title":"International Conference on Machine Learning. PMLR, 27633--27653","author":"Zhu Zhaowei","year":"2022","unstructured":"Zhaowei Zhu, Jialu Wang, and Yang Liu. 2022. Beyond images: Label noise transition matrix estimation for tasks with lower-quality features. In International Conference on Machine Learning. PMLR, 27633--27653."}],"event":{"name":"CIKM '24: The 33rd ACM International Conference on Information and Knowledge Management","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Boise ID USA","acronym":"CIKM '24"},"container-title":["Proceedings of the 33rd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679591","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627673.3679591","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:23Z","timestamp":1750294703000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679591"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,21]]},"references-count":42,"alternative-id":["10.1145\/3627673.3679591","10.1145\/3627673"],"URL":"https:\/\/doi.org\/10.1145\/3627673.3679591","relation":{},"subject":[],"published":{"date-parts":[[2024,10,21]]},"assertion":[{"value":"2024-10-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}