{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T08:14:25Z","timestamp":1765008865193,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,12,9]]},"DOI":"10.1145\/3743093.3771031","type":"proceedings-article","created":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T08:08:11Z","timestamp":1765008491000},"page":"1-7","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["SELECT: Detecting Label Errors in Real-world Scene Text Data"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8364-3391","authenticated-orcid":false,"given":"Wenjun","family":"Liu","sequence":"first","affiliation":[{"name":"Yidun AI Lab, NetEase, Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2491-0035","authenticated-orcid":false,"given":"Qian","family":"Wu","sequence":"additional","affiliation":[{"name":"Yidun AI Lab, NetEase, Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-0544-0504","authenticated-orcid":false,"given":"Yifeng","family":"Hu","sequence":"additional","affiliation":[{"name":"Yidun AI Lab, NetEase, Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0935-2483","authenticated-orcid":false,"given":"Yuke","family":"Li","sequence":"additional","affiliation":[{"name":"Yidun AI Lab, NetEase, Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,12,6]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-86549-8_21"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00313"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19815-1_11"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2019.00252"},{"key":"e_1_3_3_1_6_2","unstructured":"Jacob Devlin Ming-Wei Chang Kenton Lee and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1810.04805 (2018)."},{"key":"e_1_3_3_1_7_2","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai et\u00a0al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2010.11929 (2020)."},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.254"},{"key":"e_1_3_3_1_9_2","unstructured":"Bo Han Quanming Yao Xingrui Yu Gang Niu Miao Xu Weihua Hu Ivor Tsang and Masashi Sugiyama. 2018. Co-teaching: Robust training of deep neural networks with extremely noisy labels. Advances in neural information processing systems 31 (2018)."},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_1_11_2","unstructured":"Pavel Izmailov Dmitrii Podoprikhin Timur Garipov Dmitry Vetrov and Andrew\u00a0Gordon Wilson. 2018. Averaging weights leads to wider optima and better generalization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1803.05407 (2018)."},{"key":"e_1_3_3_1_12_2","unstructured":"Max Jaderberg Karen Simonyan Andrea Vedaldi and Andrew Zisserman. 2014. Synthetic data and artificial neural networks for natural scene text recognition. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1406.2227 (2014)."},{"key":"e_1_3_3_1_13_2","first-page":"1156","volume-title":"International conference on document analysis and recognition (ICDAR)","author":"Karatzas Dimosthenis","year":"2015","unstructured":"Dimosthenis Karatzas, Lluis Gomez-B., Anguelos Nicolaou, Suman Ghosh, Andrew Bagdanov, Masakazu Iwamura, Jiri Matas, Lukas Neumann, Vijay\u00a0Ramaseshan Chandrasekhar, Shijian Lu, et\u00a0al. 2015. Competition on robust reading. In International conference on document analysis and recognition (ICDAR). IEEE, 1156\u20131160."},{"key":"e_1_3_3_1_14_2","first-page":"1484","volume-title":"ICDAR","author":"Karatzas Dimosthenis","year":"2013","unstructured":"Dimosthenis Karatzas, Faisal Shafait, Seiichi Uchida, Masakazu Iwamura, Lluis\u00a0Gomez i Bigorda, Sergi\u00a0Robles Mestre, Joan Mas, David\u00a0Fernandez Mota, Jon\u00a0Almazan Almazan, and Lluis\u00a0Pere De\u00a0Las\u00a0Heras. 2013. robust reading competition. In ICDAR. IEEE, 1484\u20131493."},{"key":"e_1_3_3_1_15_2","first-page":"12888","volume-title":"ICML","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022. Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In ICML. PMLR, 12888\u201312900."},{"key":"e_1_3_3_1_16_2","unstructured":"Junyang Lin Xuancheng Ren Yichang Zhang Gao Liu Peng Wang An Yang and Chang Zhou. 2022. Transferring General Multimodal Pretrained Models to Text Recognition. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2212.09297 (2022)."},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"crossref","unstructured":"Haiqing Liu Daoxing Li and Yuancheng Li. 2021. Confident sequence learning: A sequence class-label noise filtering technique to improve scene digit recognition. Journal of Intelligent & Fuzzy Systems 40 5 (2021) 9345\u20139359.","DOI":"10.3233\/JIFS-201825"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"crossref","unstructured":"Ning Lu Wenwen Yu Xianbiao Qi Yihao Chen Ping Gong Rong Xiao and Xiang Bai. 2021. Master: Multi-aspect non-local network for scene text recognition. Pattern Recognition 117 (2021) 107980.","DOI":"10.1016\/j.patcog.2021.107980"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.5244\/C.26.127"},{"key":"e_1_3_3_1_20_2","first-page":"1582","volume-title":"International conference on document analysis and recognition (ICDAR)","author":"Nayef Nibal","year":"2019","unstructured":"Nibal Nayef, Yash Patel, Michal Busta, Pinaki\u00a0Nath Chowdhury, Dimosthenis Karatzas, Wafa Khlif, Jiri Matas, Umapada Pal, Jean-Christophe Burie, Cheng-lin Liu, et\u00a0al. 2019. Robust reading challenge on multi-lingual scene text detection and recognition\u2014RRC-MLT-2019. In International conference on document analysis and recognition (ICDAR). IEEE, 1582\u20131587."},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"crossref","unstructured":"Curtis Northcutt Lu Jiang and Isaac Chuang. 2021. Confident learning: Estimating uncertainty in dataset labels. JAIR 70 (2021) 1373\u20131411.","DOI":"10.1613\/jair.1.12125"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.76"},{"key":"e_1_3_3_1_23_2","first-page":"8748","volume-title":"ICML","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In ICML. PMLR, 8748\u20138763."},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"crossref","unstructured":"Anhar Risnumawan Palaiahankote Shivakumara Chee\u00a0Seng Chan and Chew\u00a0Lim Tan. 2014. A robust arbitrary text detection system for natural scene images. Expert Systems with Applications 41 18 (2014) 8027\u20138048.","DOI":"10.1016\/j.eswa.2014.07.008"},{"key":"e_1_3_3_1_25_2","unstructured":"Marius Schubert Tobias Riedlinger Karsten Kahl Daniel Kr\u00f6ll Sebastian Schoenen Sini\u0161a \u0160egvi\u0107 and Matthias Rottmann. 2023. Identifying Label Errors in Object Detection Datasets by Loss Inspection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.06999 (2023)."},{"key":"e_1_3_3_1_26_2","first-page":"1429","volume-title":"International conference on document analysis and recognition (ICDAR)","author":"Shi Baoguang","year":"2017","unstructured":"Baoguang Shi, Cong Yao, Minghui Liao, Mingkun Yang, Pei Xu, Linyan Cui, Serge Belongie, Shijian Lu, and Xiang Bai. 2017. Icdar2017 competition on reading chinese text in the wild. In International conference on document analysis and recognition (ICDAR) , Vol.\u00a01. IEEE, 1429\u20131434."},{"key":"e_1_3_3_1_27_2","first-page":"1557","volume-title":"International conference on document analysis and recognition (ICDAR)","author":"Sun Yipeng","year":"2019","unstructured":"Yipeng Sun, Zihan Ni, Chee-Kheng Chng, Yuliang Liu, Canjie Luo, Chun\u00a0Chet Ng, Junyu Han, Errui Ding, Jingtuo Liu, Dimosthenis Karatzas, et\u00a0al. 2019. competition on large-scale street view text with partial labeling-RRC-LSVT. In International conference on document analysis and recognition (ICDAR). IEEE, 1557\u20131562."},{"key":"e_1_3_3_1_28_2","first-page":"10347","volume-title":"ICML","author":"Touvron Hugo","year":"2021","unstructured":"Hugo Touvron, Matthieu Cord, Matthijs Douze, Francisco Massa, Alexandre Sablayrolles, and Herv\u00e9 J\u00e9gou. 2021. Training data-efficient image transformers & distillation through attention. In ICML. PMLR, 10347\u201310357."},{"key":"e_1_3_3_1_29_2","unstructured":"A Veit T Matera L Neumann J Matas and S Belongie. [n. d.]. Coco-text: Dataset and benchmark for text detection and recognition in natural images. arXiv 2016. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1601.07140 ([n. d.])."},{"key":"e_1_3_3_1_30_2","first-page":"1457","volume-title":"2011 ICCV","author":"Wang Kai","year":"2011","unstructured":"Kai Wang, Boris Babenko, and Serge Belongie. 2011. End-to-end scene text recognition. In 2011 ICCV. IEEE, 1457\u20131464."},{"key":"e_1_3_3_1_31_2","unstructured":"Zhilin Yang Zihang Dai Yiming Yang Jaime Carbonell Russ\u00a0R Salakhutdinov and Quoc\u00a0V Le. 2019. Xlnet: Generalized autoregressive pretraining for language understanding. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"crossref","unstructured":"Boqiang Zhang Hongtao Xie Yuxin Wang Jianjun Xu and Yongdong Zhang. 2023. Linguistic More: Taking a Further Step toward Efficient and Accurate Scene Text Recognition. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.05140 (2023).","DOI":"10.24963\/ijcai.2023\/189"},{"key":"e_1_3_3_1_33_2","first-page":"1577","volume-title":"International conference on document analysis and recognition (ICDAR)","author":"Zhang Rui","year":"2019","unstructured":"Rui Zhang, Yongsheng Zhou, Qianyi Jiang, Qi Song, Nan Li, Kai Zhou, Lei Wang, et\u00a0al. 2019. Robust reading challenge on reading chinese text on signboard. In International conference on document analysis and recognition (ICDAR). IEEE, 1577\u20131581."},{"key":"e_1_3_3_1_34_2","first-page":"5","volume-title":"SUNw-IEEE\/CVF Conference on Computer Vision and Pattern Recognition","volume":"2017","author":"Zhang Ying","year":"2017","unstructured":"Ying Zhang, Lionel Gueguen, Ilya Zharkov, Peter Zhang, Keith Seifert, and Ben Kadlec. 2017. Uber-text: A large-scale dataset for optical character recognition from street-level imagery. In SUNw-IEEE\/CVF Conference on Computer Vision and Pattern Recognition , Vol.\u00a02017. 5."},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"crossref","unstructured":"Shuai Zhao Xiaohan Wang Linchao Zhu and Yi Yang. 2023. CLIP4STR: A Simple Baseline for Scene Text Recognition with Pre-trained Vision-Language Model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.14014 (2023).","DOI":"10.1109\/TIP.2024.3512354"}],"event":{"name":"MMAsia '25: ACM Multimedia Asia","location":"Kuala Lumpur Malaysia","acronym":"MMAsia '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 7th ACM International Conference on Multimedia in Asia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3743093.3771031","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T08:10:38Z","timestamp":1765008638000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3743093.3771031"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":34,"alternative-id":["10.1145\/3743093.3771031","10.1145\/3743093"],"URL":"https:\/\/doi.org\/10.1145\/3743093.3771031","relation":{},"subject":[],"published":{"date-parts":[[2025,12,6]]},"assertion":[{"value":"2025-12-06","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}