{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:41:25Z","timestamp":1755823285079,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":53,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612059","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:12Z","timestamp":1698391632000},"page":"2112-2120","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["PBFormer: Capturing Complex Scene Text Shape with Polynomial Band Transformer"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3672-5332","authenticated-orcid":false,"given":"Ruijin","family":"Liu","sequence":"first","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3399-3681","authenticated-orcid":false,"given":"Ning","family":"Lu","sequence":"additional","affiliation":[{"name":"Huawei Technologies Ltd., Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2490-1703","authenticated-orcid":false,"given":"Dapeng","family":"Chen","sequence":"additional","affiliation":[{"name":"Huawei Technologies Ltd., Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4599-8575","authenticated-orcid":false,"given":"Cheng","family":"LI","sequence":"additional","affiliation":[{"name":"Huawei Technologies Ltd., Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5548-3634","authenticated-orcid":false,"given":"Zejian","family":"Yuan","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0868-0974","authenticated-orcid":false,"given":"Wei","family":"Peng","sequence":"additional","affiliation":[{"name":"Huawei Technologies Ltd., Shenzhen, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"crossref","unstructured":"Youngmin Baek Bado Lee Dongyoon Han Sangdoo Yun and Hwalsuk Lee. 2019. Character Region Awareness for Text Detection. In CVPR. 9365--9374.","DOI":"10.1109\/CVPR.2019.00959"},{"volume-title":"ECCV (29) (Lecture Notes in Computer Science","author":"Baek Youngmin","key":"e_1_3_2_2_2_1","unstructured":"Youngmin Baek, Seung Shin, Jeonghun Baek, Sungrae Park, Junyeop Lee, Daehyun Nam, and Hwalsuk Lee. 2020. Character Region Attention for Text Spotting. In ECCV (29) (Lecture Notes in Computer Science, Vol. 12374). 504--521."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"crossref","unstructured":"Chee Kheng Chng and Chee Seng Chan. 2017. Total-Text: A Comprehensive Dataset for Scene Text Detection and Recognition. In ICDAR. 935--942.","DOI":"10.1109\/ICDAR.2017.157"},{"key":"e_1_3_2_2_4_1","volume-title":"ICDAR2019 Robust Reading Challenge on Arbitrary-Shaped Text - RRC-ArT. In ICDAR.","author":"Chng Chee Kheng","year":"2019","unstructured":"Chee Kheng Chng, Errui Ding, Jingtuo Liu, Dimosthenis Karatzas, Chee Seng Chan, Lianwen Jin, Yuliang Liu, Yipeng Sun, Chun Chet Ng, Canjie Luo, Zihan Ni, ChuanMing Fang, Shuaitao Zhang, and Junyu Han. 2019. ICDAR2019 Robust Reading Challenge on Arbitrary-Shaped Text - RRC-ArT. In ICDAR."},{"key":"e_1_3_2_2_5_1","unstructured":"Pengwen Dai Sanyi Zhang Hua Zhang and Xiaochun Cao. 2021. Progressive Contour Regression for Arbitrary-Shape Scene Text Detection. In CVPR. 7393--7402."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"crossref","unstructured":"Dan Deng Haifeng Liu Xuelong Li and Deng Cai. 2018. PixelLink: Detecting Scene Text via Instance Segmentation. In AAAI. 6773--6780.","DOI":"10.1609\/aaai.v32i1.12269"},{"key":"e_1_3_2_2_7_1","volume-title":"GTC: Guided Training of CTC towards Efficient and Accurate Scene Text Recognition. In AAAI. 11005--11012.","author":"Hu Wenyang","year":"2020","unstructured":"Wenyang Hu, Xiaocong Cai, Jun Hou, Shuai Yi, and Zhiping Lin. 2020. GTC: Guided Training of CTC towards Efficient and Accurate Scene Text Recognition. In AAAI. 11005--11012."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"crossref","unstructured":"Hui Li Peng Wang and Chunhua Shen. 2017. Towards End-to-End Text Spotting with Convolutional Recurrent Neural Networks. In ICCV. 5248--5256.","DOI":"10.1109\/ICCV.2017.560"},{"key":"e_1_3_2_2_9_1","volume-title":"Towards End-to-End Text Spotting in Natural Scenes. CoRR","author":"Li Hui","year":"2019","unstructured":"Hui Li, Peng Wang, and Chunhua Shen. 2019. Towards End-to-End Text Spotting in Natural Scenes. CoRR, Vol. abs\/1906.06013 (2019)."},{"key":"e_1_3_2_2_10_1","volume-title":"Cha Zhang, Zhoujun Li, and Furu Wei.","author":"Li Minghao","year":"2023","unstructured":"Minghao Li, Tengchao Lv, Lei Cui, Yijuan Lu, Dinei A. F. Flor\u00ea ncio, Cha Zhang, Zhoujun Li, and Furu Wei. 2023. TrOCR: Transformer-based Optical Character Recognition with Pre-trained Models. In AAAI."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2937086"},{"volume-title":"ECCV (11) (Lecture Notes in Computer Science","author":"Liao Minghui","key":"e_1_3_2_2_12_1","unstructured":"Minghui Liao, Guan Pang, Jing Huang, Tal Hassner, and Xiang Bai. 2020a. Mask TextSpotter v3: Segmentation Proposal Network for Robust Scene Text Spotting. In ECCV (11) (Lecture Notes in Computer Science, Vol. 12356). 706--722."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2825107"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"crossref","unstructured":"Minghui Liao Zhaoyi Wan Cong Yao Kai Chen and Xiang Bai. 2020b. Real-Time Scene Text Detection with Differentiable Binarization. In AAAI. 11474--11481.","DOI":"10.1609\/aaai.v34i07.6812"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"crossref","unstructured":"Minghui Liao Zhen Zhu Baoguang Shi Gui-Song Xia and Xiang Bai. 2018b. Rotation-Sensitive Regression for Oriented Scene Text Detection. In CVPR. 5909--5918.","DOI":"10.1109\/CVPR.2018.00619"},{"key":"e_1_3_2_2_16_1","volume-title":"Real-Time Scene Text Detection with Differentiable Binarization and Adaptive Scale Fusion. CoRR","author":"Liao Minghui","year":"2022","unstructured":"Minghui Liao, Zhisheng Zou, Zhaoyi Wan, Cong Yao, and Xiang Bai. 2022. Real-Time Scene Text Detection with Differentiable Binarization and Adaptive Scale Fusion. CoRR, Vol. abs\/2202.10304 (2022)."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"crossref","unstructured":"Ruijin Liu Dapeng Chen Tie Liu Zhiliang Xiong and Zejian Yuan. 2022. Learning to Predict 3D Lane Shape and Camera Pose from a Single Image via Geometry Constraints. In AAAI. 1765--1772.","DOI":"10.1609\/aaai.v36i2.20069"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"crossref","unstructured":"Ruijin Liu Zejian Yuan Tie Liu and Zhiliang Xiong. 2021b. End-to-end Lane Shape Prediction with Transformers. In WACV. 3693--3701.","DOI":"10.1109\/WACV48630.2021.00374"},{"key":"e_1_3_2_2_19_1","unstructured":"Yuliang Liu Hao Chen Chunhua Shen Tong He Lianwen Jin and Liangwei Wang. 2020a. ABCNet: Real-Time Scene Text Spotting With Adaptive Bezier-Curve Network. In CVPR. 9806--9815."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2954218"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2019.02.002"},{"key":"e_1_3_2_2_22_1","volume-title":"ABCNet v2: Adaptive Bezier-Curve Network for Real-time End-to-end Text Spotting. CoRR","author":"Liu Yuliang","year":"2021","unstructured":"Yuliang Liu, Chunhua Shen, Lianwen Jin, Tong He, Peng Chen, Chongyu Liu, and Hao Chen. 2021a. ABCNet v2: Adaptive Bezier-Curve Network for Real-time End-to-end Text Spotting. CoRR, Vol. abs\/2105.03620 (2021)."},{"key":"e_1_3_2_2_23_1","unstructured":"Zichuan Liu Guosheng Lin Sheng Yang Fayao Liu Weisi Lin and Wang Ling Goh. 2019b. Towards Robust Curve Text Detection With Conditional Spatial Expansion. In CVPR. 7269--7278."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-020-01369-0"},{"volume-title":"ECCV (2)","author":"Long Shangbang","key":"e_1_3_2_2_25_1","unstructured":"Shangbang Long, Jiaqiang Ruan, Wenjie Zhang, Xin He, Wenhao Wu, and Cong Yao. 2018. TextSnake: A Flexible Representation for Detecting Text of Arbitrary Shapes. In ECCV (2), Vol. 11206. 19--35."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107684"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2019.00254"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"crossref","unstructured":"Xugong Qin Yu Zhou Youhui Guo Dayan Wu Zhihong Tian Ning Jiang Hongbin Wang and Weiping Wang. 2021. Mask is All You Need: Rethinking Mask R-CNN for Dense and Arbitrary-Shaped Scene Text Detection. In ACM Multimedia. 414--423.","DOI":"10.1145\/3474085.3475178"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"crossref","unstructured":"Sangeeth Reddy Minesh Mathew Llu\u00eds G\u00f3mez Mar\u00e7al Rusi\u00f1ol Dimosthenis Karatzas and C. V. Jawahar. 2020. RoadText-1K: Text Detection & Recognition Dataset for Driving Videos. In ICRA. 11074--11080.","DOI":"10.1109\/ICRA40945.2020.9196577"},{"key":"e_1_3_2_2_30_1","unstructured":"Tao Sheng Jie Chen and Zhouhui Lian. 2021. CentripetalText: An Efficient Text Instance Representation for Scene Text Detection. In NeurIPS. 335--346."},{"key":"e_1_3_2_2_31_1","volume-title":"Belongie","author":"Shi Baoguang","year":"2017","unstructured":"Baoguang Shi, Xiang Bai, and Serge J. Belongie. 2017. Detecting Oriented Text in Natural Images by Linking Segments. In CVPR. 3482--3490."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2019.06.020"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"crossref","unstructured":"Jingqun Tang Wenqing Zhang Hongye Liu Mingkun Yang Bo Jiang Guanglong Hu and Xiang Bai. 2022. Few Could Be Better Than All: Feature Sampling and Grouping for Scene Text Detection. In CVPR.","DOI":"10.1109\/CVPR52688.2022.00452"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"crossref","unstructured":"Zhuotao Tian Michelle Shu Pengyuan Lyu Ruiyu Li Chao Zhou Xiaoyong Shen and Jiaya Jia. 2019a. Learning Shape-Aware Embedding for Scene Text Detection. In CVPR. 4234--4243.","DOI":"10.1109\/CVPR.2019.00436"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"crossref","unstructured":"Zhuotao Tian Michelle Shu Pengyuan Lyu Ruiyu Li Chao Zhou Xiaoyong Shen and Jiaya Jia. 2019b. Learning Shape-Aware Embedding for Scene Text Detection. In CVPR. 4234--4243.","DOI":"10.1109\/CVPR.2019.00436"},{"key":"e_1_3_2_2_36_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N. Gomez Lukasz Kaiser and Illia Polosukhin. 2017. Attention is All you Need. In NIPS. 5998--6008."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"crossref","unstructured":"Fangfang Wang Yifeng Chen Fei Wu and Xi Li. 2020a. TextRay: Contour-based Geometric Modeling for Arbitrary-shaped Scene Text Detection. In ACM Multimedia. ACM 111--119.","DOI":"10.1145\/3394171.3413819"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"crossref","unstructured":"Hao Wang Pu Lu Hui Zhang Mingkun Yang Xiang Bai Yongchao Xu Mengchao He Yongpan Wang and Wenyu Liu. 2020b. All You Need Is Boundary: Toward Arbitrary-Shaped Text Spotting. In AAAI. 12160--12167.","DOI":"10.1609\/aaai.v34i07.6896"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"crossref","unstructured":"Pengfei Wang Chengquan Zhang Fei Qi Zuming Huang Mengyi En Junyu Han Jingtuo Liu Errui Ding and Guangming Shi. 2019d. A Single-Shot Arbitrarily-Shaped Text Detector based on Context Attended Multi-Task Learning. In ACM Multimedia. 1277--1285.","DOI":"10.1145\/3343031.3350988"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"crossref","unstructured":"Wenhai Wang Enze Xie Xiang Li Wenbo Hou Tong Lu Gang Yu and Shuai Shao. 2019b. Shape Robust Text Detection With Progressive Scale Expansion Network. In CVPR. 9336--9345.","DOI":"10.1109\/CVPR.2019.00956"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00853"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"crossref","unstructured":"Wei Wang Yu Zhou Jiahao Lv Dayan Wu Guoqing Zhao Ning Jiang and Weiping Wang. 2022. TPSNet: Reverse Thinking of Thin Plate Splines for Arbitrary Shape Scene Text Representation. In ACM Multimedia. 5014--5025.","DOI":"10.1145\/3503161.3547882"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"crossref","unstructured":"Xiaobing Wang Yingying Jiang Zhenbo Luo Cheng-Lin Liu Hyunsoo Choi and Sungjin Kim. 2019a. Arbitrary Shape Scene Text Detection With Adaptive Text Region Representation. In CVPR. 6449--6458.","DOI":"10.1109\/CVPR.2019.00661"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"crossref","unstructured":"Yuxin Wang Hongtao Xie Zheng-Jun Zha Mengting Xing Zilong Fu and Yongdong Zhang. 2020c. ContourNet: Taking a Further Step Toward Accurate Arbitrary-Shaped Scene Text Detection. In CVPR. 11750--11759.","DOI":"10.1109\/CVPR42600.2020.01177"},{"key":"e_1_3_2_2_45_1","volume-title":"MSR: Multi-Scale Shape Regression for Scene Text Detection. In IJCAI. 989--995.","author":"Xue Chuhui","year":"2019","unstructured":"Chuhui Xue, Shijian Lu, and Wei Zhang. 2019. MSR: Multi-Scale Shape Regression for Scene Text Detection. In IJCAI. 989--995."},{"key":"e_1_3_2_2_46_1","volume-title":"ESIR: End-To-End Scene Text Recognition via Iterative Image Rectification. In CVPR. 2059--2068.","author":"Zhan Fangneng","year":"2019","unstructured":"Fangneng Zhan and Shijian Lu. 2019. ESIR: End-To-End Scene Text Recognition via Iterative Image Rectification. In CVPR. 2059--2068."},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"crossref","unstructured":"Chengquan Zhang Borong Liang Zuming Huang Mengyi En Junyu Han Errui Ding and Xinghao Ding. 2019. Look More Than Once: An Accurate Detector for Text of Arbitrary Shapes. In CVPR. 10552--10561.","DOI":"10.1109\/CVPR.2019.01080"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"crossref","unstructured":"Shi-Xue Zhang Xiaobin Zhu Jie-Bo Hou Chang Liu Chun Yang Hongfa Wang and Xu-Cheng Yin. 2020. Deep Relational Reasoning Graph Network for Arbitrary Shape Text Detection. In CVPR. 9696--9705.","DOI":"10.1109\/CVPR42600.2020.00972"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"crossref","unstructured":"Shi-Xue Zhang Xiaobin Zhu Chun Yang Hongfa Wang and Xu-Cheng Yin. 2021. Adaptive Boundary Proposal Network for Arbitrary Shape Text Detection. In ICCV. 1285--1294.","DOI":"10.1109\/ICCV48922.2021.00134"},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"crossref","unstructured":"Xiang Zhang Yongwen Su Subarna Tripathi and Zhuowen Tu. 2022. Text Spotting Transformers. In CVPR.","DOI":"10.1109\/CVPR52688.2022.00930"},{"key":"e_1_3_2_2_51_1","volume-title":"EAST: An Efficient and Accurate Scene Text Detector","author":"Zhou Xinyu","year":"2017","unstructured":"Xinyu Zhou, Cong Yao, He Wen, Yuzhi Wang, Shuchang Zhou, Weiran He, and Jiajun Liang. 2017. EAST: An Efficient and Accurate Scene Text Detector. In CVPR. IEEE Computer Society, 2642--2651."},{"key":"e_1_3_2_2_52_1","unstructured":"Xizhou Zhu Weijie Su Lewei Lu Bin Li Xiaogang Wang and Jifeng Dai. 2021b. Deformable DETR: Deformable Transformers for End-to-End Object Detection. In ICLR."},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"crossref","unstructured":"Yiqin Zhu Jianyong Chen Lingyu Liang Zhanghui Kuang Lianwen Jin and Wayne Zhang. 2021a. Fourier Contour Embedding for Arbitrary-Shaped Text Detection. In CVPR. 3123--3131.","DOI":"10.1109\/CVPR46437.2021.00314"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Ottawa ON Canada","acronym":"MM '23"},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612059","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612059","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:02:09Z","timestamp":1755820929000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612059"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":53,"alternative-id":["10.1145\/3581783.3612059","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612059","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}