{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T09:08:45Z","timestamp":1768295325504,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":78,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,10,17]],"date-time":"2021-10-17T00:00:00Z","timestamp":1634428800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"the National Natural Science Foundation of China","award":["62006221"],"award-info":[{"award-number":["62006221"]}]},{"name":"the Beijing Municipal Science & Technology Commission","award":["Z191100007119002"],"award-info":[{"award-number":["Z191100007119002"]}]},{"name":"the Key Research Program of Frontier Sciences, CAS","award":["ZDBS-LY-7024"],"award-info":[{"award-number":["ZDBS-LY-7024"]}]},{"name":"CAAI-Huawei MindSpore Open Fund"},{"name":"the State Key Laboratory of Media Convergence and Communication, Communication University of China, China","award":["SKLMCC2020KF004"],"award-info":[{"award-number":["SKLMCC2020KF004"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,10,17]]},"DOI":"10.1145\/3474085.3475178","type":"proceedings-article","created":{"date-parts":[[2021,10,18]],"date-time":"2021-10-18T20:00:05Z","timestamp":1634587205000},"page":"414-423","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":34,"title":["Mask is All You Need"],"prefix":"10.1145","author":[{"given":"Xugong","family":"Qin","sequence":"first","affiliation":[{"name":"Institute of Information Engineering, Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}]},{"given":"Yu","family":"Zhou","sequence":"additional","affiliation":[{"name":"Institute of Information Engineering, Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}]},{"given":"Youhui","family":"Guo","sequence":"additional","affiliation":[{"name":"Institute of Information Engineering, Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}]},{"given":"Dayan","family":"Wu","sequence":"additional","affiliation":[{"name":"Institute of Information Engineering, Chinese Academy of Sciences, Beijing, China"}]},{"given":"Zhihong","family":"Tian","sequence":"additional","affiliation":[{"name":"Guangzhou University, Guangzhou, China"}]},{"given":"Ning","family":"Jiang","sequence":"additional","affiliation":[{"name":"Mashang Consumer Finance Co., Ltd., Beijing, China"}]},{"given":"Hongbin","family":"Wang","sequence":"additional","affiliation":[{"name":"Mashang Consumer Finance Co., Ltd., Beijing, China"}]},{"given":"Weiping","family":"Wang","sequence":"additional","affiliation":[{"name":"Institute of Information Engineering, Chinese Academy of Sciences, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2021,10,17]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"crossref","unstructured":"Youngmin Baek Bado Lee Dongyoon Han Sangdoo Yun and Hwalsuk Lee. 2019. Character region awareness for text detection. In CVPR. 9365--9374. Youngmin Baek Bado Lee Dongyoon Han Sangdoo Yun and Hwalsuk Lee. 2019. Character region awareness for text detection. In CVPR. 9365--9374.","DOI":"10.1109\/CVPR.2019.00959"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Yudi Chen Wei Wang Yu Zhou Fei Yang Dongbao Yang and Weiping Wang. 2020. Self-Training for Domain Adaptive Scene Text Detection. In ICPR. 850--857. Yudi Chen Wei Wang Yu Zhou Fei Yang Dongbao Yang and Weiping Wang. 2020. Self-Training for Domain Adaptive Scene Text Detection. In ICPR. 850--857.","DOI":"10.1109\/ICPR48806.2021.9412558"},{"key":"e_1_3_2_1_3_1","first-page":"137","article-title":"Constrained Relation Network for Character Detection in Scene Images","volume":"11672","author":"Chen Yudi","year":"2019","unstructured":"Yudi Chen , Yu Zhou , Dongbao Yang , and Weiping Wang . 2019 . Constrained Relation Network for Character Detection in Scene Images . In PRICAI , Vol. 11672. 137 -- 149 . Yudi Chen, Yu Zhou, Dongbao Yang, and Weiping Wang. 2019. Constrained Relation Network for Character Detection in Scene Images. In PRICAI, Vol. 11672. 137--149.","journal-title":"PRICAI"},{"key":"e_1_3_2_1_4_1","volume-title":"Total-text: A comprehensive dataset for scene text detection and recognition. In ICDAR. 935--942.","author":"Ch'ng Chee Kheng","year":"2017","unstructured":"Chee Kheng Ch'ng and Chee Seng Chan . 2017 . Total-text: A comprehensive dataset for scene text detection and recognition. In ICDAR. 935--942. Chee Kheng Ch'ng and Chee Seng Chan. 2017. Total-text: A comprehensive dataset for scene text detection and recognition. In ICDAR. 935--942."},{"key":"e_1_3_2_1_5_1","unstructured":"Xuangeng Chu Anlin Zheng Xiangyu Zhang and Jian Sun. 2020. Detection in crowded scenes: One proposal multiple predictions. In CVPR. 12214--12223. Xuangeng Chu Anlin Zheng Xiangyu Zhang and Jian Sun. 2020. Detection in crowded scenes: One proposal multiple predictions. In CVPR. 12214--12223."},{"key":"e_1_3_2_1_6_1","volume-title":"Pixellink: Detecting scene text via instance segmentation. In AAAI. 6773--6780.","author":"Deng Dan","year":"2018","unstructured":"Dan Deng , Haifeng Liu , Xuelong Li , and Deng Cai . 2018 . Pixellink: Detecting scene text via instance segmentation. In AAAI. 6773--6780. Dan Deng, Haifeng Liu, Xuelong Li, and Deng Cai. 2018. Pixellink: Detecting scene text via instance segmentation. In AAAI. 6773--6780."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Wei Feng Wenhao He Fei Yin Xu-Yao Zhang and Cheng-Lin Liu. 2019. TextDragon: An end-to-end framework for arbitrary shaped text spotting. In ICCV. 9076--9085. Wei Feng Wenhao He Fei Yin Xu-Yao Zhang and Cheng-Lin Liu. 2019. TextDragon: An end-to-end framework for arbitrary shaped text spotting. In ICCV. 9076--9085.","DOI":"10.1109\/ICCV.2019.00917"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Ankush Gupta Andrea Vedaldi and Andrew Zisserman. 2016. Synthetic data for text localisation in natural images. In CVPR. 2315--2324. Ankush Gupta Andrea Vedaldi and Andrew Zisserman. 2016. Synthetic data for text localisation in natural images. In CVPR. 2315--2324.","DOI":"10.1109\/CVPR.2016.254"},{"key":"e_1_3_2_1_10_1","unstructured":"Kaiming He Georgia Gkioxari Piotr Doll\u00e1r and Ross Girshick. 2017a. Mask R-CNN. In ICCV. 2980--2988. Kaiming He Georgia Gkioxari Piotr Doll\u00e1r and Ross Girshick. 2017a. Mask R-CNN. In ICCV. 2980--2988."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Pan He Weilin Huang Tong He Qile Zhu Yu Qiao and Xiaolin Li. 2017b. Single shot text detector with regional attention. In ICCV. 3047--3055. Pan He Weilin Huang Tong He Qile Zhu Yu Qiao and Xiaolin Li. 2017b. Single shot text detector with regional attention. In ICCV. 3047--3055.","DOI":"10.1109\/ICCV.2017.331"},{"key":"e_1_3_2_1_12_1","unstructured":"Wenhao He Xu-Yao Zhang Fei Yin and Cheng-Lin Liu. 2017c. Deep direct regression for multi-oriented scene text detection. In ICCV. 745--753. Wenhao He Xu-Yao Zhang Fei Yin and Cheng-Lin Liu. 2017c. Deep direct regression for multi-oriented scene text detection. In ICCV. 745--753."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2015.7333942"},{"key":"e_1_3_2_1_14_1","volume-title":"Video 3D Sampling for Self-supervised Representation Learning. CoRR","author":"Li Wei","year":"2021","unstructured":"Wei Li , Dezhao Luo , Bo Fang , Yu Zhou , and Weiping Wang . 2021. Video 3D Sampling for Self-supervised Representation Learning. CoRR , Vol. abs\/ 2107 .03578 ( 2021 ). Wei Li, Dezhao Luo, Bo Fang, Yu Zhou, and Weiping Wang. 2021. Video 3D Sampling for Self-supervised Representation Learning. CoRR, Vol. abs\/2107.03578 (2021)."},{"key":"e_1_3_2_1_15_1","volume-title":"Mask textSpotter: An end-to-end trainable neural network for spotting text with arbitrary shapes","author":"Liao Minghui","year":"2021","unstructured":"Minghui Liao , Pengyuan Lyu , Minghang He , Cong Yao , Wenhao Wu , and Xiang Bai . 2021. Mask textSpotter: An end-to-end trainable neural network for spotting text with arbitrary shapes . IEEE TPAMI ( 2021 ), 532--548. Minghui Liao, Pengyuan Lyu, Minghang He, Cong Yao, Wenhao Wu, and Xiang Bai. 2021. Mask textSpotter: An end-to-end trainable neural network for spotting text with arbitrary shapes. IEEE TPAMI (2021), 532--548."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Minghui Liao Guan Pang Jing Huang Tal Hassner and Xiang Bai. 2020 a. Mask TextSpotter v3: Segmentation proposal network for robust scene text spotting. In ECCV. 706--722. Minghui Liao Guan Pang Jing Huang Tal Hassner and Xiang Bai. 2020 a. Mask TextSpotter v3: Segmentation proposal network for robust scene text spotting. In ECCV. 706--722.","DOI":"10.1007\/978-3-030-58621-8_41"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2825107"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Minghui Liao Zhaoyi Wan Cong Yao Kai Chen and Xiang Bai. 2020 b. Real-time scene text detection with differentiable binarization. In AAAI. 11474--11481. Minghui Liao Zhaoyi Wan Cong Yao Kai Chen and Xiang Bai. 2020 b. Real-time scene text detection with differentiable binarization. In AAAI. 11474--11481.","DOI":"10.1609\/aaai.v34i07.6812"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Minghui Liao Zhen Zhu Baoguang Shi Gui-song Xia and Xiang Bai. 2018b. Rotation-sensitive regression for oriented scene text detection. In CVPR. 5909--5918. Minghui Liao Zhen Zhu Baoguang Shi Gui-song Xia and Xiang Bai. 2018b. Rotation-sensitive regression for oriented scene text detection. In CVPR. 5909--5918.","DOI":"10.1109\/CVPR.2018.00619"},{"key":"e_1_3_2_1_20_1","unstructured":"Tsung-Yi Lin Piotr Doll\u00e1r Ross Girshick Kaiming He Bharath Hariharan and Serge Belongie. 2017. Feature pyramid networks for object detection. In CVPR. 2117--2125. Tsung-Yi Lin Piotr Doll\u00e1r Ross Girshick Kaiming He Bharath Hariharan and Serge Belongie. 2017. Feature pyramid networks for object detection. In CVPR. 2117--2125."},{"key":"e_1_3_2_1_21_1","volume-title":"2020 a. ASTS: A unified framework for arbitrary shape text spotting","author":"Liu Juhua","year":"2020","unstructured":"Juhua Liu , Zhe Chen , Bo Du , and Dacheng Tao . 2020 a. ASTS: A unified framework for arbitrary shape text spotting . IEEE TIP ( 2020 ), 5924--5936. Juhua Liu, Zhe Chen, Bo Du, and Dacheng Tao. 2020 a. ASTS: A unified framework for arbitrary shape text spotting. IEEE TIP (2020), 5924--5936."},{"key":"e_1_3_2_1_22_1","unstructured":"Yuliang Liu Hao Chen Chunhua Shen Tong He Lianwen Jin and Liangwei Wang. 2020 b. ABCNet: Real-time scene text spotting with adaptive bezier-curve network. In CVPR. 9806--9815. Yuliang Liu Hao Chen Chunhua Shen Tong He Lianwen Jin and Liangwei Wang. 2020 b. ABCNet: Real-time scene text spotting with adaptive bezier-curve network. In CVPR. 9806--9815."},{"key":"e_1_3_2_1_23_1","unstructured":"Yuliang Liu and Lianwen Jin. 2017. Deep matching prior network: Toward tighter multi-oriented text detection. In CVPR. 3454--3461. Yuliang Liu and Lianwen Jin. 2017. Deep matching prior network: Toward tighter multi-oriented text detection. In CVPR. 3454--3461."},{"key":"e_1_3_2_1_24_1","volume-title":"2019 a. Arbitrarily shaped scene text detection with a mask tightness text detector","author":"Liu Yuliang","year":"2019","unstructured":"Yuliang Liu , Lianwen Jin , and Chuanming Fang . 2019 a. Arbitrarily shaped scene text detection with a mask tightness text detector . IEEE TIP ( 2019 ), 2918--2930. Yuliang Liu, Lianwen Jin, and Chuanming Fang. 2019 a. Arbitrarily shaped scene text detection with a mask tightness text detector. IEEE TIP (2019), 2918--2930."},{"key":"e_1_3_2_1_25_1","volume-title":"2019 b. Curved scene text detection via transverse and longitudinal sequence connection. PR","author":"Liu Yuliang","year":"2019","unstructured":"Yuliang Liu , Lianwen Jin , Shuaitao Zhang , Canjie Luo , and Sheng Zhang . 2019 b. Curved scene text detection via transverse and longitudinal sequence connection. PR ( 2019 ), 337--345. Yuliang Liu, Lianwen Jin, Shuaitao Zhang, Canjie Luo, and Sheng Zhang. 2019 b. Curved scene text detection via transverse and longitudinal sequence connection. PR (2019), 337--345."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.5555\/3367243.3367463"},{"key":"e_1_3_2_1_27_1","unstructured":"Zichuan Liu Guosheng Lin Sheng Yang Jiashi Feng Weisi Lin and Wang Ling Goh. 2018. Learning markov clustering networks for scene text detection. In CVPR. 6936--6944. Zichuan Liu Guosheng Lin Sheng Yang Jiashi Feng Weisi Lin and Wang Ling Goh. 2018. Learning markov clustering networks for scene text detection. In CVPR. 6936--6944."},{"key":"e_1_3_2_1_28_1","unstructured":"Zichuan Liu Guosheng Lin Sheng Yang Fayao Liu Weisi Lin and Wang Ling Goh. 2019 c. Towards robust curve text detection with conditional spatial expansion. In CVPR. 7269--7278. Zichuan Liu Guosheng Lin Sheng Yang Fayao Liu Weisi Lin and Wang Ling Goh. 2019 c. Towards robust curve text detection with conditional spatial expansion. In CVPR. 7269--7278."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"crossref","unstructured":"Jonathan Long Evan Shelhamer and Trevor Darrell. 2015. Fully convolutional networks for semantic segmentation. In CVPR. 3431--3440. Jonathan Long Evan Shelhamer and Trevor Darrell. 2015. Fully convolutional networks for semantic segmentation. In CVPR. 3431--3440.","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"e_1_3_2_1_30_1","volume-title":"Textsnake: A flexible representation for detecting text of arbitrary shapes. In ECCV. 20--36.","author":"Long Shangbang","year":"2018","unstructured":"Shangbang Long , Jiaqiang Ruan , Wenjie Zhang , Xin He , Wenhao Wu , and Cong Yao . 2018 . Textsnake: A flexible representation for detecting text of arbitrary shapes. In ECCV. 20--36. Shangbang Long, Jiaqiang Ruan, Wenjie Zhang, Xin He, Wenhao Wu, and Cong Yao. 2018. Textsnake: A flexible representation for detecting text of arbitrary shapes. In ECCV. 20--36."},{"key":"e_1_3_2_1_31_1","volume-title":"2020 a. Exploring Relations in Untrimmed Videos for Self-Supervised Learning. CoRR","author":"Luo Dezhao","year":"2020","unstructured":"Dezhao Luo , Bo Fang , Yu Zhou , Yucan Zhou , Dayan Wu , and Weiping Wang . 2020 a. Exploring Relations in Untrimmed Videos for Self-Supervised Learning. CoRR , Vol. abs\/ 2008 .02711 ( 2020 ). Dezhao Luo, Bo Fang, Yu Zhou, Yucan Zhou, Dayan Wu, and Weiping Wang. 2020 a. Exploring Relations in Untrimmed Videos for Self-Supervised Learning. CoRR, Vol. abs\/2008.02711 (2020)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"Dezhao Luo Chang Liu Yu Zhou Dongbao Yang Can Ma Qixiang Ye and Weiping Wang. 2020 b. Video Cloze Procedure for Self-Supervised Spatio-Temporal Learning. In AAAI. 11701--11708. Dezhao Luo Chang Liu Yu Zhou Dongbao Yang Can Ma Qixiang Ye and Weiping Wang. 2020 b. Video Cloze Procedure for Self-Supervised Spatio-Temporal Learning. In AAAI. 11701--11708.","DOI":"10.1609\/aaai.v34i07.6840"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Pengyuan Lyu Minghui Liao Cong Yao Wenhao Wu and Xiang Bai. 2018a. Mask textSpotter: An end-to-end trainable neural network for spotting text with arbitrary shapes. In ECCV. 71--88. Pengyuan Lyu Minghui Liao Cong Yao Wenhao Wu and Xiang Bai. 2018a. Mask textSpotter: An end-to-end trainable neural network for spotting text with arbitrary shapes. In ECCV. 71--88.","DOI":"10.1007\/978-3-030-01264-9_5"},{"key":"e_1_3_2_1_34_1","unstructured":"Pengyuan Lyu Cong Yao Wenhao Wu Shuicheng Yan and Xiang Bai. 2018b. Multi-oriented scene text detection via corner localization and region segmentation. In CVPR. 7553--7563. Pengyuan Lyu Cong Yao Wenhao Wu Shuicheng Yan and Xiang Bai. 2018b. Multi-oriented scene text detection via corner localization and region segmentation. In CVPR. 7553--7563."},{"key":"e_1_3_2_1_35_1","volume-title":"ReLaText: Exploiting visual relationships for arbitrary-shaped scene text detection with graph convolutional networks. PR","author":"Ma Chixiang","year":"2021","unstructured":"Chixiang Ma , Lei Sun , Zhuoyao Zhong , and Qiang Huo . 2021. ReLaText: Exploiting visual relationships for arbitrary-shaped scene text detection with graph convolutional networks. PR ( 2021 ), 337--345. Chixiang Ma, Lei Sun, Zhuoyao Zhong, and Qiang Huo. 2021. ReLaText: Exploiting visual relationships for arbitrary-shaped scene text detection with graph convolutional networks. PR (2021), 337--345."},{"key":"e_1_3_2_1_36_1","unstructured":"Chixiang Ma Zhuoyao Zhong Lei Sun and Qiang Huo. 2019. A relation network based approach to curved text detection. In ICDAR. 707--713. Chixiang Ma Zhuoyao Zhong Lei Sun and Qiang Huo. 2019. A relation network based approach to curved text detection. In ICDAR. 707--713."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2018.2818020"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"Nibal Nayef Fei Yin Imen Bizid Hyunsoo Choi Yuan Feng Dimosthenis Karatzas Zhenbo Luo Umapada Pal Christophe Rigaud Joseph Chazalon etal 2017. Icdar2017 robust reading challenge on multi-lingual scene text detection and script identification-rrc-mlt. In ICDAR. 1454--1459. Nibal Nayef Fei Yin Imen Bizid Hyunsoo Choi Yuan Feng Dimosthenis Karatzas Zhenbo Luo Umapada Pal Christophe Rigaud Joseph Chazalon et al. 2017. Icdar2017 robust reading challenge on multi-lingual scene text detection and script identification-rrc-mlt. In ICDAR. 1454--1459.","DOI":"10.1109\/ICDAR.2017.237"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"Zhi Qiao Xugong Qin Yu Zhou Fei Yang and Weiping Wang. 2020 a. Gaussian Constrained Attention Network for Scene Text Recognition. In ICPR. 3328--3335. Zhi Qiao Xugong Qin Yu Zhou Fei Yang and Weiping Wang. 2020 a. Gaussian Constrained Attention Network for Scene Text Recognition. In ICPR. 3328--3335.","DOI":"10.1109\/ICPR48806.2021.9412806"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","unstructured":"Zhi Qiao Yu Zhou Dongbao Yang Yucan Zhou and Weiping Wang. 2020 b. SEED: Semantics Enhanced Encoder-Decoder Framework for Scene Text Recognition. In CVPR. 13525--13534. Zhi Qiao Yu Zhou Dongbao Yang Yucan Zhou and Weiping Wang. 2020 b. SEED: Semantics Enhanced Encoder-Decoder Framework for Scene Text Recognition. In CVPR. 13525--13534.","DOI":"10.1109\/CVPR42600.2020.01354"},{"key":"e_1_3_2_1_41_1","unstructured":"Siyang Qin Alessandro Bissacco Michalis Raptis Yasuhisa Fujii and Ying Xiao. 2019 a. Towards unconstrained end-to-end text spotting. In ICCV. 4704--4714. Siyang Qin Alessandro Bissacco Michalis Raptis Yasuhisa Fujii and Ying Xiao. 2019 a. Towards unconstrained end-to-end text spotting. In ICCV. 4704--4714."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Xugong Qin Yu Zhou Youhui Guo Dayan Wu and Weiping Wang. 2021. FC(^2 )RN: A Fully Convolutional Corner Refinement Network for Accurate Multi-Oriented Scene Text Detection. In ICASSP. 4350--4354. Xugong Qin Yu Zhou Youhui Guo Dayan Wu and Weiping Wang. 2021. FC(^2 )RN: A Fully Convolutional Corner Refinement Network for Accurate Multi-Oriented Scene Text Detection. In ICASSP. 4350--4354.","DOI":"10.1109\/ICASSP39728.2021.9413821"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"Xugong Qin Yu Zhou Dongbao Yang and Weiping Wang. 2019 b. Curved Text Detection in Natural Scene Images with Semi- and Weakly-Supervised Learning. In ICDAR. 559--564. Xugong Qin Yu Zhou Dongbao Yang and Weiping Wang. 2019 b. Curved Text Detection in Natural Scene Images with Semi- and Weakly-Supervised Learning. In ICDAR. 559--564.","DOI":"10.1109\/ICDAR.2019.00095"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.5555\/2969239.2969250"},{"key":"e_1_3_2_1_45_1","unstructured":"Baoguang Shi Xiang Bai and Serge Belongie. 2017a. Detecting oriented text in natural images by linking segments. In CVPR. 2550--2558. Baoguang Shi Xiang Bai and Serge Belongie. 2017a. Detecting oriented text in natural images by linking segments. In CVPR. 2550--2558."},{"key":"e_1_3_2_1_46_1","unstructured":"Baoguang Shi Cong Yao Minghui Liao Mingkun Yang Pei Xu Linyan Cui Serge Belongie Shijian Lu and Xiang Bai. 2017b. Icdar2017 competition on reading chinese text in the wild (rctw-17). In ICDAR. 1429--1434. Baoguang Shi Cong Yao Minghui Liao Mingkun Yang Pei Xu Linyan Cui Serge Belongie Shijian Lu and Xiang Bai. 2017b. Icdar2017 competition on reading chinese text in the wild (rctw-17). In ICDAR. 1429--1434."},{"key":"e_1_3_2_1_47_1","volume-title":"Detecting dense and arbitrary-shaped scene text by instance-aware component grouping. PR","author":"Tang Jun","year":"2019","unstructured":"Jun Tang , Zhibo Yang , Yongpan Wang , Qi Zheng , Yongchao Xu , and Xiang Bai . 2019. SegLink+ : Detecting dense and arbitrary-shaped scene text by instance-aware component grouping. PR ( 2019 ), 106954. Jun Tang, Zhibo Yang, Yongpan Wang, Qi Zheng, Yongchao Xu, and Xiang Bai. 2019. SegLink+: Detecting dense and arbitrary-shaped scene text by instance-aware component grouping. PR (2019), 106954."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"crossref","unstructured":"Zhi Tian Weilin Huang Tong He Pan He and Yu Qiao. 2016. Detecting text in natural image with connectionist text proposal network. In ECCV. 56--72. Zhi Tian Weilin Huang Tong He Pan He and Yu Qiao. 2016. Detecting text in natural image with connectionist text proposal network. In ECCV. 56--72.","DOI":"10.1007\/978-3-319-46484-8_4"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"crossref","unstructured":"Zhuotao Tian Michelle Shu Pengyuan Lyu Ruiyu Li Chao Zhou Xiaoyong Shen and Jiaya Jia. 2019. Learning shape-aware embedding for scene text detection. In CVPR. 4234--4243. Zhuotao Tian Michelle Shu Pengyuan Lyu Ruiyu Li Chao Zhou Xiaoyong Shen and Jiaya Jia. 2019. Learning shape-aware embedding for scene text detection. In CVPR. 4234--4243.","DOI":"10.1109\/CVPR.2019.00436"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413819"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"crossref","unstructured":"Fangfang Wang Liming Zhao Xi Li Xinchao Wang and Dacheng Tao. 2018. Geometry-aware scene text detection with instance transformation network. In CVPR. 1381--1389. Fangfang Wang Liming Zhao Xi Li Xinchao Wang and Dacheng Tao. 2018. Geometry-aware scene text detection with instance transformation network. In CVPR. 1381--1389.","DOI":"10.1109\/CVPR.2018.00150"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3350988"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"crossref","unstructured":"Wenhai Wang Enze Xie Xiang Li Wenbo Hou Tong Lu Gang Yu and Shuai Shao. 2019 c. Shape robust text detection with progressive scale expansion network. In CVPR. 9336--9345. Wenhai Wang Enze Xie Xiang Li Wenbo Hou Tong Lu Gang Yu and Shuai Shao. 2019 c. Shape robust text detection with progressive scale expansion network. In CVPR. 9336--9345.","DOI":"10.1109\/CVPR.2019.00956"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"crossref","unstructured":"Wenhai Wang Enze Xie Xiaoge Song Yuhang Zang Wenjia Wang Tong Lu Gang Yu and Chunhua Shen. 2019 d. Efficient and accurate arbitrary-shaped text detection with pixel aggregation network. In ICCV. 8440--8449. Wenhai Wang Enze Xie Xiaoge Song Yuhang Zang Wenjia Wang Tong Lu Gang Yu and Chunhua Shen. 2019 d. Efficient and accurate arbitrary-shaped text detection with pixel aggregation network. In ICCV. 8440--8449.","DOI":"10.1109\/ICCV.2019.00853"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"crossref","unstructured":"Xiaobing Wang Yingying Jiang Zhenbo Luo Cheng-Lin Liu Hyunsoo Choi and Sungjin Kim. 2019 a. Arbitrary shape scene text detection with adaptive text region representation. In CVPR. 6449--6458. Xiaobing Wang Yingying Jiang Zhenbo Luo Cheng-Lin Liu Hyunsoo Choi and Sungjin Kim. 2019 a. Arbitrary shape scene text detection with adaptive text region representation. In CVPR. 6449--6458.","DOI":"10.1109\/CVPR.2019.00661"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.5555\/3367032.3367167"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"crossref","unstructured":"Yuxin Wang Hongtao Xie Zheng-Jun Zha Mengting Xing Zilong Fu and Yongdong Zhang. 2020 b. ContourNet: Taking a further step toward accurate arbitrary-shaped scene text detection. In CVPR. 11750--11759. Yuxin Wang Hongtao Xie Zheng-Jun Zha Mengting Xing Zilong Fu and Yongdong Zhang. 2020 b. ContourNet: Taking a further step toward accurate arbitrary-shaped scene text detection. In CVPR. 11750--11759.","DOI":"10.1109\/CVPR42600.2020.01177"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"crossref","unstructured":"Yue Wu and Prem Natarajan. 2017. Self-organized text detection with minimal post-processing via border learning. In ICCV. 5000--5009. Yue Wu and Prem Natarajan. 2017. Self-organized text detection with minimal post-processing via border learning. In ICCV. 5000--5009.","DOI":"10.1109\/ICCV.2017.535"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"crossref","unstructured":"Shanyu Xiao Liangrui Peng Yan Ruijie An Keyu Yao Gang and Min Jaesik. 2020. Sequential deformation for accurate scene text detection. In ECCV. 108--124. Shanyu Xiao Liangrui Peng Yan Ruijie An Keyu Yao Gang and Min Jaesik. 2020. Sequential deformation for accurate scene text detection. In ECCV. 108--124.","DOI":"10.1007\/978-3-030-58526-6_7"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"crossref","unstructured":"Enze Xie Yuhang Zang Shuai Shao Gang Yu Cong Yao and Guangyao Li. 2019. Scene text detection with supervised pyramid context network. In AAAI. 9038--9045. Enze Xie Yuhang Zang Shuai Shao Gang Yu Cong Yao and Guangyao Li. 2019. Scene text detection with supervised pyramid context network. In AAAI. 9038--9045.","DOI":"10.1609\/aaai.v33i01.33019038"},{"key":"e_1_3_2_1_61_1","unstructured":"Youjiang Xu Jiaqi Duan Zhanghui Kuang Xiaoyu Yue Hongbin Sun Yue Guan and Wayne Zhang. 2019 a. Geometry normalization networks for accurate scene text detection. In ICCV. 9137--9146. Youjiang Xu Jiaqi Duan Zhanghui Kuang Xiaoyu Yue Hongbin Sun Yue Guan and Wayne Zhang. 2019 a. Geometry normalization networks for accurate scene text detection. In ICCV. 9137--9146."},{"key":"e_1_3_2_1_62_1","volume-title":"2019 b. TextField: Learning a deep direction field for irregular scene text detection","author":"Xu Yongchao","year":"2019","unstructured":"Yongchao Xu , Yukang Wang , Wei Zhou , Yongpan Wang , Zhibo Yang , and Xiang Bai . 2019 b. TextField: Learning a deep direction field for irregular scene text detection . IEEE TIP ( 2019 ), 5566--5579. Yongchao Xu, Yukang Wang, Wei Zhou, Yongpan Wang, Zhibo Yang, and Xiang Bai. 2019 b. TextField: Learning a deep direction field for irregular scene text detection. IEEE TIP (2019), 5566--5579."},{"key":"e_1_3_2_1_63_1","unstructured":"Chuhui Xue Shijian Lu and Fangneng Zhan. 2018. Accurate scene text detection through border semantics awareness and bootstrapping. In ECCV. 355--372. Chuhui Xue Shijian Lu and Fangneng Zhan. 2018. Accurate scene text detection through border semantics awareness and bootstrapping. In ECCV. 355--372."},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.5555\/3367032.3367173"},{"key":"e_1_3_2_1_65_1","volume-title":"Multi-View Correlation Distillation for Incremental Object Detection. CoRR","author":"Yang Dongbao","year":"2021","unstructured":"Dongbao Yang , Yu Zhou , and Weiping Wang . 2021. Multi-View Correlation Distillation for Incremental Object Detection. CoRR , Vol. abs\/ 2107 .01787 ( 2021 ). Dongbao Yang, Yu Zhou, and Weiping Wang. 2021. Multi-View Correlation Distillation for Incremental Object Detection. CoRR, Vol. abs\/2107.01787 (2021)."},{"key":"e_1_3_2_1_66_1","volume-title":"Two-Level Residual Distillation based Triple Network for Incremental Object Detection. CoRR","author":"Yang Dongbao","year":"2020","unstructured":"Dongbao Yang , Yu Zhou , Dayan Wu , Can Ma , Fei Yang , and Weiping Wang . 2020. Two-Level Residual Distillation based Triple Network for Incremental Object Detection. CoRR , Vol. abs\/ 2007 .13428 ( 2020 ). Dongbao Yang, Yu Zhou, Dayan Wu, Can Ma, Fei Yang, and Weiping Wang. 2020. Two-Level Residual Distillation based Triple Network for Incremental Object Detection. CoRR, Vol. abs\/2007.13428 (2020)."},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2014.2353813"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.5555\/2354409.2354851"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"crossref","unstructured":"Yuan Yao Chang Liu Dezhao Luo Yu Zhou and Qixiang Ye. 2020. Video Playback Rate Perception for Self-Supervised Spatio-Temporal Representation Learning. In CVPR. 6547--6556. Yuan Yao Chang Liu Dezhao Luo Yu Zhou and Qixiang Ye. 2020. Video Playback Rate Perception for Self-Supervised Spatio-Temporal Representation Learning. In CVPR. 6547--6556.","DOI":"10.1109\/CVPR42600.2020.00658"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"crossref","unstructured":"Jian Ye Zhe Chen Juhua Liu and Bo Du. 2020. TextFuseNet: Scene text detection with richer fused features. In IJCAI. 516--522. Jian Ye Zhe Chen Juhua Liu and Bo Du. 2020. TextFuseNet: Scene text detection with richer fused features. In IJCAI. 516--522.","DOI":"10.24963\/ijcai.2020\/72"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"crossref","unstructured":"Chengquan Zhang Borong Liang Zuming Huang Mengyi En Junyu Han Errui Ding and Xinghao Ding. 2019 a. Look more than once: An accurate detector for text of arbitrary shapes. In CVPR. 10552--10561. Chengquan Zhang Borong Liang Zuming Huang Mengyi En Junyu Han Errui Ding and Xinghao Ding. 2019 a. Look more than once: An accurate detector for text of arbitrary shapes. In CVPR. 10552--10561.","DOI":"10.1109\/CVPR.2019.01080"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"crossref","unstructured":"Sheng Zhang Yuliang Liu Lianwen Jin and Canjie Luo. 2018. Feature enhancement network: A refined scene text detector. In AAAI. 2612--2619. Sheng Zhang Yuliang Liu Lianwen Jin and Canjie Luo. 2018. Feature enhancement network: A refined scene text detector. In AAAI. 2612--2619.","DOI":"10.1609\/aaai.v32i1.11887"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"crossref","unstructured":"Shi-Xue Zhang Xiaobin Zhu Jie-Bo Hou Chang Liu Chun Yang Hongfa Wang and Xu-Cheng Yin. 2020 b. Deep relational reasoning graph network for arbitrary shape text detection. In CVPR. 9696--9705. Shi-Xue Zhang Xiaobin Zhu Jie-Bo Hou Chang Liu Chun Yang Hongfa Wang and Xu-Cheng Yin. 2020 b. Deep relational reasoning graph network for arbitrary shape text detection. In CVPR. 9696--9705.","DOI":"10.1109\/CVPR42600.2020.00972"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.5555\/3454287.3454301"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"crossref","unstructured":"Yifei Zhang Chang Liu Yu Zhou Wei Wang Weiping Wang and Qixiang Ye. 2020 a. Progressive Cluster Purification for Unsupervised Feature Learning. In ICPR. 8476--8483. Yifei Zhang Chang Liu Yu Zhou Wei Wang Weiping Wang and Qixiang Ye. 2020 a. Progressive Cluster Purification for Unsupervised Feature Learning. In ICPR. 8476--8483.","DOI":"10.1109\/ICPR48806.2021.9412301"},{"key":"e_1_3_2_1_76_1","volume-title":"Exploring Instance Relations for Unsupervised Feature Embedding. CoRR","author":"Zhang Yifei","year":"2021","unstructured":"Yifei Zhang , Yu Zhou , and Weiping Wang . 2021. Exploring Instance Relations for Unsupervised Feature Embedding. CoRR , Vol. abs\/ 2105 .03341 ( 2021 ). Yifei Zhang, Yu Zhou, and Weiping Wang. 2021. Exploring Instance Relations for Unsupervised Feature Embedding. CoRR, Vol. abs\/2105.03341 (2021)."},{"key":"e_1_3_2_1_77_1","volume-title":"EAST: An efficient and accurate scene text detector. In CVPR. 2642--2651.","author":"Zhou Xinyu","year":"2017","unstructured":"Xinyu Zhou , Cong Yao , He Wen , Yuzhi Wang , Shuchang Zhou , Weiran He , and Jiajun Liang . 2017 . EAST: An efficient and accurate scene text detector. In CVPR. 2642--2651. Xinyu Zhou, Cong Yao, He Wen, Yuzhi Wang, Shuchang Zhou, Weiran He, and Jiajun Liang. 2017. EAST: An efficient and accurate scene text detector. In CVPR. 2642--2651."},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413565"}],"event":{"name":"MM '21: ACM Multimedia Conference","location":"Virtual Event China","acronym":"MM '21","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 29th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3475178","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3474085.3475178","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:48:47Z","timestamp":1750193327000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3475178"}},"subtitle":["Rethinking Mask R-CNN for Dense and Arbitrary-Shaped Scene Text Detection"],"short-title":[],"issued":{"date-parts":[[2021,10,17]]},"references-count":78,"alternative-id":["10.1145\/3474085.3475178","10.1145\/3474085"],"URL":"https:\/\/doi.org\/10.1145\/3474085.3475178","relation":{},"subject":[],"published":{"date-parts":[[2021,10,17]]},"assertion":[{"value":"2021-10-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}