{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T15:16:26Z","timestamp":1758122186630,"version":"3.40.3"},"publisher-location":"Cham","reference-count":49,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031730009"},{"type":"electronic","value":"9783031730016"}],"license":[{"start":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T00:00:00Z","timestamp":1732665600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T00:00:00Z","timestamp":1732665600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73001-6_14","type":"book-chapter","created":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T10:22:38Z","timestamp":1732616558000},"page":"237-254","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["WAS: Dataset and\u00a0Methods for\u00a0Artistic Text Segmentation"],"prefix":"10.1007","author":[{"given":"Xudong","family":"Xie","sequence":"first","affiliation":[]},{"given":"Yuzhe","family":"Li","sequence":"additional","affiliation":[]},{"given":"Yang","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Zhifei","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Zhaowen","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Xiong","sequence":"additional","affiliation":[]},{"given":"Xiang","family":"Bai","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,27]]},"reference":[{"key":"14_CR1","doi-asserted-by":"crossref","unstructured":"Albishre, K., Albathan, M., Li, Y.: Effective 20 newsgroups dataset cleaning. In: IEEE\/WIC\/ACM International Conference on Web Intelligence and Intelligent Agent Technology, WI-IAT 2015, Singapore, December 6-9, 2015 - Volume III, pp. 98\u2013101. IEEE Computer Society (2015)","DOI":"10.1109\/WI-IAT.2015.90"},{"key":"14_CR2","doi-asserted-by":"publisher","unstructured":"Bonechi, S., Andreini, P., Bianchini, M., Scarselli, F.: COCO_TS dataset: pixel\u2013level annotations based on weak supervision for scene text segmentation. In: Tetko, I.V., K\u016frkov\u00e1, V., Karpov, P., Theis, F. (eds.) ICANN 2019. LNCS, vol. 11729, pp. 238\u2013250. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-30508-6_20","DOI":"10.1007\/978-3-030-30508-6_20"},{"key":"14_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.patrec.2020.06.023","volume":"138","author":"S Bonechi","year":"2020","unstructured":"Bonechi, S., Bianchini, M., Scarselli, F., Andreini, P.: Weak supervision for generating pixel-level annotations in scene text segmentation. Pattern Recogn. Lett. 138, 1\u20137 (2020)","journal-title":"Pattern Recogn. Lett."},{"key":"14_CR4","unstructured":"Bubeck, S., et al.: Sparks of artificial general intelligence: early experiments with GPT-4 (2023)"},{"key":"14_CR5","unstructured":"Chen, J., Huang, Y., LV, T., Cui, L., Chen, Q., Wei, F.: Textdiffuser: diffusion models as text painters. In: Oh, A., Neumann, T., Globerson, A., Saenko, K., Hardt, M., Levine, S. (eds.) Advances in Neural Information Processing Systems, vol.\u00a036, pp. 9353\u20139387 (2023)"},{"issue":"4","key":"14_CR6","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"L Chen","year":"2018","unstructured":"Chen, L., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.L.: DeepLab: semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs. IEEE Trans. Pattern Anal. Mach. Intell. 40(4), 834\u2013848 (2018)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"14_CR7","doi-asserted-by":"crossref","unstructured":"Chen, L.C., Zhu, Y., Papandreou, G., Schroff, F., Adam, H.: Encoder-decoder with atrous separable convolution for semantic image segmentation. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"14_CR8","doi-asserted-by":"crossref","unstructured":"Cheng, B., Misra, I., Schwing, A.G., Kirillov, A., Girdhar, R.: Masked-attention mask transformer for universal image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1290\u20131299 (2022)","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"14_CR9","unstructured":"Cheng, B., Schwing, A., Kirillov, A.: Per-pixel classification is not all you need for semantic segmentation. In: Advance in Neural Information Processing System, vol. 34, pp. 17864\u201317875 (2021)"},{"key":"14_CR10","doi-asserted-by":"crossref","unstructured":"Ch\u2019ng, C.K., Chan, C.S.: Total-text: a comprehensive dataset for scene text detection and recognition. In: 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR), vol.\u00a01, pp. 935\u2013942. IEEE (2017)","DOI":"10.1109\/ICDAR.2017.157"},{"key":"14_CR11","unstructured":"Contributors, M.: MMSegmentation: Openmmlab semantic segmentation toolbox and benchmark (2020). https:\/\/github.com\/open-mmlab\/mmsegmentation"},{"key":"14_CR12","doi-asserted-by":"crossref","unstructured":"Diaz-Escobar, J., Kober, V.: Natural scene text detection and segmentation using phase-based regions and character retrieval. Math. Prob. Eng. 2020 (2020)","DOI":"10.1155\/2020\/7067251"},{"key":"14_CR13","doi-asserted-by":"crossref","unstructured":"Ji, Y., et al.: DDP: diffusion model for dense visual prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 21741\u201321752 (2023)","DOI":"10.1109\/ICCV51070.2023.01987"},{"key":"14_CR14","doi-asserted-by":"crossref","unstructured":"Karatzas, D., et al.: ICDAR 2013 robust reading competition. In: 2013 12th International Conference on Document Analysis and Recognition, pp. 1484\u20131493. IEEE (2013)","DOI":"10.1109\/ICDAR.2013.221"},{"key":"14_CR15","doi-asserted-by":"crossref","unstructured":"Li, D., Ling, H., Kim, S.W., Kreis, K., Fidler, S., Torralba, A.: BigDatasetGAN: synthesizing imageNet with pixel-wise annotations. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022, New Orleans, LA, USA, June 18-24, 2022, pp. 21298\u201321308. IEEE (2022)","DOI":"10.1109\/CVPR52688.2022.02064"},{"key":"14_CR16","unstructured":"Li, J., Li, D., Savarese, S., Hoi, S.: BLIP-2: bootstrapping language-image pre-training with frozen image encoders and large language models. In: Proceedings of the 40th International Conference on Machine Learning, vol.\u00a0202, pp. 19730\u201319742. PMLR (2023)"},{"key":"14_CR17","doi-asserted-by":"crossref","unstructured":"Li, W., He, Y., Qi, Y., Li, Z., Tang, Y.: FET-GAN: font and effect transfer via k-shot adaptive instance normalization. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a034, pp. 1717\u20131724 (2020)","DOI":"10.1609\/aaai.v34i02.5535"},{"key":"14_CR18","doi-asserted-by":"crossref","unstructured":"Li, Z., et al.: Monkey: image resolution and text label are important things for large multi-modal models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 26763\u201326773 (2024)","DOI":"10.1109\/CVPR52733.2024.02527"},{"key":"14_CR19","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: 7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6-9, 2019. OpenReview.net (2019)"},{"key":"14_CR20","doi-asserted-by":"crossref","unstructured":"Lyu, P., Bai, X., Yao, C., Zhu, Z., Huang, T., Liu, W.: Auto-encoder guided GAN for Chinese calligraphy synthesis. In: 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR), vol.\u00a01, pp. 1095\u20131100. IEEE (2017)","DOI":"10.1109\/ICDAR.2017.181"},{"key":"14_CR21","doi-asserted-by":"crossref","unstructured":"Milletari, F., Navab, N., Ahmadi, S.A.: V-net: fully convolutional neural networks for volumetric medical image segmentation. In: 2016 fourth international conference on 3D vision (3DV), pp. 565\u2013571. IEEE (2016)","DOI":"10.1109\/3DV.2016.79"},{"key":"14_CR22","doi-asserted-by":"crossref","unstructured":"Mishra, A., Alahari, K., Jawahar, C.: An MRF model for binarization of natural scene text. In: 2011 International Conference on Document Analysis and Recognition, pp. 11\u201316. IEEE (2011)","DOI":"10.1109\/ICDAR.2011.12"},{"key":"14_CR23","doi-asserted-by":"crossref","unstructured":"Nayef, N., et al.: ICDAR2017 robust reading challenge on multi-lingual scene text detection and script identification - RRC-MLT. In: 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR), vol.\u00a001, pp. 1454\u20131459 (2017)","DOI":"10.1109\/ICDAR.2017.237"},{"key":"14_CR24","unstructured":"Nguyen, Q., Vu, T., Tran, A., Nguyen, K.: Dataset diffusion: diffusion-based synthetic data generation for pixel-level semantic segmentation. In: Advances in Neural Information Processing Systems, pp. 76872\u201376892 (2023)"},{"key":"14_CR25","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022, New Orleans, LA, USA, June 18-24, 2022, pp. 10674\u201310685. IEEE (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"issue":"11","key":"14_CR26","doi-asserted-by":"publisher","first-page":"5298","DOI":"10.1109\/TIP.2017.2735182","volume":"26","author":"W Shen","year":"2017","unstructured":"Shen, W., Zhao, K., Jiang, Y., Wang, Y., Bai, X., Yuille, A.: Deepskeleton: learning multi-task scale-associated deep side outputs for object skeleton extraction in natural images. IEEE Trans. Image Process. 26(11), 5298\u20135311 (2017)","journal-title":"IEEE Trans. Image Process."},{"key":"14_CR27","doi-asserted-by":"crossref","unstructured":"Su, B., Lu, S., Tan, C.L.: Binarization of historical document images using the local maximum and minimum. In: Proceedings of the 9th IAPR International Workshop on Document Analysis Systems, pp. 159\u2013166 (2010)","DOI":"10.1145\/1815330.1815351"},{"issue":"3","key":"14_CR28","doi-asserted-by":"publisher","first-page":"1509","DOI":"10.1109\/TIP.2017.2656474","volume":"26","author":"Y Tang","year":"2017","unstructured":"Tang, Y., Wu, X.: Scene text detection and segmentation based on cascaded convolution neural networks. IEEE Trans. Image Process. 26(3), 1509\u20131520 (2017)","journal-title":"IEEE Trans. Image Process."},{"key":"14_CR29","unstructured":"Veit, A., Matera, T., Neumann, L., Matas, J., Belongie, S.: Coco-text: Dataset and benchmark for text detection and recognition in natural images. arXiv preprint arXiv:1601.07140 (2016)"},{"issue":"10","key":"14_CR30","doi-asserted-by":"publisher","first-page":"3349","DOI":"10.1109\/TPAMI.2020.2983686","volume":"43","author":"J Wang","year":"2020","unstructured":"Wang, J., et al.: Deep high-resolution representation learning for visual recognition. IEEE Trans. Pattern Anal. Mach. Intell. 43(10), 3349\u20133364 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"14_CR31","doi-asserted-by":"crossref","unstructured":"Wang, X., Huang, L., Liu, C.: A novel method for embedded text segmentation based on stroke and color. In: 2011 International Conference on Document Analysis and Recognition, pp. 151\u2013155. IEEE (2011)","DOI":"10.1109\/ICDAR.2011.39"},{"key":"14_CR32","doi-asserted-by":"crossref","unstructured":"Wang, Y., Xu, Y., Tsogkas, S., Bai, X., Dickinson, S., Siddiqi, K.: Deepflux for skeletons in the wild. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5287\u20135296 (2019)","DOI":"10.1109\/CVPR.2019.00543"},{"key":"14_CR33","doi-asserted-by":"crossref","unstructured":"Wang, Y., Xie, H., Wang, Z., Qu, Y., Zhang, Y.: What is the real need for scene text removal? Exploring the background integrity and erasure exhaustivity properties. IEEE Trans. Image Process. (2023)","DOI":"10.1109\/TIP.2023.3290517"},{"key":"14_CR34","doi-asserted-by":"crossref","unstructured":"Wu, L., et al.: Editing text in the wild. In: Proceedings of the 27th ACM International Conference on Multimedia, pp. 1500\u20131508. MM 2019, Association for Computing Machinery, New York (2019)","DOI":"10.1145\/3343031.3350929"},{"key":"14_CR35","unstructured":"Wu, W., et al.: DatasetDM: synthesizing data with perception annotations using diffusion models. In: Advances in Neural Information Processing Systems, vol.\u00a036, pp. 54683\u201354695 (2023)"},{"key":"14_CR36","doi-asserted-by":"crossref","unstructured":"Wu, W., Zhao, Y., Shou, M.Z., Zhou, H., Shen, C.: Diffumask: synthesizing images with pixel-level annotations for semantic segmentation using diffusion models. In: IEEE\/CVF International Conference on Computer Vision, ICCV 2023, Paris, France, October 1-6, 2023, pp. 1206\u20131217. IEEE (2023)","DOI":"10.1109\/ICCV51070.2023.00117"},{"key":"14_CR37","unstructured":"Xie, E., Wang, W., Yu, Z., Anandkumar, A., Alvarez, J.M., Luo, P.: SegFormer: simple and efficient design for semantic segmentation with transformers. In: Advances in Neural Information Processing Systems, vol.\u00a034, pp. 12077\u201312090. Curran Associates, Inc. (2021)"},{"key":"14_CR38","doi-asserted-by":"crossref","unstructured":"Xie, J., Li, W., Li, X., Liu, Z., Ong, Y.S., Loy, C.C.: MosaicFusion: diffusion models as data augmenters for large vocabulary instance segmentation. CoRR abs\/2309.13042 (2023)","DOI":"10.1007\/s11263-024-02223-3"},{"key":"14_CR39","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/978-3-031-19815-1_18","volume-title":"ECCV 2022","author":"X Xie","year":"2022","unstructured":"Xie, X., Fu, L., Zhang, Z., Wang, Z., Bai, X.: Toward understanding worArt: corner-guided transformer for scene text recognition. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13688, pp. 303\u2013321. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19815-1_18"},{"key":"14_CR40","doi-asserted-by":"crossref","unstructured":"Xu, X., Zhang, Z., Wang, Z., Price, B., Wang, Z., Shi, H.: Rethinking text segmentation: a novel dataset and a text-specific refinement approach. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12045\u201312055 (2021)","DOI":"10.1109\/CVPR46437.2021.01187"},{"key":"14_CR41","doi-asserted-by":"crossref","unstructured":"Xu, X., Qi, Z., Ma, J., Zhang, H., Shan, Y., Qie, X.: BTS: a bi-lingual benchmark for text segmentation in the wild. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19152\u201319162 (2022)","DOI":"10.1109\/CVPR52688.2022.01856"},{"key":"14_CR42","doi-asserted-by":"crossref","unstructured":"Ye, H., Kuen, J., Liu, Q., Lin, Z.L., Price, B., Xu, D.: SegGen: supercharging segmentation models with text2mask and mask2img synthesis. CoRR abs\/2311.03355 (2023)","DOI":"10.1007\/978-3-031-73242-3_20"},{"key":"14_CR43","doi-asserted-by":"crossref","unstructured":"Yu, H., Wang, X., Niu, K., Li, B., Xue, X.: Scene text segmentation with text-focused transformers. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 2898\u20132907 (2023)","DOI":"10.1145\/3581783.3611755"},{"key":"14_CR44","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1007\/978-3-030-58539-6_11","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Y Yuan","year":"2020","unstructured":"Yuan, Y., Chen, X., Wang, J.: Object-contextual representations for semantic segmentation. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12351, pp. 173\u2013190. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58539-6_11"},{"key":"14_CR45","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models. In: IEEE\/CVF International Conference on Computer Vision, ICCV 2023, Paris, France, October 1-6, 2023, pp. 3813\u20133824. IEEE (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"issue":"3","key":"14_CR46","doi-asserted-by":"publisher","first-page":"236","DOI":"10.1145\/357994.358023","volume":"27","author":"TY Zhang","year":"1984","unstructured":"Zhang, T.Y., Suen, C.Y.: A fast parallel algorithm for thinning digital patterns. Commun. ACM 27(3), 236\u2013239 (1984)","journal-title":"Commun. ACM"},{"key":"14_CR47","doi-asserted-by":"crossref","unstructured":"Zhang, Y., et al.: DatasetGAN: efficient labeled data factory with minimal human effort. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2021, virtual, June 19-25, 2021, pp. 10145\u201310155. Computer Vision Foundation \/ IEEE (2021)","DOI":"10.1109\/CVPR46437.2021.01001"},{"key":"14_CR48","doi-asserted-by":"crossref","unstructured":"Zhao, H., Shi, J., Qi, X., Wang, X., Jia, J.: Pyramid scene parsing network. In: CVPR, pp. 2881\u20132890 (2017)","DOI":"10.1109\/CVPR.2017.660"},{"key":"14_CR49","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable DETR: deformable transformers for end-to-end object detection. In: 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3-7, 2021. OpenReview.net (2021)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73001-6_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T11:10:33Z","timestamp":1732619433000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73001-6_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,27]]},"ISBN":["9783031730009","9783031730016"],"references-count":49,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73001-6_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,27]]},"assertion":[{"value":"27 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}