{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T19:26:59Z","timestamp":1757618819672,"version":"3.44.0"},"publisher-location":"Singapore","reference-count":27,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819697939"},{"type":"electronic","value":"9789819697946"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-9794-6_30","type":"book-chapter","created":{"date-parts":[[2025,7,14]],"date-time":"2025-07-14T06:12:08Z","timestamp":1752473528000},"page":"357-368","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["CNNFormer: A CNN-Transformer Hybrid Model for Referring Image Segmentation"],"prefix":"10.1007","author":[{"given":"Kangsai","family":"Yao","sequence":"first","affiliation":[]},{"given":"Guang","family":"Feng","sequence":"additional","affiliation":[]},{"given":"Xizhan","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Xiaofeng","family":"Qu","sequence":"additional","affiliation":[]},{"given":"Sijie","family":"Niu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,15]]},"reference":[{"key":"30_CR1","doi-asserted-by":"crossref","unstructured":"Ding, H., Liu, C., Wang, S., Jiang, X.: Vision-language transformer and query generation for referring segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 16321\u201316330. IEEE, Montreal (2021)","DOI":"10.1109\/ICCV48922.2021.01601"},{"key":"30_CR2","doi-asserted-by":"publisher","unstructured":"Hu, R., Rohrbach, M., Darrell, T.: Segmentation from natural language expressions. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) 14th European Conference on Computer Vision (ECCV 2016), pp. 108\u2013124. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_7","DOI":"10.1007\/978-3-319-46448-0_7"},{"key":"30_CR3","doi-asserted-by":"crossref","unstructured":"Yu, L., et al.: MAttNet: modular attention network for referring expression comprehension. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1307\u20131315. IEEE, Salt Lake City (2018)","DOI":"10.1109\/CVPR.2018.00142"},{"key":"30_CR4","doi-asserted-by":"crossref","unstructured":"Feng, G., Hu, Z., Zhang, L., Lu, H.: Encoder fusion network with co-attention embedding for referring image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 15506\u201315515. IEEE, Nashville (2021)","DOI":"10.1109\/CVPR46437.2021.01525"},{"key":"30_CR5","unstructured":"Chen, Y.-W., Tsai, Y.-H., Wang, T., Lin, Y.-Y., Yang, M.-H.: Referring expression object segmentation with caption-aware consistency. arXiv preprint arXiv:1910.04748 (2019)"},{"key":"30_CR6","doi-asserted-by":"crossref","unstructured":"Yang, Z., Wang, J., Tang, Y., Chen, K., Zhao, H., Torr, P.H.: LAVT: language-Aware Vision Transformer for referring image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 18155\u201318165. IEEE, New Orleans (2022)","DOI":"10.1109\/CVPR52688.2022.01762"},{"key":"30_CR7","doi-asserted-by":"crossref","unstructured":"Hu, Y., et al.: Beyond one-to-one: rethinking the referring image segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 4067\u20134077. IEEE, Paris (2023)","DOI":"10.1109\/ICCV51070.2023.00376"},{"key":"30_CR8","doi-asserted-by":"crossref","unstructured":"Xu, Z., Chen, Z., Zhang, Y., Song, Y., Wan, X., Li, G.: Bridging vision and language encoders: parameter-efficient tuning for referring image segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 17503\u201317512. IEEE, Paris (2023)","DOI":"10.1109\/ICCV51070.2023.01605"},{"key":"30_CR9","doi-asserted-by":"crossref","unstructured":"Hu, Z., Feng, G., Sun, J., Zhang, L., Lu, H.: Bi-directional relationship inferring network for referring image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10632\u201310641. IEEE, Seattle (2020)","DOI":"10.1109\/CVPR42600.2020.00448"},{"key":"30_CR10","doi-asserted-by":"publisher","unstructured":"Hui, T., et al.: Linguistic structure guided context modeling for referring image segmentation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 1247\u20131258. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58607-2_4","DOI":"10.1007\/978-3-030-58607-2_4"},{"key":"30_CR11","doi-asserted-by":"crossref","unstructured":"Huang, S., et al.: Referring image segmentation via cross-modal progressive comprehension. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4566\u20134575. IEEE, Seattle (2020)","DOI":"10.1109\/CVPR42600.2020.01050"},{"key":"30_CR12","doi-asserted-by":"crossref","unstructured":"Jing, Y., Kong, T., Wang, W., Wang, L., Li, L., Tan, T.: Locate then segment: a strong pipeline for referring image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 9858\u20139867. IEEE (2021)","DOI":"10.1109\/CVPR46437.2021.00973"},{"key":"30_CR13","doi-asserted-by":"crossref","unstructured":"Tang, J., Zheng, G., Shi, C., Yang, S.: Contrastive grouping with transformer for referring image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 23570\u201323580. IEEE, Vancouver (2023)","DOI":"10.1109\/CVPR52729.2023.02257"},{"key":"30_CR14","doi-asserted-by":"crossref","unstructured":"Kim, N., Kim, D., Lan, C., Zeng, W., Kwak, S.: ReSTR: convolution-free referring image segmentation using transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 18145\u201318154. IEEE, New Orleans (2022)","DOI":"10.1109\/CVPR52688.2022.01761"},{"key":"30_CR15","doi-asserted-by":"crossref","unstructured":"Wang, Z., et al.: CRIS: CLIP-driven referring image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11686\u201311695. IEEE, New Orleans (2022)","DOI":"10.1109\/CVPR52688.2022.01139"},{"key":"30_CR16","doi-asserted-by":"crossref","unstructured":"Yue, P., et al.: Adaptive selection based referring image segmentation. In: Proceedings of the 32nd ACM International Conference on Multimedia (ACM MM), pp. 1101\u20131110. ACM, Ottawa (2024)","DOI":"10.1145\/3664647.3680850"},{"key":"30_CR17","doi-asserted-by":"crossref","unstructured":"Wang, W., et al.: Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 568\u2013578. IEEE, Montreal (2021)","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"30_CR18","unstructured":"Guo, M.-H., Lu, C.-Z., Hou, Q., Liu, Z., Cheng, M.-M., Hu, S.-M.: SegNext: rethinking convolutional attention design for semantic segmentation. arXiv preprint arXiv:2209.08575 (2022)"},{"key":"30_CR19","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: Proceedings of the 35th Conference on Neural Information Processing Systems (NeurIPS), pp. 1\u201325. Curran Associates (2021)"},{"key":"30_CR20","doi-asserted-by":"crossref","unstructured":"Liu, C., Ding, H., Jiang, X.: GRES: Generalized Referring Expression Segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 23592\u201323601. IEEE, Vancouver (2023)","DOI":"10.1109\/CVPR52729.2023.02259"},{"key":"30_CR21","doi-asserted-by":"crossref","unstructured":"Yang, Z., Wang, J., Tang, Y., Chen, K., Zhao, H., Torr, P.H.S.: LAVT: language-aware vision transformer for referring image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 18155\u201318165. IEEE, New Orleans (2022)","DOI":"10.1109\/CVPR52688.2022.01762"},{"key":"30_CR22","doi-asserted-by":"publisher","unstructured":"Yu, L., Poirson, P., Yang, S., Berg, A.C., Berg, T.L.: Modeling Context in Referring Expressions. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) European Conference on Computer Vision (ECCV), pp. 69\u201385. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46475-6_5","DOI":"10.1007\/978-3-319-46475-6_5"},{"key":"30_CR23","doi-asserted-by":"crossref","unstructured":"Mao, J., Huang, J., Toshev, A., Camburu, O., Yuille, A.L., Murphy, K.: Generation and comprehension of unambiguous object descriptions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11\u201320. IEEE, Las Vegas (2016)","DOI":"10.1109\/CVPR.2016.9"},{"key":"30_CR24","doi-asserted-by":"publisher","unstructured":"Nagaraja, V.K., Morariu, V.I., Davis, L.S.: Modeling context between objects for referring expression understanding. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) European Conference on Computer Vision (ECCV), pp. 792\u2013807. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_48","DOI":"10.1007\/978-3-319-46493-0_48"},{"key":"30_CR25","unstructured":"Li, X., et al.: Vision-Language pretraining: a survey of methods and applications. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1\u201312. IEEE, New Orleans (2022)"},{"key":"30_CR26","doi-asserted-by":"publisher","first-page":"5823","DOI":"10.1109\/TMM.2023.3340062","volume":"26","author":"Y Cho","year":"2024","unstructured":"Cho, Y., Yu, H., Kang, S.-J.: Cross-aware early fusion with stage-divided vision and language transformer encoders for referring image segmentation. IEEE Trans. Multimedia 26, 5823\u20135833 (2024)","journal-title":"IEEE Trans. Multimedia"},{"key":"30_CR27","doi-asserted-by":"crossref","unstructured":"Liu, Y., Xu, R., Tang, Y.: Fully aligned network for referring image segmentation. In: 2024 IEEE International Conference on Visual Communications and Image Processing (VCIP), pp. 1\u20135. IEEE, Kuching (2024)","DOI":"10.1109\/VCIP63160.2024.10849845"}],"container-title":["Lecture Notes in Computer Science","Advanced Intelligent Computing Technology and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-9794-6_30","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,7]],"date-time":"2025-09-07T10:18:17Z","timestamp":1757240297000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-9794-6_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819697939","9789819697946"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-9794-6_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"15 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ningbo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/icg\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}