{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:30:51Z","timestamp":1742913051882,"version":"3.40.3"},"publisher-location":"Cham","reference-count":39,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031728471"},{"type":"electronic","value":"9783031728488"}],"license":[{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72848-8_17","type":"book-chapter","created":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T13:37:41Z","timestamp":1732801061000},"page":"288-304","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["GTMS: A Gradient-Driven Tree-Guided Mask-Free Referring Image Segmentation Method"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-7403-3624","authenticated-orcid":false,"given":"Haoxin","family":"Lyu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3420-2798","authenticated-orcid":false,"given":"Tianxiong","family":"Zhong","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9386-9677","authenticated-orcid":false,"given":"Sanyuan","family":"Zhao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,29]]},"reference":[{"issue":"2","key":"17_CR1","doi-asserted-by":"publisher","first-page":"555","DOI":"10.1109\/TIP.2013.2291328","volume":"23","author":"L Bao","year":"2013","unstructured":"Bao, L., Song, Y., Yang, Q., Yuan, H., Wang, G.: Tree filtering: efficient structure-preserving smoothing with a minimum spanning tree. IEEE Trans. Image Process. 23(2), 555\u2013569 (2013)","journal-title":"IEEE Trans. Image Process."},{"key":"17_CR2","doi-asserted-by":"crossref","unstructured":"Chen, D.J., Jia, S., Lo, Y.C., Chen, H.T., Liu, T.L.: See-through-text grouping for referring image segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7454\u20137463 (2019)","DOI":"10.1109\/ICCV.2019.00755"},{"key":"17_CR3","doi-asserted-by":"crossref","unstructured":"Dai, J., He, K., Sun, J.: BoxSup: exploiting bounding boxes to supervise convolutional networks for semantic segmentation. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1635\u20131643 (2015)","DOI":"10.1109\/ICCV.2015.191"},{"key":"17_CR4","doi-asserted-by":"crossref","unstructured":"Ding, H., Liu, C., Wang, S., Jiang, X.: Vision-language transformer and query generation for referring segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 16321\u201316330 (2021)","DOI":"10.1109\/ICCV48922.2021.01601"},{"key":"17_CR5","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C.K., Winn, J., Zisserman, A.: The pascal visual object classes (VOC) challenge. Int. J. Comput. Vis. 88, 303\u2013338 (2010)","journal-title":"Int. J. Comput. Vis."},{"issue":"3","key":"17_CR6","doi-asserted-by":"publisher","first-page":"3927","DOI":"10.1109\/TNNLS.2022.3201372","volume":"35","author":"G Feng","year":"2022","unstructured":"Feng, G., Zhang, L., Hu, Z., Lu, H.: Learning from box annotations for referring image segmentation. IEEE Trans. Neural Netw. Learn. Syst. 35(3), 3927\u20133937 (2022)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"17_CR7","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"17_CR8","unstructured":"Hsu, C.C., Hsu, K.J., Tsai, C.C., Lin, Y.Y., Chuang, Y.Y.: Weakly supervised instance segmentation using the bounding box tightness prior. In: Advances in Neural Information Processing Systems 32 (2019)"},{"key":"17_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"108","DOI":"10.1007\/978-3-319-46448-0_7","volume-title":"Computer Vision \u2013 ECCV 2016","author":"R Hu","year":"2016","unstructured":"Hu, R., Rohrbach, M., Darrell, T.: Segmentation from natural language expressions. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 108\u2013124. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_7"},{"key":"17_CR10","doi-asserted-by":"crossref","unstructured":"Hu, Z., Feng, G., Sun, J., Zhang, L., Lu, H.: Bi-directional relationship inferring network for referring image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4424\u20134433 (2020)","DOI":"10.1109\/CVPR42600.2020.00448"},{"key":"17_CR11","doi-asserted-by":"crossref","unstructured":"Huang, S., et al.: Referring image segmentation via cross-modal progressive comprehension. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10488\u201310497 (2020)","DOI":"10.1109\/CVPR42600.2020.01050"},{"key":"17_CR12","doi-asserted-by":"crossref","unstructured":"Jain, K., Gandhi, V.: Comprehensive multi-modal interactions for referring image segmentation. arXiv preprint arXiv:2104.10412 (2021)","DOI":"10.18653\/v1\/2022.findings-acl.270"},{"key":"17_CR13","doi-asserted-by":"crossref","unstructured":"Kazemzadeh, S., Ordonez, V., Matten, M., Berg, T.: ReferitGame: referring to objects in photographs of natural scenes. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 787\u2013798 (2014)","DOI":"10.3115\/v1\/D14-1086"},{"key":"17_CR14","unstructured":"Kervadec, H., Dolz, J., Wang, S., Granger, E., Ayed, I.B.: Bounding boxes for weakly supervised segmentation: global constraints get close to full supervision. In: Medical Imaging with Deep Learning, pp. 365\u2013381. PMLR (2020)"},{"key":"17_CR15","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"17_CR16","doi-asserted-by":"crossref","unstructured":"Koga, T., Suetake, N.: Structural-context-preserving image abstraction by using space-filling curve based on minimum spanning tree. In: 2011 18th IEEE International Conference on Image Processing, pp. 1465\u20131468. IEEE (2011)","DOI":"10.1109\/ICIP.2011.6115719"},{"key":"17_CR17","unstructured":"Kr\u00e4henb\u00fchl, P., Koltun, V.: Efficient inference in fully connected CRFs with Gaussian edge potentials. In: Neural Information Processing Systems (2011)"},{"key":"17_CR18","doi-asserted-by":"crossref","unstructured":"Lee, J., Lee, S., Nam, J., Yu, S., Do, J., Taghavi, T.: Weakly supervised referring image segmentation with intra-chunk and inter-chunk consistency. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 21870\u201321881 (2023)","DOI":"10.1109\/ICCV51070.2023.01999"},{"key":"17_CR19","doi-asserted-by":"crossref","unstructured":"Lee, J., Yi, J., Shin, C., Yoon, S.: BBAM: bounding box attribution map for weakly supervised semantic and instance segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2643\u20132652 (2021)","DOI":"10.1109\/CVPR46437.2021.00267"},{"key":"17_CR20","unstructured":"Li, M., Sigal, L.: Referring transformer: a one-step approach to multi-task visual grounding. In: Advances in Neural Information Processing Systems 34, pp. 19652\u201319664 (2021)"},{"key":"17_CR21","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-031-19818-2_1","volume-title":"Computer Vision-ECCV 2022, Part XXIX","author":"W Li","year":"2022","unstructured":"Li, W., Liu, W., Zhu, J., Cui, M., Hua, X.S., Zhang, L.: Box-supervised instance segmentation with level set evolution. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022, Part XXIX. LNCS, vol. 13689, pp. 1\u201318. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19818-2_1"},{"key":"17_CR22","unstructured":"Li, Z., Wang, M., Mei, J., Liu, Y.: MaIL: a unified mask-image-language trimodal network for referring image segmentation. arXiv preprint arXiv:2111.10747 (2021)"},{"key":"17_CR23","doi-asserted-by":"crossref","unstructured":"Liang, Z., Wang, T., Zhang, X., Sun, J., Shen, J.: Tree energy loss: towards sparsely annotated semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16907\u201316916 (2022)","DOI":"10.1109\/CVPR52688.2022.01640"},{"key":"17_CR24","doi-asserted-by":"crossref","unstructured":"Liu, J., et al.: PolyFormer: referring image segmentation as sequential polygon generation. arXiv e-prints, arXiv-2302 (2023)","DOI":"10.1109\/CVPR52729.2023.01789"},{"key":"17_CR25","doi-asserted-by":"crossref","unstructured":"Milletari, F., Navab, N., Ahmadi, S.A.: V-Net: fully convolutional neural networks for volumetric medical image segmentation. In: 2016 Fourth International Conference on 3D Vision (3DV), pp. 565\u2013571. IEEE (2016)","DOI":"10.1109\/3DV.2016.79"},{"key":"17_CR26","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"792","DOI":"10.1007\/978-3-319-46493-0_48","volume-title":"Computer Vision \u2013 ECCV 2016","author":"VK Nagaraja","year":"2016","unstructured":"Nagaraja, V.K., Morariu, V.I., Davis, L.S.: Modeling context between objects for referring expression understanding. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 792\u2013807. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_48"},{"key":"17_CR27","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"17_CR28","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., Batra, D.: Grad-CAM: visual explanations from deep networks via gradient-based localization. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 618\u2013626 (2017)","DOI":"10.1109\/ICCV.2017.74"},{"key":"17_CR29","doi-asserted-by":"crossref","unstructured":"Song, C., Huang, Y., Ouyang, W., Wang, L.: Box-driven class-wise region masking and filling rate guided loss for weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3136\u20133145 (2019)","DOI":"10.1109\/CVPR.2019.00325"},{"key":"17_CR30","unstructured":"Song, L., et al.: Learnable tree filter for structure-preserving feature transform. In: Advances in Neural Information Processing Systems 32 (2019)"},{"key":"17_CR31","doi-asserted-by":"crossref","unstructured":"Stawiaski, J., Meyer, F.: Minimum spanning tree adaptive image filtering. In: 2009 16th IEEE International Conference on Image Processing (ICIP), pp. 2245\u20132248. IEEE (2009)","DOI":"10.1109\/ICIP.2009.5413942"},{"key":"17_CR32","doi-asserted-by":"crossref","unstructured":"Tian, Z., Shen, C., Wang, X., Chen, H.: BoxInst: high-performance instance segmentation with box annotations. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5443\u20135452 (2021)","DOI":"10.1109\/CVPR46437.2021.00540"},{"key":"17_CR33","doi-asserted-by":"crossref","unstructured":"Tu, W.C., He, S., Yang, Q., Chien, S.Y.: Real-time salient object detection with a minimum spanning tree. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2334\u20132342 (2016)","DOI":"10.1109\/CVPR.2016.256"},{"key":"17_CR34","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems 30 (2017)"},{"key":"17_CR35","doi-asserted-by":"crossref","unstructured":"Wang, Z., et al.: CRIS: CLIP-driven referring image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11686\u201311695 (2022)","DOI":"10.1109\/CVPR52688.2022.01139"},{"key":"17_CR36","doi-asserted-by":"crossref","unstructured":"Ye, L., Rochan, M., Liu, Z., Wang, Y.: Cross-modal self-attention network for referring image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10502\u201310511 (2019)","DOI":"10.1109\/CVPR.2019.01075"},{"key":"17_CR37","doi-asserted-by":"crossref","unstructured":"Yu, L., et al.: MAttNet: modular attention network for referring expression comprehension. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1307\u20131315 (2018)","DOI":"10.1109\/CVPR.2018.00142"},{"key":"17_CR38","doi-asserted-by":"crossref","unstructured":"Zhao, W., Rao, Y., Liu, Z., Liu, B., Zhou, J., Lu, J.: Unleashing text-to-image diffusion models for visual perception. arXiv preprint arXiv:2303.02153 (2023)","DOI":"10.1109\/ICCV51070.2023.00527"},{"key":"17_CR39","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Zhu, Y., Ye, Q., Qiu, Q., Jiao, J.: Weakly supervised instance segmentation using class peak response. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3791\u20133800 (2018)","DOI":"10.1109\/CVPR.2018.00399"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72848-8_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T14:08:11Z","timestamp":1732802891000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72848-8_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,29]]},"ISBN":["9783031728471","9783031728488"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72848-8_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,29]]},"assertion":[{"value":"29 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}