{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T04:22:37Z","timestamp":1745554957758,"version":"3.40.4"},"publisher-location":"Cham","reference-count":74,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031734038"},{"type":"electronic","value":"9783031734045"}],"license":[{"start":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T00:00:00Z","timestamp":1730246400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T00:00:00Z","timestamp":1730246400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73404-5_11","type":"book-chapter","created":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T16:03:13Z","timestamp":1730217793000},"page":"178-196","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["OLAF: A Plug-and-Play Framework for\u00a0Enhanced Multi-object Multi-part Scene Parsing"],"prefix":"10.1007","author":[{"given":"Pranav","family":"Gupta","sequence":"first","affiliation":[]},{"given":"Rishubh","family":"Singh","sequence":"additional","affiliation":[]},{"given":"Pradeep","family":"Shenoy","sequence":"additional","affiliation":[]},{"given":"Ravi Kiran","family":"Sarvadevabhatla","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,30]]},"reference":[{"key":"11_CR1","doi-asserted-by":"crossref","unstructured":"Achlioptas, P., Fan, J., Hawkins, R., Goodman, N., Guibas, L.J.: Shapeglot: learning language for shape differentiation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 8938\u20138947 (2019)","DOI":"10.1109\/ICCV.2019.00903"},{"key":"11_CR2","doi-asserted-by":"publisher","unstructured":"Azizpour, H., Laptev, I.: Object detection using strongly-supervised deformable part models. In: Computer Vision\u2013ECCV 2012: 12th European Conference on Computer Vision, Florence, Italy, October 7-13, 2012, Proceedings, Part I 12, pp. 836\u2013849. Springer (2012). https:\/\/doi.org\/10.1007\/978-3-642-33718-5_60","DOI":"10.1007\/978-3-642-33718-5_60"},{"issue":"12","key":"11_CR3","doi-asserted-by":"publisher","first-page":"2481","DOI":"10.1109\/TPAMI.2016.2644615","volume":"39","author":"V Badrinarayanan","year":"2017","unstructured":"Badrinarayanan, V., Kendall, A., Cipolla, R.: Segnet: a deep convolutional encoder-decoder architecture for image segmentation. IEEE Trans. Pattern Anal. Mach. Intell. 39(12), 2481\u20132495 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"11_CR4","doi-asserted-by":"crossref","unstructured":"Benenson, R., Popov, S., Ferrari, V.: Large-scale interactive object segmentation with human annotators. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 11700\u201311709 (2019)","DOI":"10.1109\/CVPR.2019.01197"},{"key":"11_CR5","doi-asserted-by":"publisher","unstructured":"Cai, Y., Zhou, W., Zhang, L., Yu, L., Luo, T.: Dhfnet: dual-decoding hierarchical fusion network for RGB-thermal semantic segmentation. The Visual Computer, pp. 1\u201311 (2023). https:\/\/doi.org\/10.1007\/s00371-023-02773-6","DOI":"10.1007\/s00371-023-02773-6"},{"key":"11_CR6","doi-asserted-by":"crossref","unstructured":"Canny, J.: A computational approach to edge detection. IEEE Trans. Patt. Anal. and Mach. Intel. 6, 679\u2013698 (1986)","DOI":"10.1109\/TPAMI.1986.4767851"},{"key":"11_CR7","unstructured":"Chen, L.C., Papandreou, G., Schroff, F., Adam, H.: Rethinking atrous convolution for semantic image segmentation. arXiv preprint arXiv:1706.05587 (2017)"},{"key":"11_CR8","doi-asserted-by":"crossref","unstructured":"Chen, L.C., Yang, Y., Wang, J., Xu, W., Yuille, A.L.: Attention to scale: scale-aware semantic image segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 3640\u20133649 (2016)","DOI":"10.1109\/CVPR.2016.396"},{"key":"11_CR9","doi-asserted-by":"crossref","unstructured":"Chen, L.C., Zhu, Y., Papandreou, G., Schroff, F., Adam, H.: Encoder-decoder with atrous separable convolution for semantic image segmentation. In: Proceedings of the European conference on computer vision (ECCV), pp. 801\u2013818 (2018)","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"11_CR10","unstructured":"Chen, M., Arti\u00e8res, T., Denoyer, L.: Unsupervised object segmentation by redrawing. Adv. Neural Inf. Proce. Syst. 32 (2019)"},{"key":"11_CR11","doi-asserted-by":"crossref","unstructured":"Chen, X., Mottaghi, R., Liu, X., Fidler, S., Urtasun, R., Yuille, A.: Detect what you can: detecting and representing objects using holistic models and body parts. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1971\u20131978 (2014)","DOI":"10.1109\/CVPR.2014.254"},{"key":"11_CR12","doi-asserted-by":"crossref","unstructured":"Cheng, B., et al.: Panoptic-deeplab: a simple, strong, and fast baseline for bottom-up panoptic segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 12475\u201312485 (2020)","DOI":"10.1109\/CVPR42600.2020.01249"},{"key":"11_CR13","doi-asserted-by":"crossref","unstructured":"Cho, J.H., Kr\u00e4henb\u00fchl, P., Ramanathan, V.: Partdistillation: learning parts from instance segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7152\u20137161 (2023)","DOI":"10.1109\/CVPR52729.2023.00691"},{"key":"11_CR14","unstructured":"Dery, L.M., Dauphin, Y., Grangier, D.: Auxiliary task update decomposition: The good, the bad and the neutral. arXiv preprint arXiv:2108.11346 (2021)"},{"key":"11_CR15","doi-asserted-by":"crossref","unstructured":"Dong, J., Chen, Q., Shen, X., Yang, J., Yan, S.: Towards unified human parsing and pose estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 843\u2013850 (2014)","DOI":"10.1109\/CVPR.2014.113"},{"key":"11_CR16","doi-asserted-by":"crossref","unstructured":"Dubrovina, A., Xia, F., Achlioptas, P., Shalah, M., Groscot, R., Guibas, L.J.: Composite shape modeling via latent space factorization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 8140\u20138149 (2019)","DOI":"10.1109\/ICCV.2019.00823"},{"key":"11_CR17","unstructured":"evadingban123: Computer vision discussion on reddit. https:\/\/www.reddit.com\/r\/computervision\/comments\/m6dno8\/comment\/gr65yvw\/ (2023). Accessed 29 Mar 2023"},{"key":"11_CR18","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C.K., Winn, J., Zisserman, A.: The pascal visual object classes (VOC) challenge. Int. J. Comput. Vision 88, 303\u2013338 (2010)","journal-title":"Int. J. Comput. Vision"},{"key":"11_CR19","doi-asserted-by":"crossref","unstructured":"Fang, H.S., Lu, G., Fang, X., Xie, J., Tai, Y.W., Lu, C.: Weakly and semi supervised human body part parsing via pose-guided knowledge transfer. arXiv preprint arXiv:1805.04310 (2018)","DOI":"10.1109\/CVPR.2018.00015"},{"key":"11_CR20","doi-asserted-by":"crossref","unstructured":"de\u00a0Geus, D., Meletis, P., Lu, C., Wen, X., Dubbelman, G.: Part-aware panoptic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5485\u20135494 (2021)","DOI":"10.1109\/CVPR46437.2021.00544"},{"key":"11_CR21","doi-asserted-by":"crossref","unstructured":"Gong, K., Liang, X., Li, Y., Chen, Y., Yang, M., Lin, L.: Instance-level human parsing via part grouping network. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 770\u2013785 (2018)","DOI":"10.1007\/978-3-030-01225-0_47"},{"key":"11_CR22","doi-asserted-by":"crossref","unstructured":"Haggag, H., Abobakr, A., Hossny, M., Nahavandi, S.: Semantic body parts segmentation for quadrupedal animals. In: 2016 IEEE International Conference on Systems, Man, and Cybernetics (SMC), pp. 000855\u2013000860. IEEE (2016)","DOI":"10.1109\/SMC.2016.7844347"},{"key":"11_CR23","doi-asserted-by":"crossref","unstructured":"Hariharan, B., Arbel\u00e1ez, P., Girshick, R., Malik, J.: Hypercolumns for object segmentation and fine-grained localization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern recognition, pp. 447\u2013456 (2015)","DOI":"10.1109\/CVPR.2015.7298642"},{"key":"11_CR24","doi-asserted-by":"crossref","unstructured":"He, J., Chen, J., Lin, M.X., Yu, Q., Yuille, A.L.: Compositor: bottom-up clustering and compositing for robust part and object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11259\u201311268 (2023)","DOI":"10.1109\/CVPR52729.2023.01083"},{"key":"11_CR25","doi-asserted-by":"publisher","unstructured":"He, J., et al.: Partimagenet: a large, high-quality dataset of parts. In: European Conference on Computer Vision, pp. 128\u2013145. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-20074-8_8","DOI":"10.1007\/978-3-031-20074-8_8"},{"key":"11_CR26","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"11_CR27","first-page":"17427","volume":"34","author":"Y Hong","year":"2021","unstructured":"Hong, Y., Yi, L., Tenenbaum, J., Torralba, A., Gan, C.: PTR: a benchmark for part-based conceptual, relational, and physical reasoning. Adv. Neural. Inf. Process. Syst. 34, 17427\u201317440 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"11_CR28","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Van Der\u00a0Maaten, L., Weinberger, K.Q.: Densely connected convolutional networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4700\u20134708 (2017)","DOI":"10.1109\/CVPR.2017.243"},{"key":"11_CR29","doi-asserted-by":"crossref","unstructured":"Huang, H., et al.: Unet 3+: a full-scale connected Unet for medical image segmentation. In: ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1055\u20131059. IEEE (2020)","DOI":"10.1109\/ICASSP40776.2020.9053405"},{"key":"11_CR30","doi-asserted-by":"crossref","unstructured":"Ke, B., Obukhov, A., Huang, S., Metzger, N., Daudt, R.C., Schindler, K.: Repurposing diffusion-based image generators for monocular depth estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2024)","DOI":"10.1109\/CVPR52733.2024.00907"},{"key":"11_CR31","unstructured":"Kirillov, A., et\u00a0al.: Segment anything. arXiv preprint arXiv:2304.02643 (2023)"},{"key":"11_CR32","doi-asserted-by":"crossref","unstructured":"Krause, J., Jin, H., Yang, J., Fei-Fei, L.: Fine-grained recognition without part annotations. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5546\u20135555 (2015)","DOI":"10.1109\/CVPR.2015.7299194"},{"key":"11_CR33","doi-asserted-by":"crossref","unstructured":"Li, G., Xie, Y., Lin, L., Yu, Y.: Instance-level salient object segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2386\u20132395 (2017)","DOI":"10.1109\/CVPR.2017.34"},{"key":"11_CR34","doi-asserted-by":"publisher","unstructured":"Li, X., Xu, S., Yang, Y., Cheng, G., Tong, Y., Tao, D.: Panoptic-partformer: learning a unified model for panoptic part segmentation. In: European Conference on Computer Vision, pp. 729\u2013747. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-19812-0_42","DOI":"10.1007\/978-3-031-19812-0_42"},{"issue":"4","key":"11_CR35","doi-asserted-by":"publisher","first-page":"871","DOI":"10.1109\/TPAMI.2018.2820063","volume":"41","author":"X Liang","year":"2018","unstructured":"Liang, X., Gong, K., Shen, X., Lin, L.: Look into person: joint body parsing and pose estimation network and a new benchmark. IEEE Trans. Pattern Anal. Mach. Intell. 41(4), 871\u2013885 (2018)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"11_CR36","doi-asserted-by":"publisher","unstructured":"Liang, X., Shen, X., Feng, J., Lin, L., Yan, S.: Semantic object parsing with graph LSTM. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part I 14, pp. 125\u2013143. Springer (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_8","DOI":"10.1007\/978-3-319-46448-0_8"},{"key":"11_CR37","doi-asserted-by":"crossref","unstructured":"Liang, X., Shen, X., Xiang, D., Feng, J., Lin, L., Yan, S.: Semantic object parsing with local-global long short-term memory. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3185\u20133193 (2016)","DOI":"10.1109\/CVPR.2016.347"},{"key":"11_CR38","doi-asserted-by":"crossref","unstructured":"Liang, X., et al.: Human parsing with contextualized convolutional neural network. In: Proceedings of the IEEE International Conference On Computer Vision, pp. 1386\u20131394 (2015)","DOI":"10.1109\/ICCV.2015.163"},{"key":"11_CR39","doi-asserted-by":"publisher","unstructured":"Lin, T.Y., et al.: Microsoft coco: Common objects in context. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13, pp. 740\u2013755. Springer (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"11_CR40","doi-asserted-by":"crossref","unstructured":"Liu, H., et al.: An end-to-end network for panoptic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6172\u20136181 (2019)","DOI":"10.1109\/CVPR.2019.00633"},{"key":"11_CR41","doi-asserted-by":"crossref","unstructured":"Liu, Q., et al.: Learning part segmentation through unsupervised domain adaptation from synthetic vehicles. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19140\u201319151 (2022)","DOI":"10.1109\/CVPR52688.2022.01855"},{"key":"11_CR42","doi-asserted-by":"crossref","unstructured":"Liu, Y., Zhao, L., Zhang, S., Yang, J.: Hybrid resolution network using edge guided region mutual information loss for human parsing. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 1670\u20131678 (2020)","DOI":"10.1145\/3394171.3413831"},{"key":"11_CR43","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"11_CR44","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TGRS.2021.3097148","volume":"60","author":"A Ma","year":"2022","unstructured":"Ma, A., Wang, J., Zhong, Y., Zheng, Z.: Factseg: foreground activation-driven small object semantic segmentation in large-scale remote sensing imagery. IEEE Trans. Geosci. Remote Sens. 60, 1\u201316 (2022). https:\/\/doi.org\/10.1109\/TGRS.2021.3097148","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"11_CR45","doi-asserted-by":"publisher","unstructured":"Michieli, U., Borsato, E., Rossi, L., Zanuttigh, P.: Gmnet: graph matching network for large scale part semantic segmentation in the wild. In: European Conference on Computer Vision, pp. 397\u2013414. Springer (2020). https:\/\/doi.org\/10.1007\/978-3-030-58598-3_24","DOI":"10.1007\/978-3-030-58598-3_24"},{"issue":"11","key":"11_CR46","doi-asserted-by":"publisher","first-page":"2797","DOI":"10.1007\/s11263-022-01671-z","volume":"130","author":"U Michieli","year":"2022","unstructured":"Michieli, U., Zanuttigh, P.: Edge-aware graph matching network for part-based semantic segmentation. Int. J. Comput. Vision 130(11), 2797\u20132821 (2022)","journal-title":"Int. J. Comput. Vision"},{"key":"11_CR47","doi-asserted-by":"crossref","unstructured":"Nagarajan, T., Feichtenhofer, C., Grauman, K.: Grounded human-object interaction hotspots from video. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 8688\u20138697 (2019)","DOI":"10.1109\/ICCV.2019.00878"},{"key":"11_CR48","doi-asserted-by":"crossref","unstructured":"Nie, X., Feng, J., Yan, S.: Mutual learning to adapt for joint human parsing and pose estimation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 502\u2013517 (2018)","DOI":"10.1007\/978-3-030-01228-1_31"},{"key":"11_CR49","doi-asserted-by":"crossref","unstructured":"Pan, T.Y., Liu, Q., Chao, W.L., Price, B.: Towards open-world segmentation of parts. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15392\u201315401 (2023)","DOI":"10.1109\/CVPR52729.2023.01477"},{"key":"11_CR50","doi-asserted-by":"crossref","unstructured":"Pohlen, T., Hermans, A., Mathias, M., Leibe, B.: Full-resolution residual networks for semantic segmentation in street scenes. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4151\u20134160 (2017)","DOI":"10.1109\/CVPR.2017.353"},{"key":"11_CR51","doi-asserted-by":"crossref","unstructured":"Pu, M., Huang, Y., Liu, Y., Guan, Q., Ling, H.: Edter: edge detection with transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1402\u20131412 (2022)","DOI":"10.1109\/CVPR52688.2022.00146"},{"key":"11_CR52","doi-asserted-by":"crossref","unstructured":"Pu, M., Huang, Y., Liu, Y., Guan, Q., Ling, H.: EDTER: edge detection with transformer. In: CVPR, pp. 1402\u20131412 (2022)","DOI":"10.1109\/CVPR52688.2022.00146"},{"key":"11_CR53","unstructured":"Sauvalle, B., de\u00a0La\u00a0Fortelle, A.: Unsupervised multi-object segmentation using attention and soft-argmax. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 3267\u20133276 (2023)"},{"key":"11_CR54","doi-asserted-by":"crossref","unstructured":"Singh, R., Gupta, P., Shenoy, P., Sarvadevabhatla, R.K.: FLOAT: factorized learning of object attributes for improved multi-object multi-part scene parsing. In: CVPR, pp. 1445\u20131455 (2022)","DOI":"10.1109\/CVPR52688.2022.00150"},{"key":"11_CR55","doi-asserted-by":"crossref","unstructured":"Song, Y., Chen, X., Li, J., Zhao, Q.: Embedding 3D geometric features for rigid object part segmentation. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 580\u2013588 (2017)","DOI":"10.1109\/ICCV.2017.70"},{"key":"11_CR56","doi-asserted-by":"publisher","unstructured":"Tan, X., Xu, J., Ye, Z., Hao, J., Ma, L.: Confident semantic ranking loss for part parsing. In: 2021 IEEE International Conference on Multimedia and Expo (ICME), pp.\u00a01\u20136 (2021). https:\/\/doi.org\/10.1109\/ICME51207.2021.9428332","DOI":"10.1109\/ICME51207.2021.9428332"},{"key":"11_CR57","doi-asserted-by":"crossref","unstructured":"Ventura, C., Bellver, M., Girbau, A., Salvador, A., Marques, F., Giro-i Nieto, X.: Rvos: end-to-end recurrent network for video object segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5277\u20135286 (2019)","DOI":"10.1109\/CVPR.2019.00542"},{"key":"11_CR58","doi-asserted-by":"crossref","unstructured":"Wang, J., Yuille, A.L.: Semantic part segmentation using compositional model combining shape and appearance. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 1788\u20131797 (2015)","DOI":"10.1109\/CVPR.2015.7298788"},{"key":"11_CR59","doi-asserted-by":"crossref","unstructured":"Wang, L., et al.: Temporal segment networks: towards good practices for deep action recognition. In: European Conference on Computer Vision, pp. 20\u201336. Springer (2016)","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"11_CR60","doi-asserted-by":"crossref","unstructured":"Wang, P., Shen, X., Lin, Z., Cohen, S., Price, B., Yuille, A.L.: Joint object and part segmentation using deep learned potentials. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1573\u20131581 (2015)","DOI":"10.1109\/ICCV.2015.184"},{"key":"11_CR61","unstructured":"Wang, X., Li, S., Kallidromitis, K., Kato, Y., Kozuka, K., Darrell, T.: Hierarchical open-vocabulary universal image segmentation (2023)"},{"key":"11_CR62","doi-asserted-by":"crossref","unstructured":"Wang, Y., et al.: Lednet: a lightweight encoder-decoder network for real-time semantic segmentation. In: 2019 IEEE international conference on image processing (ICIP), pp. 1860\u20131864. IEEE (2019)","DOI":"10.1109\/ICIP.2019.8803154"},{"key":"11_CR63","unstructured":"Wei, M., Yue, X., Zhang, W., Kong, S., Liu, X., Pang, J.: Ov-parts: towards open-vocabulary part segmentation. In: Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track (2023)"},{"key":"11_CR64","doi-asserted-by":"crossref","unstructured":"Xiang, F., et\u00a0al.: Sapien: a simulated part-based interactive environment. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11097\u201311107 (2020)","DOI":"10.1109\/CVPR42600.2020.01111"},{"key":"11_CR65","first-page":"12077","volume":"34","author":"E Xie","year":"2021","unstructured":"Xie, E., Wang, W., Yu, Z., Anandkumar, A., Alvarez, J.M., Luo, P.: Segformer: simple and efficient design for semantic segmentation with transformers. Adv. Neural. Inf. Process. Syst. 34, 12077\u201312090 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"11_CR66","doi-asserted-by":"crossref","unstructured":"Xie, S., Tu, Z.: Holistically-nested edge detection. In: ICCV, pp. 1395\u20131403 (2015)","DOI":"10.1109\/ICCV.2015.164"},{"key":"11_CR67","doi-asserted-by":"crossref","unstructured":"Yang, J., Wang, C., Li, Z., Wang, J., Zhang, R.: Semantic human parsing via scalable semantic transfer over multiple label domains. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19424\u201319433 (2023)","DOI":"10.1109\/CVPR52729.2023.01861"},{"key":"11_CR68","doi-asserted-by":"crossref","unstructured":"Yang, L., Kang, B., Huang, Z., Xu, X., Feng, J., Zhao, H.: Depth anything: unleashing the power of large-scale unlabeled data. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.00987"},{"key":"11_CR69","doi-asserted-by":"crossref","unstructured":"Yu, C., Wang, J., Peng, C., Gao, C., Yu, G., Sang, N.: Bisenet: bilateral segmentation network for real-time semantic segmentation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 325\u2013341 (2018)","DOI":"10.1007\/978-3-030-01261-8_20"},{"key":"11_CR70","doi-asserted-by":"crossref","unstructured":"Zhang, S., Liew, J.H., Wei, Y., Wei, S., Zhao, Y.: Interactive object segmentation with inside-outside guidance. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 12234\u201312244 (2020)","DOI":"10.1109\/CVPR42600.2020.01225"},{"key":"11_CR71","doi-asserted-by":"crossref","unstructured":"Zhao, H., Qi, X., Shen, X., Shi, J., Jia, J.: Icnet for real-time semantic segmentation on high-resolution images. In: Proceedings of the European conference on computer vision (ECCV), pp. 405\u2013420 (2018)","DOI":"10.1007\/978-3-030-01219-9_25"},{"key":"11_CR72","doi-asserted-by":"crossref","unstructured":"Zhao, H., Shi, J., Qi, X., Wang, X., Jia, J.: Pyramid scene parsing network. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 2881\u20132890 (2017)","DOI":"10.1109\/CVPR.2017.660"},{"key":"11_CR73","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Li, J., Zhang, Y., Tian, Y.: Multi-class part parsing with joint boundary-semantic awareness. In: ICCV, pp. 9177\u20139186 (2019)","DOI":"10.1109\/ICCV.2019.00927"},{"key":"11_CR74","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Zhong, Y., Wang, J., Ma, A.: Foreground-aware relation network for geospatial object segmentation in high spatial resolution remote sensing imagery. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 4096\u20134105 (2020)","DOI":"10.1109\/CVPR42600.2020.00415"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73404-5_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,24]],"date-time":"2025-04-24T19:43:18Z","timestamp":1745523798000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73404-5_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,30]]},"ISBN":["9783031734038","9783031734045"],"references-count":74,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73404-5_11","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,10,30]]},"assertion":[{"value":"30 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}