{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T16:03:35Z","timestamp":1780589015570,"version":"3.54.1"},"publisher-location":"Cham","reference-count":84,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031729690","type":"print"},{"value":"9783031729706","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,23]],"date-time":"2024-11-23T00:00:00Z","timestamp":1732320000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,23]],"date-time":"2024-11-23T00:00:00Z","timestamp":1732320000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72970-6_27","type":"book-chapter","created":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T10:52:37Z","timestamp":1732272757000},"page":"476-495","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Open-Vocabulary Camouflaged Object Segmentation"],"prefix":"10.1007","author":[{"given":"Youwei","family":"Pang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiaoqi","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jiaming","family":"Zuo","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lihe","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Huchuan","family":"Lu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,11,23]]},"reference":[{"key":"27_CR1","doi-asserted-by":"crossref","unstructured":"Anderson, P., et al.: Bottom-up and top-down attention for image captioning and visual question answering. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2018)","DOI":"10.1109\/CVPR.2018.00636"},{"key":"27_CR2","doi-asserted-by":"crossref","unstructured":"Bideau, P., Learned-Miller, E.: It\u2019s moving! a probabilistic model for causal motion segmentation in moving camera videos. In: Proceedings of European Conference on Computer Vision (2016)","DOI":"10.1007\/978-3-319-46484-8_26"},{"key":"27_CR3","doi-asserted-by":"crossref","unstructured":"Caesar, H., Bet al.: nuscenes: a multimodal dataset for autonomous driving. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2020)","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"27_CR4","doi-asserted-by":"crossref","unstructured":"Caesar, H., Uijlings, J., Ferrari, V.: Coco-stuff: thing and stuff classes in context. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2018)","DOI":"10.1109\/CVPR.2018.00132"},{"key":"27_CR5","doi-asserted-by":"crossref","unstructured":"Chen, Y.C., et al.: Uniter: universal image-text representation learning. In: Proceedings of European Conference on Computer Vision (2020)","DOI":"10.1007\/978-3-030-58577-8_7"},{"key":"27_CR6","doi-asserted-by":"crossref","unstructured":"Cheng, B., Misra, I., Schwing, A.G., Kirillov, A., Girdhar, R.: Masked-attention mask transformer for universal image segmentation. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2022)","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"27_CR7","unstructured":"Cheng, B., Schwing, A., Kirillov, A.: Per-pixel classification is not all you need for semantic segmentation. In: International Conference on Neural Information Processing Systems (2021)"},{"key":"27_CR8","doi-asserted-by":"crossref","unstructured":"Cheng, X., et al.: Implicit motion handling for video camouflaged object detection. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2022)","DOI":"10.1109\/CVPR52688.2022.01349"},{"key":"27_CR9","doi-asserted-by":"crossref","unstructured":"Cherti, M., et al.: Reproducible scaling laws for contrastive language-image learning. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2022)","DOI":"10.1109\/CVPR52729.2023.00276"},{"key":"27_CR10","doi-asserted-by":"crossref","unstructured":"Cho, S., et al.: CAT-seg: cost aggregation for open-vocabulary semantic segmentation. arXiv preprint arXiv:2303.11797 (2023)","DOI":"10.1109\/CVPR52733.2024.00394"},{"key":"27_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1561\/0600000095","volume":"14","author":"G Csurka","year":"2022","unstructured":"Csurka, G., Volpi, R., Chidlovskii, B.: Semantic image segmentation: two decades of research. Found. Trends Comput. Graph. Vision 14, 1\u201362 (2022)","journal-title":"Found. Trends Comput. Graph. Vision"},{"key":"27_CR12","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C.K., Winn, J., Zisserman, A.: The pascal visual object classes (VOC) challenge. Int. J. Comput. Vision 88, 303\u2013338 (2010)","journal-title":"Int. J. Comput. Vision"},{"key":"27_CR13","doi-asserted-by":"publisher","first-page":"6024","DOI":"10.1109\/TPAMI.2021.3085766","volume":"44","author":"DP Fan","year":"2021","unstructured":"Fan, D.P., Ji, G.P., Cheng, M.M., Shao, L.: Concealed object detection. IEEE Trans. Pattern Anal. Mach. Intell. 44, 6024\u20136042 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"27_CR14","doi-asserted-by":"crossref","unstructured":"Fan, D.P., Ji, G.P., Sun, G., Cheng, M.M., Shen, J., Shao, L.: Camouflaged object detection. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2020)","DOI":"10.1109\/CVPR42600.2020.00285"},{"key":"27_CR15","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1007\/s44267-023-00019-6","volume":"1","author":"DP Fan","year":"2023","unstructured":"Fan, D.P., Ji, G.P., Xu, P., Cheng, M.M., Sakaridis, C., Van Gool, L.: Advances in deep concealed scene understanding. Vis. Intelli. 1, 16 (2023)","journal-title":"Vis. Intelli."},{"key":"27_CR16","doi-asserted-by":"publisher","first-page":"2626","DOI":"10.1109\/TMI.2020.2996645","volume":"39","author":"DP Fan","year":"2020","unstructured":"Fan, D.P., et al.: Inf-Net: automatic COVID-19 lung infection segmentation from CT images. IEEE Trans. Med. Imaging 39, 2626\u20132637 (2020)","journal-title":"IEEE Trans. Med. Imaging"},{"key":"27_CR17","doi-asserted-by":"crossref","unstructured":"Ghiasi, G., Gu, X., Cui, Y., Lin, T.Y.: Scaling open-vocabulary image segmentation with image-level labels. In: Proceedings of European Conference on Computer Vision (2022)","DOI":"10.1007\/978-3-031-20059-5_31"},{"key":"27_CR18","unstructured":"Gu, X., Lin, T.Y., Kuo, W., Cui, Y.: Open-vocabulary object detection via vision and language knowledge distillation. In: International Conference on Learning Representations (2021)"},{"key":"27_CR19","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Dollar, P., Girshick, R.: Mask R-CNN. IEEE Trans. Pattern Anal. Mach. Intell. (2020)","DOI":"10.1109\/TPAMI.2018.2844175"},{"key":"27_CR20","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"27_CR21","doi-asserted-by":"crossref","unstructured":"Ji, W., et al.: Multispectral video semantic segmentation: a benchmark dataset and baseline. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.00112"},{"key":"27_CR22","doi-asserted-by":"crossref","unstructured":"Ji, W., Li, J., Zhang, M., Piao, Y., Lu, H.: Accurate RGB-D salient object detection via collaborative learning. In: Proceedings of European Conference on Computer Vision (2020)","DOI":"10.1007\/978-3-030-58523-5_4"},{"key":"27_CR23","doi-asserted-by":"publisher","first-page":"2321","DOI":"10.1109\/TIP.2022.3154931","volume":"31","author":"W Ji","year":"2022","unstructured":"Ji, W., et al.: DMRA: depth-induced multi-scale recurrent attention network for RGB-D saliency detection. IEEE Trans. Image Process. 31, 2321\u20132336 (2022)","journal-title":"IEEE Trans. Image Process."},{"key":"27_CR24","doi-asserted-by":"crossref","unstructured":"Jia, Q., Yao, S., Liu, Y., Fan, X., Liu, R., Luo, Z.: Segment, magnify and reiterate: detecting camouflaged objects the hard way. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2022)","DOI":"10.1109\/CVPR52688.2022.00467"},{"key":"27_CR25","doi-asserted-by":"publisher","first-page":"509","DOI":"10.1177\/1073858413514136","volume":"20","author":"F Katsuki","year":"2014","unstructured":"Katsuki, F., Constantinidis, C.: Bottom-up and top-down attention: different processes and overlapping neural systems. The Neuroscientist 20, 509\u2013521 (2014)","journal-title":"The Neuroscientist"},{"key":"27_CR26","doi-asserted-by":"crossref","unstructured":"Kim, S.Y., et al.: Layered depth refinement with mask guidance. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2022)","DOI":"10.1109\/CVPR52688.2022.00383"},{"key":"27_CR27","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1016\/j.cviu.2019.04.006","volume":"184","author":"TN Le","year":"2019","unstructured":"Le, T.N., Nguyen, T.V., Nie, Z., Tran, M.T., Sugimoto, A.: Anabranch network for camouflaged object segmentation. Comput. Vis. Image Underst. 184, 45\u201356 (2019)","journal-title":"Comput. Vis. Image Underst."},{"key":"27_CR28","doi-asserted-by":"crossref","unstructured":"Li, A., Zhang, J., Lyu, Y., Liu, B., Zhang, T., Dai, Y.: Uncertainty-aware joint salient object and camouflaged object detection. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2021)","DOI":"10.1109\/CVPR46437.2021.00994"},{"key":"27_CR29","doi-asserted-by":"crossref","unstructured":"Li, G., Duan, N., Fang, Y., Gong, M., Jiang, D.: Unicoder-VL: a universal encoder for vision and language by cross-modal pre-training. In: AAAI Conference on Artificial Intelligence (2020)","DOI":"10.1609\/aaai.v34i07.6795"},{"key":"27_CR30","unstructured":"Li, J., Ji, W., Wang, S., Li, W., Cheng, L.: DVSOD: RGB-D video salient object detection. In: International Conference on Neural Information Processing Systems (2023)"},{"key":"27_CR31","doi-asserted-by":"publisher","first-page":"855","DOI":"10.1007\/s11263-022-01734-1","volume":"131","author":"J Li","year":"2023","unstructured":"Li, J., Ji, W., Zhang, M., Piao, Y., Lu, H., Cheng, L.: Delving into calibrated depth for accurate RGB-D salient object detection. Int. J. Comput. Vis. 131, 855\u2013876 (2023)","journal-title":"Int. J. Comput. Vis."},{"key":"27_CR32","doi-asserted-by":"crossref","unstructured":"Li, X., et\u00a0al.: Oscar: object-semantics aligned pre-training for vision-language tasks. In: Proceedings of European Conference on Computer Vision (2020)","DOI":"10.1007\/978-3-030-58577-8_8"},{"key":"27_CR33","doi-asserted-by":"crossref","unstructured":"Liang, F., et al.: Open-vocabulary semantic segmentation with mask-adapted clip. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2022)","DOI":"10.1109\/CVPR52729.2023.00682"},{"key":"27_CR34","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1016\/j.media.2017.07.005","volume":"42","author":"G Litjens","year":"2017","unstructured":"Litjens, G., et al.: A survey on deep learning in medical image analysis. Med. Image Anal. 42, 60\u201388 (2017)","journal-title":"Med. Image Anal."},{"key":"27_CR35","doi-asserted-by":"publisher","first-page":"45301","DOI":"10.1109\/ACCESS.2019.2909522","volume":"7","author":"L Liu","year":"2019","unstructured":"Liu, L., et al.: PestNet: an end-to-end deep learning approach for large-scale multi-class pest detection and classification. IEEE Access 7, 45301\u201345312 (2019)","journal-title":"IEEE Access"},{"key":"27_CR36","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"27_CR37","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: International Conference on Learning Representations (2019)"},{"key":"27_CR38","unstructured":"Lyu, Y., et al.: Simultaneously localize, segment and rank the camouflaged objects. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2021)"},{"key":"27_CR39","doi-asserted-by":"crossref","unstructured":"Mithun, N.C., Panda, R., Papalexakis, E.E., Roy-Chowdhury, A.K.: Webly supervised joint embedding for cross-modal image-text retrieval. In: Proceedings of the ACM International Conference on Multimedia (2018)","DOI":"10.1145\/3240508.3240712"},{"key":"27_CR40","doi-asserted-by":"crossref","unstructured":"Mottaghi, R., et al.: The role of context for object detection and semantic segmentation in the wild. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2014)","DOI":"10.1109\/CVPR.2014.119"},{"key":"27_CR41","doi-asserted-by":"crossref","unstructured":"Neuhold, G., Ollmann, T., Bulo, S.R., Kontschieder, P.: The mapillary vistas dataset for semantic understanding of street scenes. In: Proceedings of the IEEE International Conference on Computer Vision (2017)","DOI":"10.1109\/ICCV.2017.534"},{"key":"27_CR42","doi-asserted-by":"crossref","unstructured":"Pang, Y., Zhang, L., Zhao, X., Lu, H.: Hierarchical dynamic filtering network for RGB-D salient object detection. In: Proceedings of European Conference on Computer Vision (2020)","DOI":"10.1007\/978-3-030-58595-2_15"},{"key":"27_CR43","doi-asserted-by":"crossref","unstructured":"Pang, Y., Zhao, X., Xiang, T.Z., Zhang, L., Lu, H.: Zoom in and out: a mixed-scale triplet network for camouflaged object detection. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2022)","DOI":"10.1109\/CVPR52688.2022.00220"},{"key":"27_CR44","doi-asserted-by":"crossref","unstructured":"Pang, Y., Zhao, X., Xiang, T.Z., Zhang, L., Lu, H.: ZoomNeXt: a unified collaborative pyramid network for camouflaged object detection. IEEE Trans. Pattern Anal. Mach. Intell. (2024)","DOI":"10.1109\/TPAMI.2024.3417329"},{"key":"27_CR45","doi-asserted-by":"crossref","unstructured":"Pang, Y., Zhao, X., Zhang, L., Lu, H.: Multi-scale interactive network for salient object detection. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2020)","DOI":"10.1109\/CVPR42600.2020.00943"},{"key":"27_CR46","doi-asserted-by":"publisher","first-page":"892","DOI":"10.1109\/TIP.2023.3234702","volume":"32","author":"Y Pang","year":"2023","unstructured":"Pang, Y., Zhao, X., Zhang, L., Lu, H.: CAVER: cross-modal view-mixed transformer for bi-modal salient object detection. IEEE Trans. Image Process. 32, 892\u2013904 (2023)","journal-title":"IEEE Trans. Image Process."},{"key":"27_CR47","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: Proceedings of the International Conference on Machine Learning (2021)"},{"key":"27_CR48","doi-asserted-by":"crossref","unstructured":"Ranftl, R., Bochkovskiy, A., Koltun, V.: Vision transformers for dense prediction. In: Proceedings of the IEEE International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"27_CR49","first-page":"44","volume":"7","author":"M Rizzo","year":"2023","unstructured":"Rizzo, M., Marcuzzo, M., Zangari, A., Gasparetto, A., Albarelli, A.: Fruit ripeness classification: a survey. Artif. Intell. Agric. 7, 44\u201357 (2023)","journal-title":"Artif. Intell. Agric."},{"key":"27_CR50","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"27_CR51","unstructured":"Skurowski, P., Abdulameer, H., B\u0142aszczyk, J., Depta, T., Kornacki, A., Kozie\u0142, P.: Animal camouflage analysis: Chameleon database (2017). http:\/\/kgwisc.aei.polsl.pl\/index.php\/pl\/dataset\/63-animal-camouflage-analysis"},{"key":"27_CR52","unstructured":"Su, W., et al.: VL-BERT: pre-training of generic visual-linguistic representations. In: International Conference on Learning Representations (2019)"},{"key":"27_CR53","doi-asserted-by":"crossref","unstructured":"Sun, Y., Wang, S., Chen, C., Xiang, T.Z.: Boundary-guided camouflaged object detection. In: International Joint Conference on Artificial Intelligence (2022)","DOI":"10.24963\/ijcai.2022\/186"},{"key":"27_CR54","doi-asserted-by":"crossref","unstructured":"Thisanke, H., Deshan, C., Chamith, K., Seneviratne, S., Vidanaarachchi, R., Herath, D.: Semantic segmentation using vision transformers: a survey. arXiv preprint arXiv:2305.03273 (2023)","DOI":"10.1016\/j.engappai.2023.106669"},{"key":"27_CR55","unstructured":"Vaswani, A., et al.: Attention is all you need. In: International Conference on Neural Information Processing Systems (2017)"},{"key":"27_CR56","unstructured":"Wu, J., et al.: Towards open vocabulary learning: a survey. arXiv preprint arXiv:2306.15880 (2024)"},{"key":"27_CR57","unstructured":"Xiang, M., Zhang, J., Lv, Y., Li, A., Zhong, Y., Dai, Y.: Exploring depth contribution for camouflaged object detection. arXiv:2106.13217 (2022)"},{"key":"27_CR58","doi-asserted-by":"crossref","unstructured":"Xu, J., Liu, S., Vahdat, A., Byeon, W., Wang, X., De\u00a0Mello, S.: Open-vocabulary panoptic segmentation with text-to-image diffusion models. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.00289"},{"key":"27_CR59","doi-asserted-by":"crossref","unstructured":"Xu, M., Zhang, Z., Wei, F., Hu, H., Bai, X.: Side adapter network for open-vocabulary semantic segmentation. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.00288"},{"key":"27_CR60","doi-asserted-by":"crossref","unstructured":"Xu, M., et al.: A simple baseline for open-vocabulary semantic segmentation with pre-trained vision-language model. In: Proceedings of European Conference on Computer Vision (2021)","DOI":"10.1007\/978-3-031-19818-2_42"},{"key":"27_CR61","unstructured":"Yang, J.: Plantcamo dataset (2023). https:\/\/github.com\/yjybuaa\/PlantCamo"},{"key":"27_CR62","unstructured":"Yin, B., Zhang, X., Hou, Q., Sun, B.Y., Fan, D.P., Van\u00a0Gool, L.: CamoFormer: masked separable attention for camouflaged object detection. arXiv:2212.06570 (2022)"},{"key":"27_CR63","unstructured":"Yu, Q., Zhao, X., Pang, Y., Zhang, L., Lu, H.: Multi-view aggregation network for dichotomous image segmentation. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2024)"},{"key":"27_CR64","unstructured":"Yu, Q., He, J., Deng, X., Shen, X., Chen, L.C.: Convolutions die hard: open-vocabulary segmentation with single frozen convolutional clip. In: International Conference on Neural Information Processing Systems (2023)"},{"key":"27_CR65","doi-asserted-by":"crossref","unstructured":"Zabari, N., Hoshen, Y.: Open-vocabulary semantic segmentation using test-time distillation. In: European Conference on Computer Vision Workshops (2023)","DOI":"10.1007\/978-3-031-25063-7_4"},{"key":"27_CR66","doi-asserted-by":"publisher","first-page":"6276","DOI":"10.1109\/TIP.2020.2990341","volume":"29","author":"M Zhang","year":"2020","unstructured":"Zhang, M., et al.: LFNet: light field fusion network for salient object detection. IEEE Trans. Image Process. 29, 6276\u20136287 (2020)","journal-title":"IEEE Trans. Image Process."},{"key":"27_CR67","doi-asserted-by":"crossref","unstructured":"Zhang, M., et al.: Dynamic context-sensitive filtering network for video salient object detection. In: Proceedings of the IEEE International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.00158"},{"key":"27_CR68","doi-asserted-by":"publisher","first-page":"5142","DOI":"10.1109\/TMM.2022.3187856","volume":"25","author":"M Zhang","year":"2023","unstructured":"Zhang, M., Yao, S., Hu, B., Piao, Y., Ji, W.: C2DFNet: criss-cross dynamic filter network for RGB-D salient object detection. IEEE Trans. Multimedia 25, 5142\u20135154 (2023)","journal-title":"IEEE Trans. Multimedia"},{"key":"27_CR69","unstructured":"Zhang, W., Pang, J., Chen, K., Loy, C.C.: K-net: towards unified image segmentation. In: International Conference on Neural Information Processing Systems (2021)"},{"key":"27_CR70","doi-asserted-by":"crossref","unstructured":"Zhao, H., Puig, X., Zhou, B., Fidler, S., Torralba, A.: Open vocabulary scene parsing. In: Proceedings of the IEEE International Conference on Computer Vision (2017)","DOI":"10.1109\/ICCV.2017.221"},{"key":"27_CR71","doi-asserted-by":"crossref","unstructured":"Zhao, X., Chang, S., Pang, Y., Yang, J., Zhang, L., Lu, H.: Multi-source fusion and automatic predictor selection for zero-shot video object segmentation. Int. J. Comput. Vis. (2024)","DOI":"10.1007\/s11263-024-02024-8"},{"key":"27_CR72","unstructured":"Zhao, X., et al.: Spider: a unified framework for context-dependent concept understanding. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2024)"},{"key":"27_CR73","doi-asserted-by":"publisher","first-page":"7350","DOI":"10.1109\/TIP.2022.3222641","volume":"31","author":"X Zhao","year":"2022","unstructured":"Zhao, X., Pang, Y., Zhang, L., Lu, H.: Joint learning of salient object detection, depth estimation and contour extraction. IEEE Trans. Image Process. 31, 7350\u20137362 (2022)","journal-title":"IEEE Trans. Image Process."},{"key":"27_CR74","doi-asserted-by":"crossref","unstructured":"Zhao, X., Pang, Y., Zhang, L., Lu, H., Ruan, X.: Self-supervised pretraining for RGB-D salient object detection. In: AAAI Conference on Artificial Intelligence (2022)","DOI":"10.1609\/aaai.v36i3.20257"},{"key":"27_CR75","doi-asserted-by":"crossref","unstructured":"Zhao, X., Pang, Y., Zhang, L., Lu, H., Zhang, L.: Suppress and balance: a simple gated network for salient object detection. In: Proceedings of European Conference on Computer Vision (2020)","DOI":"10.1007\/978-3-030-58536-5_3"},{"key":"27_CR76","doi-asserted-by":"publisher","first-page":"4157","DOI":"10.1007\/s11263-024-02058-y","volume":"132","author":"X Zhao","year":"2024","unstructured":"Zhao, X., Pang, Y., Zhang, L., Lu, H., Zhang, L.: Towards diverse binary segmentation via a simple yet general gated network. Int. J. Comput. Vis. 132, 4157\u20134234 (2024)","journal-title":"Int. J. Comput. Vis."},{"key":"27_CR77","doi-asserted-by":"crossref","unstructured":"Zhao, X., Zhang, L., Pang, Y., Lu, H., Zhang, L.: A single stream network for robust and real-time RGB-D salient object detection. In: Proceedings of European Conference on Computer Vision (2020)","DOI":"10.1007\/978-3-030-58542-6_39"},{"key":"27_CR78","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1109\/LSP.2018.2825959","volume":"26","author":"Y Zheng","year":"2019","unstructured":"Zheng, Y., Zhang, X., Wang, F., Cao, T., Sun, M., Wang, X.: Detection of people with camouflage pattern via dense deconvolution network. IEEE Signal Process. Lett. 26, 29\u201333 (2019)","journal-title":"IEEE Signal Process. Lett."},{"key":"27_CR79","doi-asserted-by":"crossref","unstructured":"Zhou, B., Zhao, H., Puig, X., Fidler, S., Barriuso, A., Torralba, A.: Scene parsing through ADE20K dataset. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2017)","DOI":"10.1109\/CVPR.2017.544"},{"key":"27_CR80","doi-asserted-by":"crossref","unstructured":"Zhou, C., Loy, C.C., Dai, B.: Extract free dense labels from clip. In: Proceedings of European Conference on Computer Vision (2022)","DOI":"10.1007\/978-3-031-19815-1_40"},{"key":"27_CR81","unstructured":"Zhou, H., et al.: Rethinking evaluation metrics of open-vocabulary segmentaion. ArXiv arXiv:2311.03352 (2023)"},{"key":"27_CR82","doi-asserted-by":"publisher","first-page":"2337","DOI":"10.1007\/s11263-022-01653-1","volume":"130","author":"K Zhou","year":"2021","unstructured":"Zhou, K., Yang, J., Loy, C.C., Liu, Z.: Learning to prompt for vision-language models. Int. J. Comput. Vis. 130, 2337\u20132348 (2021)","journal-title":"Int. J. Comput. Vis."},{"key":"27_CR83","unstructured":"Zhu, C., Chen, L.: A survey on open-vocabulary detection and segmentation: past, present, and future. arXiv preprint arXiv:2307.09220 (2023)"},{"key":"27_CR84","unstructured":"Zhu, F., Zhu, Y., Lee, V., Liang, X., Chang, X.: Deep learning for embodied vision navigation: a survey. arXiv preprint arXiv:2108.04097 (2021)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72970-6_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T11:20:23Z","timestamp":1732274423000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72970-6_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,23]]},"ISBN":["9783031729690","9783031729706"],"references-count":84,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72970-6_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,23]]},"assertion":[{"value":"23 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}