{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T00:07:29Z","timestamp":1755907649991,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,12,8]],"date-time":"2023-12-08T00:00:00Z","timestamp":1701993600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,12,8]]},"DOI":"10.1145\/3638584.3638624","type":"proceedings-article","created":{"date-parts":[[2024,3,14]],"date-time":"2024-03-14T11:15:19Z","timestamp":1710414919000},"page":"233-238","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Image Segmentation with Vision-Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9530-694X","authenticated-orcid":false,"given":"Lihu","family":"Pan","sequence":"first","affiliation":[{"name":"Taiyuan University of Science and Technology, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-2195-622X","authenticated-orcid":false,"given":"Yunting","family":"Yang","sequence":"additional","affiliation":[{"name":"Taiyuan University of Science and Technology, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4554-0791","authenticated-orcid":false,"given":"Zhengkui","family":"Wang","sequence":"additional","affiliation":[{"name":"InfoComm Technology Cluster, Singapore Institute of Technology, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7767-7413","authenticated-orcid":false,"given":"Rui","family":"Zhang","sequence":"additional","affiliation":[{"name":"Taiyuan University of Science and Technology, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7377-8943","authenticated-orcid":false,"given":"Wen","family":"Shan","sequence":"additional","affiliation":[{"name":"Singapore University of Social Sciences, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-6686-3857","authenticated-orcid":false,"given":"Jiashu","family":"Li","sequence":"additional","affiliation":[{"name":"Taiyuan University of Science and Technology, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,3,14]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Donghyeon Baek Youngmin Oh and Bumsub Ham. 2021. Exploiting a joint embedding space for generalized zero-shot semantic segmentation. In ICCV. 9536\u20139545."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Malik Boudiaf Hoel Kervadec Ziko\u00a0Imtiaz Masud Pablo Piantanida Ismail Ben\u00a0Ayed and Jose Dolz. 2021. Few-shot segmentation without meta-learning: A good transductive inference is all you need?. In CVPR. 13979\u201313988.","DOI":"10.1109\/CVPR46437.2021.01376"},{"key":"e_1_3_2_1_3_1","volume-title":"Zero-shot semantic segmentation. Advances in Neural Information Processing Systems 32","author":"Bucher Maxime","year":"2019","unstructured":"Maxime Bucher, Tuan-Hung Vu, Matthieu Cord, and Patrick P\u00e9rez. 2019. Zero-shot semantic segmentation. Advances in Neural Information Processing Systems 32 (2019)."},{"volume-title":"End-to-end object detection with transformers","author":"Carion Nicolas","key":"e_1_3_2_1_4_1","unstructured":"Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nicolas Usunier, Alexander Kirillov, and Sergey Zagoruyko. 2020. End-to-end object detection with transformers. In ECCV. Springer, 213\u2013229."},{"key":"e_1_3_2_1_5_1","first-page":"105431","article-title":"MGNet: Mutual-guidance network for few-shot semantic segmentation","volume":"116","author":"Chang Zhaobin","year":"2022","unstructured":"Zhaobin Chang, Yonggang Lu, Xiangwen Wang, and Xingcheng Ran. 2022. MGNet: Mutual-guidance network for few-shot semantic segmentation. EAAI 116 (2022), 105431.","journal-title":"EAAI"},{"key":"e_1_3_2_1_6_1","first-page":"109018","article-title":"Self-regularized prototypical network for few-shot semantic segmentation","volume":"133","author":"Ding Henghui","year":"2023","unstructured":"Henghui Ding, Hui Zhang, and Xudong Jiang. 2023. Self-regularized prototypical network for few-shot semantic segmentation. PR 133 (2023), 109018.","journal-title":"PR"},{"key":"e_1_3_2_1_7_1","unstructured":"Nanqing Dong and Eric\u00a0P Xing. 2018. Few-shot semantic segmentation with prototype learning. In BMVC Vol.\u00a03."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.23915\/distill.00011"},{"key":"e_1_3_2_1_9_1","first-page":"103754","article-title":"Hierarchical context-agnostic network with contrastive feature diversity for one-shot semantic segmentation","volume":"90","author":"Fang Zhiyuan","year":"2023","unstructured":"Zhiyuan Fang, Guangyu Gao, Zekang Zhang, and Anqi Zhang. 2023. Hierarchical context-agnostic network with contrastive feature diversity for one-shot semantic segmentation. JVCIR 90 (2023), 103754.","journal-title":"JVCIR"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Zhangxuan Gu Siyuan Zhou Li Niu Zihan Zhao and Liqing Zhang. 2020. Context-aware feature generation for zero-shot semantic segmentation. In ACM MM. 1921\u20131929.","DOI":"10.1145\/3394171.3413593"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Aishwarya Kamath Mannat Singh Yann LeCun Gabriel Synnaeve Ishan Misra and Nicolas Carion. 2021. Mdetr-modulated detection for end-to-end multi-modal understanding. In ICCV. 1780\u20131790.","DOI":"10.1109\/ICCV48922.2021.00180"},{"key":"e_1_3_2_1_12_1","first-page":"106","article-title":"An image segmentation using improved FCM watershed algorithm and DBMF","volume":"2","author":"Kaur Rupinder","year":"2014","unstructured":"Rupinder Kaur and Er\u00a0Garima Malik. 2014. An image segmentation using improved FCM watershed algorithm and DBMF. Journal of Image and Graphics 2, 2 (2014), 106\u2013112.","journal-title":"Journal of Image and Graphics"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1049\/cit2.12261"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3216393"},{"key":"e_1_3_2_1_15_1","volume-title":"Language-driven Semantic Segmentation. CoRR abs\/2201.03546","author":"Li Boyi","year":"2022","unstructured":"Boyi Li, Kilian\u00a0Q. Weinberger, Serge\u00a0J. Belongie, Vladlen Koltun, and Ren\u00e9 Ranftl. 2022. Language-driven Semantic Segmentation. CoRR abs\/2201.03546 (2022). arXiv:2201.03546https:\/\/arxiv.org\/abs\/2201.03546"},{"key":"e_1_3_2_1_16_1","first-page":"10317","article-title":"Consistent structural relation learning for zero-shot segmentation","volume":"33","author":"Li Peike","year":"2020","unstructured":"Peike Li, Yunchao Wei, and Yi Yang. 2020. Consistent structural relation learning for zero-shot segmentation. NeurIPS 33 (2020), 10317\u201310327.","journal-title":"NeurIPS"},{"volume-title":"Microsoft coco: Common objects in context","author":"Lin Tsung-Yi","key":"e_1_3_2_1_17_1","unstructured":"Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C\u00a0Lawrence Zitnick. 2014. Microsoft coco: Common objects in context. In ECCV. Springer, 740\u2013755."},{"volume-title":"Part-aware prototype network for few-shot semantic segmentation","author":"Liu Yongfei","key":"e_1_3_2_1_18_1","unstructured":"Yongfei Liu, Xiangyi Zhang, Songyang Zhang, and Xuming He. 2020. Part-aware prototype network for few-shot semantic segmentation. In ECCV. Springer, 142\u2013158."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Timo L\u00fcddecke and Alexander Ecker. 2022. Image segmentation using text and image prompts. In CVPR. 7086\u20137096.","DOI":"10.1109\/CVPR52688.2022.00695"},{"key":"e_1_3_2_1_20_1","unstructured":"Juhong Min Dahyun Kang and Minsu Cho. 2021. Hypercorrelation squeeze for few-shot segmentation. In ICCV. 6941\u20136952."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Roozbeh Mottaghi Xianjie Chen Xiaobai Liu Nam-Gyu Cho Seong-Whan Lee Sanja Fidler Raquel Urtasun and Alan Yuille. 2014. The role of context for object detection and semantic segmentation in the wild. In CVPR. 891\u2013898.","DOI":"10.1109\/CVPR.2014.119"},{"key":"e_1_3_2_1_22_1","unstructured":"Alec Radford Jong\u00a0Wook Kim Chris Hallacy Aditya Ramesh Gabriel Goh Sandhini Agarwal Girish Sastry Amanda Askell Pamela Mishkin Jack Clark 2021. Learning transferable visual models from natural language supervision. In ICML. PMLR 8748\u20138763."},{"key":"e_1_3_2_1_23_1","volume-title":"Few-shot segmentation propagation with guided networks. arXiv preprint arXiv:1806.07373","author":"Rakelly Kate","year":"2018","unstructured":"Kate Rakelly, Evan Shelhamer, Trevor Darrell, Alexei\u00a0A Efros, and Sergey Levine. 2018. Few-shot segmentation propagation with guided networks. arXiv preprint arXiv:1806.07373 (2018)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.12720\/joig.1.3.157-160"},{"key":"e_1_3_2_1_25_1","volume-title":"One-shot learning for semantic segmentation. arXiv preprint arXiv:1709.03410","author":"Shaban Amirreza","year":"2017","unstructured":"Amirreza Shaban, Shray Bansal, Zhen Liu, Irfan Essa, and Byron Boots. 2017. One-shot learning for semantic segmentation. arXiv preprint arXiv:1709.03410 (2017)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.dsp.2023.103911"},{"key":"e_1_3_2_1_27_1","first-page":"113","article-title":"Effective histogram thresholding techniques for natural images using segmentation","volume":"2","author":"Thanammal KK","year":"2014","unstructured":"KK Thanammal, JS Jayasudha, RR Vijayalakshmi, and S Arumugaperumal. 2014. Effective histogram thresholding techniques for natural images using segmentation. Journal of Image and Graphics 2, 2 (2014), 113\u2013116.","journal-title":"Journal of Image and Graphics"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3013717"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3232917"},{"key":"e_1_3_2_1_30_1","volume-title":"Phrasecut: Language-based image segmentation in the wild. In ICCV. 10216\u201310225.","author":"Wu Chenyun","year":"2020","unstructured":"Chenyun Wu, Zhe Lin, Scott Cohen, Trung Bui, and Subhransu Maji. 2020. Phrasecut: Language-based image segmentation in the wild. In ICCV. 10216\u201310225."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Yongqin Xian Subhabrata Choudhury Yang He Bernt Schiele and Zeynep Akata. 2019. Semantic projection network for zero-and few-label semantic segmentation. In CVPR. 8256\u20138265.","DOI":"10.1109\/CVPR.2019.00845"},{"key":"e_1_3_2_1_32_1","volume-title":"Groupvit: Semantic segmentation emerges from text supervision. In CVPR. 18134\u201318144.","author":"Xu Jiarui","year":"2022","unstructured":"Jiarui Xu, Shalini De\u00a0Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, and Xiaolong Wang. 2022. Groupvit: Semantic segmentation emerges from text supervision. In CVPR. 18134\u201318144."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Chi Zhang Guosheng Lin Fayao Liu Jiushuang Guo Qingyao Wu and Rui Yao. 2019. Pyramid graph networks with connection attentions for region-based one-shot semantic segmentation. In ICCV. 9587\u20139595.","DOI":"10.1109\/ICCV.2019.00968"},{"key":"e_1_3_2_1_34_1","volume-title":"Canet: Class-agnostic segmentation networks with iterative refinement and attentive few-shot learning. In CVPR. 5217\u20135226.","author":"Zhang Chi","year":"2019","unstructured":"Chi Zhang, Guosheng Lin, Fayao Liu, Rui Yao, and Chunhua Shen. 2019. Canet: Class-agnostic segmentation networks with iterative refinement and attentive few-shot learning. In CVPR. 5217\u20135226."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Hui Zhang and Henghui Ding. 2021. Prototypical matching and open set rejection for zero-shot semantic segmentation. In ICCV. 6974\u20136983.","DOI":"10.1109\/ICCV48922.2021.00689"}],"event":{"name":"CSAI 2023: 2023 7th International Conference on Computer Science and Artificial Intelligence","acronym":"CSAI 2023","location":"Beijing China"},"container-title":["Proceedings of the 2023 7th International Conference on Computer Science and Artificial Intelligence"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3638584.3638624","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3638584.3638624","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T14:55:47Z","timestamp":1755874547000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3638584.3638624"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,8]]},"references-count":35,"alternative-id":["10.1145\/3638584.3638624","10.1145\/3638584"],"URL":"https:\/\/doi.org\/10.1145\/3638584.3638624","relation":{},"subject":[],"published":{"date-parts":[[2023,12,8]]},"assertion":[{"value":"2024-03-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}