{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:10:05Z","timestamp":1765339805356,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62471278,62302141,62331003"],"award-info":[{"award-number":["62471278,62302141,62331003"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100010040","name":"Taishan Scholar Project of Shandong Province","doi-asserted-by":"publisher","award":["tsqn202306079"],"award-info":[{"award-number":["tsqn202306079"]}],"id":[{"id":"10.13039\/501100010040","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Research Grants Council of the Hong Kong Special Administrative Region, China","award":["STG5\/E-103\/24-R"],"award-info":[{"award-number":["STG5\/E-103\/24-R"]}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["JZ2024HGTB0255"],"award-info":[{"award-number":["JZ2024HGTB0255"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754967","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T05:56:43Z","timestamp":1761371803000},"page":"3067-3075","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["MM-Prompt: Multi-modality and Multi-granularity Prompts for Few-Shot Segmentation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-1777-6724","authenticated-orcid":false,"given":"Hang","family":"Xiong","sequence":"first","affiliation":[{"name":"Beijing Jiaotong University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0972-4008","authenticated-orcid":false,"given":"Runmin","family":"Cong","sequence":"additional","affiliation":[{"name":"Shandong University, Jinan, Shandong, China and Key Laboratory of Machine Intelligence and System Control, Ministry of Education, Jinan, Shandong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0469-4463","authenticated-orcid":false,"given":"Jinpeng","family":"Chen","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Hong Kong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0492-2489","authenticated-orcid":false,"given":"Chen","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shandong University, Jinan, Shandong, China and Key Laboratory of Machine Intelligence and System Control, Ministry of Education, Jinan, Shandong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9862-0432","authenticated-orcid":false,"given":"Feng","family":"Li","sequence":"additional","affiliation":[{"name":"Hefei University of Technology, Hefei, Anhui, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3879-8957","authenticated-orcid":false,"given":"Huihui","family":"Bai","sequence":"additional","affiliation":[{"name":"Beijing Jiaotong University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7484-7261","authenticated-orcid":false,"given":"Sam","family":"Kwong","sequence":"additional","affiliation":[{"name":"Lingnan University, Hong Kong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i2.27834"},{"key":"e_1_3_2_2_2_1","volume-title":"Beyond Mask: Rethinking Guidance Types in Few-shot Segmentation. PR","author":"Chang Shijie","year":"2024","unstructured":"Shijie Chang, Youwei Pang, Xiaoqi Zhao, Lihe Zhang, and Huchuan Lu. 2024. Beyond Mask: Rethinking Guidance Types in Few-shot Segmentation. PR (2024)."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2025.3545966"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2024.3361181"},{"key":"e_1_3_2_2_5_1","first-page":"4650","volume-title":"IEEE TPAMI","volume":"45","author":"Cheng Gong","year":"2023","unstructured":"Gong Cheng, Chunbo Lang, and Junwei Han. 2023. Holistic Prototype Activation for Few-Shot Segmentation. IEEE TPAMI, Vol. 45, 4 (2023), 4650-4666."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2024.3352921"},{"key":"e_1_3_2_2_7_1","first-page":"8440","article-title":"Unsupervised Cross-lingual Representation Learning at Scale","author":"Conneau Alexis","year":"2020","unstructured":"Alexis Conneau, Kartikay Khandelwal, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco Guzm\u00e1n, Edouard Grave, Myle Ott, Luke Zettlemoyer, and Veselin Stoyanov. 2020. Unsupervised Cross-lingual Representation Learning at Scale. In ACL. 8440-8451.","journal-title":"ACL."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-009-0275-4"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19800-7_41"},{"key":"e_1_3_2_2_10_1","first-page":"13821","article-title":"Decoupled Motion Expression Video Segmentation","author":"Fang Hao","year":"2025","unstructured":"Hao Fang, Runmin Cong, Xiankai Lu, Xiaofei Zhou, Sam Kwong, and Wei Zhang. 2025. Decoupled Motion Expression Video Segmentation. In CVPR. 13821-13831.","journal-title":"CVPR."},{"key":"e_1_3_2_2_11_1","first-page":"991","article-title":"Semantic contours from inverse detectors","author":"Hariharan Bharath","year":"2011","unstructured":"Bharath Hariharan, Pablo Arbelaez, Lubomir D. Bourdev, Subhransu Maji, and Jitendra Malik. 2011. Semantic contours from inverse detectors. In CVPR. 991-998.","journal-title":"CVPR."},{"key":"e_1_3_2_2_12_1","first-page":"770","article-title":"Deep Residual Learning for Image Recognition","author":"He Kaiming","year":"2016","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2016. Deep Residual Learning for Image Recognition. In CVPR. 770-778.","journal-title":"CVPR."},{"key":"e_1_3_2_2_13_1","unstructured":"Lei Ke Mingqiao Ye Martin Danelljan Yifan Liu Yu-Wing Tai Chi-Keung Tang and Fisher Yu. 2023. Segment Anything in High Quality. In NeurIPS."},{"key":"e_1_3_2_2_14_1","first-page":"4015","article-title":"Segment anything","author":"Kirillov Alexander","year":"2023","unstructured":"Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chloe Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alexander C Berg, Wan-Yen Lo, et al., 2023. Segment anything. In ICCV. 4015-4026.","journal-title":"ICCV."},{"key":"e_1_3_2_2_15_1","first-page":"9579","article-title":"LISA: Reasoning Segmentation via Large Language Model","author":"Lai Xin","year":"2024","unstructured":"Xin Lai, Zhuotao Tian, Yukang Chen, Yanwei Li, Yuhui Yuan, Shu Liu, and Jiaya Jia. 2024. LISA: Reasoning Segmentation via Large Language Model. In CVPR. 9579-9589.","journal-title":"CVPR."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-023-01886-8"},{"key":"e_1_3_2_2_17_1","first-page":"740","article-title":"Microsoft COCO","author":"Lin Tsung-Yi","year":"2014","unstructured":"Tsung-Yi Lin, Michael Maire, Serge J. Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C. Lawrence Zitnick. 2014. Microsoft COCO: Common Objects in Context. In ECCV. 740-755.","journal-title":"Common Objects in Context. In ECCV."},{"key":"e_1_3_2_2_18_1","volume-title":"Nian Liu, and Fahad Shahbaz Khan.","author":"Liu Yuanwei","year":"2024","unstructured":"Yuanwei Liu, Junwei Han, Xiwen Yao, Salman Khan, Hisham Cholakkal, Rao Muhammad Anwer, Nian Liu, and Fahad Shahbaz Khan. 2024. Bidirectional Reciprocative Information Communication for Few-Shot Semantic Segmentation. In ICML."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58545-7_9"},{"key":"e_1_3_2_2_20_1","first-page":"6921","article-title":"Hypercorrelation Squeeze for Few-Shot Segmenation","author":"Min Juhong","year":"2021","unstructured":"Juhong Min, Dahyun Kang, and Minsu Cho. 2021. Hypercorrelation Squeeze for Few-Shot Segmenation. In ICCV. 6921-6932.","journal-title":"ICCV."},{"key":"e_1_3_2_2_21_1","first-page":"19209","article-title":"MSI","author":"Moon Seonghyeon","year":"2023","unstructured":"Seonghyeon Moon, Samuel S. Sohn, Honglu Zhou, Sejong Yoon, Vladimir Pavlovic, Muhammad Haris Khan, and Mubbasir Kapadia. 2023. MSI: Maximize Support-Set Information for Few-Shot Segmentation. In ICCV. 19209-19219.","journal-title":"Maximize Support-Set Information for Few-Shot Segmentation. In ICCV."},{"key":"e_1_3_2_2_22_1","first-page":"622","article-title":"Feature Weighting and Boosting for Few-Shot Segmentation","author":"Nguyen Khoi","year":"2019","unstructured":"Khoi Nguyen and Sinisa Todorovic. 2019. Feature Weighting and Boosting for Few-Shot Segmentation. In ICCV. 622-631.","journal-title":"ICCV."},{"key":"e_1_3_2_2_23_1","volume-title":"Jaejoon Yoo, and Jae-Pil Heo.","author":"Park Suho","year":"2025","unstructured":"Suho Park, SuBeen Lee, Hyun Seok Seong, Jaejoon Yoo, and Jae-Pil Heo. 2025. Foreground-Covering Prototype Generation and Matching for SAM-Aided Few-Shot Segmentation. In AAAI."},{"key":"e_1_3_2_2_24_1","first-page":"23641","article-title":"Hierarchical Dense Correlation Distillation for Few-Shot Segmentation","author":"Peng Bohao","year":"2023","unstructured":"Bohao Peng, Zhuotao Tian, Xiaoyang Wu, Chengyao Wang, Shu Liu, Jingyong Su, and Jiaya Jia. 2023. Hierarchical Dense Correlation Distillation for Few-Shot Segmentation. In CVPR. 23641-23651.","journal-title":"CVPR."},{"key":"e_1_3_2_2_25_1","first-page":"18061","article-title":"DenseCLIP","author":"Rao Yongming","year":"2022","unstructured":"Yongming Rao, Wenliang Zhao, Guangyi Chen, Yansong Tang, Zheng Zhu, Guan Huang, Jie Zhou, and Jiwen Lu. 2022. DenseCLIP: Language-Guided Dense Prediction with Context-Aware Prompting. In CVPR. 18061-18070.","journal-title":"Language-Guided Dense Prediction with Context-Aware Prompting. In CVPR."},{"key":"e_1_3_2_2_26_1","volume-title":"Grounded SAM: Assembling Open-World Models for Diverse Visual Tasks. CoRR","author":"Ren Tianhe","year":"2024","unstructured":"Tianhe Ren, Shilong Liu, Ailing Zeng, Jing Lin, Kunchang Li, He Cao, Jiayu Chen, Xinyu Huang, Yukang Chen, Feng Yan, Zhaoyang Zeng, Hao Zhang, Feng Li, Jie Yang, Hongyang Li, Qing Jiang, and Lei Zhang. 2024. Grounded SAM: Assembling Open-World Models for Diverse Visual Tasks. CoRR, Vol. abs\/2401.14159 (2024)."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_2_28_1","first-page":"1","article-title":"One-Shot Learning for Semantic Segmentation","author":"Shaban Amirreza","year":"2017","unstructured":"Amirreza Shaban, Shray Bansal, Zhen Liu, Irfan Essa, and Byron Boots. 2017. One-Shot Learning for Semantic Segmentation. In BMVC. 1-13.","journal-title":"BMVC."},{"key":"e_1_3_2_2_29_1","first-page":"23565","article-title":"VRP-SAM","author":"Sun Yanpeng","year":"2024","unstructured":"Yanpeng Sun, Jiahui Chen, Shan Zhang, Xinyu Zhang, Qiang Chen, Gang Zhang, Errui Ding, Jingdong Wang, and Zechao Li. 2024. VRP-SAM: SAM with Visual Reference Prompt. In CVPR. 23565-23574.","journal-title":"SAM with Visual Reference Prompt. In CVPR."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3013717"},{"key":"e_1_3_2_2_31_1","first-page":"3941","article-title":"Rethinking Prior Information Generation with CLIP for Few-Shot Segmentation","author":"Wang Jin","year":"2024","unstructured":"Jin Wang, Bingfeng Zhang, Jian Pang, Honglong Chen, and Weifeng Liu. 2024. Rethinking Prior Information Generation with CLIP for Few-Shot Segmentation. In CVPR. 3941-3951.","journal-title":"CVPR."},{"key":"e_1_3_2_2_32_1","first-page":"9196","article-title":"PANet","author":"Wang Kaixin","year":"2019","unstructured":"Kaixin Wang, Jun Hao Liew, Yingtian Zou, Daquan Zhou, and Jiashi Feng. 2019. PANet: Few-Shot Image Semantic Segmentation With Prototype Alignment. In ICCV. 9196-9205.","journal-title":"Few-Shot Image Semantic Segmentation With Prototype Alignment. In ICCV."},{"key":"e_1_3_2_2_33_1","first-page":"19175","article-title":"Image as a Foreign Language","author":"Wang Wenhui","year":"2023","unstructured":"Wenhui Wang, Hangbo Bao, Li Dong, Johan Bjorck, Zhiliang Peng, Qiang Liu, Kriti Aggarwal, Owais Khan Mohammed, Saksham Singhal, Subhojit Som, and Furu Wei. 2023a. Image as a Foreign Language: BEIT Pretraining for Vision and Vision-Language Tasks. In CVPR. 19175-19186.","journal-title":"BEIT Pretraining for Vision and Vision-Language Tasks. In CVPR."},{"key":"e_1_3_2_2_34_1","unstructured":"Yuan Wang Naisong Luo and Tianzhu Zhang. 2023b. Focus on Query: Adversarial Mining Transformer for Few-Shot Segmentation. In NeurIPS."},{"key":"e_1_3_2_2_35_1","first-page":"7183","article-title":"Rethinking the Correlation in Few-Shot Segmentation","author":"Wang Yuan","year":"2023","unstructured":"Yuan Wang, Rui Sun, and Tianzhu Zhang. 2023c. Rethinking the Correlation in Few-Shot Segmentation: A Buoys View. In CVPR. 7183-7192.","journal-title":"A Buoys View. In CVPR."},{"key":"e_1_3_2_2_36_1","first-page":"16111","article-title":"EfficientSAM","author":"Xiong Yunyang","year":"2024","unstructured":"Yunyang Xiong, Bala Varadarajan, Lemeng Wu, Xiaoyu Xiang, Fanyi Xiao, Chenchen Zhu, Xiaoliang Dai, Dilin Wang, Fei Sun, Forrest N. Iandola, Raghuraman Krishnamoorthi, and Vikas Chandra. 2024. EfficientSAM: Leveraged Masked Image Pretraining for Efficient Segment Anything. In CVPR. 16111-16121.","journal-title":"Leveraged Masked Image Pretraining for Efficient Segment Anything. In CVPR."},{"key":"e_1_3_2_2_37_1","first-page":"416","article-title":"Eliminating feature ambiguity for few-shot segmentation","author":"Xu Qianxiong","year":"2025","unstructured":"Qianxiong Xu, Guosheng Lin, Chen Change Loy, Cheng Long, Ziyue Li, and Rui Zhao. 2025. Eliminating feature ambiguity for few-shot segmentation. In ECCV. 416-433.","journal-title":"ECCV."},{"key":"e_1_3_2_2_38_1","first-page":"655","article-title":"Self-Calibrated Cross Attention Network for Few-Shot Segmentation","author":"Xu Qianxiong","year":"2023","unstructured":"Qianxiong Xu, Wenting Zhao, Guosheng Lin, and Cheng Long. 2023. Self-Calibrated Cross Attention Network for Few-Shot Segmentation. In ICCV. 655-665.","journal-title":"ICCV."},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58598-3_45"},{"key":"e_1_3_2_2_40_1","first-page":"8701","article-title":"Mining Latent Classes for Few-shot Segmentation","author":"Yang Lihe","year":"2021","unstructured":"Lihe Yang, Wei Zhuo, Lei Qi, Yinghuan Shi, and Yang Gao. 2021. Mining Latent Classes for Few-shot Segmentation. In ICCV. 8701-8710.","journal-title":"ICCV."},{"key":"e_1_3_2_2_41_1","first-page":"7131","article-title":"MIANet","author":"Yang Yong","year":"2023","unstructured":"Yong Yang, Qiong Chen, Yuan Feng, and Tianlin Huang. 2023. MIANet: Aggregating Unbiased Instance and General Information for Few-Shot Semantic Segmentation. In CVPR. 7131-7140.","journal-title":"In CVPR."},{"key":"e_1_3_2_2_42_1","first-page":"8312","article-title":"Self-Guided and Cross-Guided Learning for Few-Shot Segmentation","author":"Zhang Bingfeng","year":"2021","unstructured":"Bingfeng Zhang, Jimin Xiao, and Terry Qin. 2021b. Self-Guided and Cross-Guided Learning for Few-Shot Segmentation. In CVPR. 8312-8321.","journal-title":"CVPR."},{"key":"e_1_3_2_2_43_1","first-page":"21984","article-title":"Few-shot segmentation via cycle-consistent transformer","volume":"34","author":"Zhang Gengwei","year":"2021","unstructured":"Gengwei Zhang, Guoliang Kang, Yi Yang, and Yunchao Wei. 2021a. Few-shot segmentation via cycle-consistent transformer. In NeurIPS, Vol. 34. 21984-21996.","journal-title":"NeurIPS"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19815-1_40"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754967","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:07:24Z","timestamp":1765339644000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754967"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":44,"alternative-id":["10.1145\/3746027.3754967","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754967","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}