{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,28]],"date-time":"2026-02-28T17:58:01Z","timestamp":1772301481025,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"China Mobile Zijin Innovation Insititute","award":["No. NR2310J7M"],"award-info":[{"award-number":["No. NR2310J7M"]}]},{"name":"the National Natural Science Foundation of China","award":["No. 62372223"],"award-info":[{"award-number":["No. 62372223"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681338","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:49Z","timestamp":1729925989000},"page":"5374-5383","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Few-shot Semantic Segmentation via Perceptual Attention and Spatial Control"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2568-4130","authenticated-orcid":false,"given":"Guangchen","family":"Shi","sequence":"first","affiliation":[{"name":"National Key Lab for Novel Software Technology, Nanjing University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-2411-6170","authenticated-orcid":false,"given":"Wei","family":"Zhu","sequence":"additional","affiliation":[{"name":"China Mobile Zijin Innovation Insititute, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3022-3718","authenticated-orcid":false,"given":"Yirui","family":"Wu","sequence":"additional","affiliation":[{"name":"College of Computer Science and Software Engineering, Hohai University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1783-4298","authenticated-orcid":false,"given":"Danhuai","family":"Zhao","sequence":"additional","affiliation":[{"name":"China Mobile Zijin Innovation Insititute, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5324-7596","authenticated-orcid":false,"given":"Kang","family":"Zheng","sequence":"additional","affiliation":[{"name":"China Mobile Zijin Innovation Insititute, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7051-5347","authenticated-orcid":false,"given":"Tong","family":"Lu","sequence":"additional","affiliation":[{"name":"National Key Lab for Novel Software Technology, Nanjing University, Nanjing, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3261387"},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of International Conference on Learning Representations.","author":"Baranchuk Dmitry","year":"2022","unstructured":"Dmitry Baranchuk, Andrey Voynov, Ivan Rubachev, Valentin Khrulkov, and Artem Babenko. 2022. Label-Efficient Semantic Segmentation with Diffusion Models. In Proceedings of International Conference on Learning Representations."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-009-0275-4"},{"key":"e_1_3_2_1_4_1","volume-title":"Pan Gao, and Xiaojiang Peng.","author":"Fu Lihua","year":"2022","unstructured":"Lihua Fu, Haoyue Tian, Xiangping Bryce Zhai, Pan Gao, and Xiaojiang Peng. 2022. IncepFormer: Efficient Inception Transformer with Pyramid Pooling for Semantic Segmentation. CoRR, Vol. abs\/2212.03035 (2022)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME55011.2023.00148"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3295731"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10584-0_20"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19818-2_7"},{"key":"e_1_3_2_1_9_1","volume-title":"Tag2Text: Guiding Vision-Language Model via Image Tagging. CoRR","author":"Huang Xinyu","year":"2023","unstructured":"Xinyu Huang, Youcai Zhang, Jinyu Ma, Weiwei Tian, Rui Feng, Yuejie Zhang, Yaqian Li, Yandong Guo, and Lei Zhang. 2023. Tag2Text: Guiding Vision-Language Model via Image Tagging. CoRR, Vol. abs\/2303.05657 (2023)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2021.103334"},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of International Conference on Learning Representations.","author":"Diederik","unstructured":"Diederik P. Kingma and Max Welling. 2014. Auto-Encoding Variational Bayes. In Proceedings of International Conference on Learning Representations."},{"key":"e_1_3_2_1_12_1","volume-title":"Segment Anything. In IEEE\/CVF International Conference on Computer Vision, ICCV 2023","author":"Kirillov Alexander","year":"2023","unstructured":"Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chlo\u00e9 Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alexander C. Berg, Wan-Yen Lo, Piotr Doll\u00e1r, and Ross B. Girshick. 2023. Segment Anything. In IEEE\/CVF International Conference on Computer Vision, ICCV 2023, Paris, France, October 1--6, 2023. 3992--4003."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i3.28068"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-023-01875-x"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00858"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2024.3374048"},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of European Conference on Computer Vision. 740--755","author":"Lin Tsung-Yi","unstructured":"Tsung-Yi Lin, Michael Maire, Serge J. Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C. Lawrence Zitnick. 2014. Microsoft COCO: Common Objects in Context. In Proceedings of European Conference on Computer Vision. 740--755."},{"key":"e_1_3_2_1_18_1","volume-title":"WegFormer: Transformers for Weakly Supervised Semantic Segmentation. CoRR","author":"Liu Chunmeng","year":"2022","unstructured":"Chunmeng Liu, Enze Xie, Wenjia Wang, Wenhai Wang, Guangyao Li, and Ping Luo. 2022. WegFormer: Transformers for Weakly Supervised Semantic Segmentation. CoRR, Vol. abs\/2203.08421 (2022)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01126"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of International Conference on Machine Learning, Jennifer G. Dy and Andreas Krause (Eds.). 3546--3555","author":"Michaelis Claudio","unstructured":"Claudio Michaelis, Matthias Bethge, and Alexander S. Ecker. 2018. One-Shot Segmentation in Clutter. In Proceedings of International Conference on Machine Learning, Jennifer G. Dy and Andreas Krause (Eds.). 3546--3555."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-00937-3_76"},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of International Conference on Machine Learning. 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. In Proceedings of International Conference on Machine Learning. 8748--8763."},{"key":"e_1_3_2_1_24_1","volume-title":"Proceedings of International Conference on Learning Representations.","author":"Rakelly Kate","year":"2018","unstructured":"Kate Rakelly, Evan Shelhamer, Trevor Darrell, Alyosha A. Efros, and Sergey Levine. 2018. Conditional Networks for Few-Shot Semantic Segmentation. In Proceedings of International Conference on Learning Representations."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_26_1","volume-title":"Few-shot learning using diffusion based synthesis and alignment. CoRR","author":"Roy Aniket","year":"2022","unstructured":"Aniket Roy, Anshul Shah, Ketul Shah, Anirban Roy, and Rama Chellappa. 2022. DiffAlign : Few-shot learning using diffusion based synthesis and alignment. CoRR, Vol. abs\/2212.05404 (2022)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.5244\/C.31.167"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548218"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME51207.2021.9428425"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20044-1_9"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3013717"},{"key":"e_1_3_2_1_32_1","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition Workshops. 426--433","author":"Visin Francesco","unstructured":"Francesco Visin, Adriana Romero, Kyunghyun Cho, Matteo Matteucci, Marco Ciccone, Kyle Kastner, Yoshua Bengio, and Aaron C. Courville. 2016. ReSeg: A Recurrent Neural Network-Based Model for Semantic Segmentation. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition Workshops. 426--433."},{"key":"e_1_3_2_1_33_1","volume-title":"Diffusion Model is Secretly a Training-free Open Vocabulary Semantic Segmenter. CoRR","author":"Wang Jinglong","year":"2023","unstructured":"Jinglong Wang, Xiawei Li, Jing Zhang, Qingyuan Xu, Qin Zhou, Qian Yu, Lu Sheng, and Dong Xu. 2023. Diffusion Model is Secretly a Training-free Open Vocabulary Semantic Segmenter. CoRR, Vol. abs\/2309.02773 (2023)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19818-2_3"},{"key":"e_1_3_2_1_35_1","volume-title":"MedSegDiff: Medical Image Segmentation with Diffusion Probabilistic Model. CoRR","author":"Wu Junde","year":"2022","unstructured":"Junde Wu, Huihui Fang, Yu Zhang, Yehui Yang, and Yanwu Xu. 2022. MedSegDiff: Medical Image Segmentation with Diffusion Probabilistic Model. CoRR, Vol. abs\/2211.00611 (2022)."},{"key":"e_1_3_2_1_36_1","first-page":"1623","article-title":"MedSegDiff: Medical Image Segmentation with Diffusion Probabilistic Model","volume":"227","author":"Wu Junde","year":"2023","unstructured":"Junde Wu, Rao Fu, Huihui Fang, Yu Zhang, Yehui Yang, Haoyi Xiong, Huiying Liu, and Yanwu Xu. 2023. MedSegDiff: Medical Image Segmentation with Diffusion Probabilistic Model. In Proceedings of Medical Imaging with Deep Learning, Vol. 227. 1623--1639.","journal-title":"Proceedings of Medical Imaging with Deep Learning"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i6.28418"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.164"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i6.28465"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00689"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00536"},{"key":"e_1_3_2_1_42_1","volume-title":"Proceedings of NAnnual Conference on Neural Information Processing Systems. 21984--21996","author":"Zhang Gengwei","year":"2021","unstructured":"Gengwei Zhang, Guoliang Kang, Yi Yang, and Yunchao Wei. 2021. Few-Shot Segmentation via Cycle-Consistent Transformer. In Proceedings of NAnnual Conference on Neural Information Processing Systems. 21984--21996."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00747"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3193612"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i7.28572"},{"key":"e_1_3_2_1_47_1","volume-title":"Quaternion-valued Correlation Learning for Few-Shot Semantic Segmentation","author":"Zheng Zewen","year":"2022","unstructured":"Zewen Zheng, Guoheng Huang, Xiaochen Yuan, Chi-Man Pun, Hongrui Liu, and Wing-Kuen Ling. 2022. Quaternion-valued Correlation Learning for Few-Shot Semantic Segmentation. IEEE Trans. Circuits Syst. Video Technol. (2022), 1--1."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681338","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681338","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:43Z","timestamp":1750295863000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681338"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":47,"alternative-id":["10.1145\/3664647.3681338","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681338","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}