{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T09:08:37Z","timestamp":1765357717844,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":66,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"The National Natural Science Foundation of China","award":["62271018"],"award-info":[{"award-number":["62271018"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3680654","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:27Z","timestamp":1729925967000},"page":"2117-2126","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Continual Panoptic Perception: Towards Multi-modal Incremental Interpretation of Remote Sensing Images"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7723-4823","authenticated-orcid":false,"given":"Bo","family":"Yuan","sequence":"first","affiliation":[{"name":"Beihang University &amp; Tianmushan Laboratory, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6701-0471","authenticated-orcid":false,"given":"Danpei","family":"Zhao","sequence":"additional","affiliation":[{"name":"Beihang University &amp; Tianmushan Laboratory, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7006-187X","authenticated-orcid":false,"given":"Zhuoran","family":"Liu","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-8037-2124","authenticated-orcid":false,"given":"Wentao","family":"Li","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7318-5881","authenticated-orcid":false,"given":"Tian","family":"Li","sequence":"additional","affiliation":[{"name":"Beihang University &amp; Tianmushan Laboratory, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"e_1_3_2_1_2_1","volume-title":"Decomposed knowledge distillation for class-incremental semantic segmentation. Advances in neural information processing systems","author":"Baek Donghyeon","year":"2022","unstructured":"Donghyeon Baek, Youngmin Oh, Sanghoon Lee, Junghyup Lee, and Bumsub Ham. 2022. Decomposed knowledge distillation for class-incremental semantic segmentation. Advances in neural information processing systems, Vol. 35 (2022), 10380--10392."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00812"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00067"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-307-3.50012-5"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00925"},{"key":"e_1_3_2_1_7_1","volume-title":"SSUL: Semantic segmentation with unknown label for exemplar-based class-incremental learning. Advances in neural information processing systems","author":"Cha Sungmin","year":"2021","unstructured":"Sungmin Cha, YoungJoon Yoo, Taesup Moon, et al. 2021. SSUL: Semantic segmentation with unknown label for exemplar-based class-incremental learning. Advances in neural information processing systems, Vol. 34 (2021), 10919--10930."},{"volume-title":"Riemannian Walk for Incremental Learning: Understanding Forgetting and Intransigence. In European Conference on Computer Vision (ECCV).","author":"Chaudhry Arslan","key":"e_1_3_2_1_8_1","unstructured":"Arslan Chaudhry, Puneet K. Dokania, Thalaiyasingam Ajanthan, and Philip H. S. Torr. 2018. Riemannian Walk for Incremental Learning: Understanding Forgetting and Intransigence. In European Conference on Computer Vision (ECCV)."},{"key":"e_1_3_2_1_9_1","volume-title":"Philip H. S. Torr, and David Lopez-Paz.","author":"Chaudhry Arslan","year":"2021","unstructured":"Arslan Chaudhry, Albert Gordo, Puneet Kumar Dokania, Philip H. S. Torr, and David Lopez-Paz. 2021. Using Hindsight to Anchor Past Knowledge in Continual Learning. In AAAI."},{"key":"e_1_3_2_1_10_1","volume-title":"Visualgpt: Data-efficient adaptation of pretrained language models for image captioning. In CVPR. 18030--18040.","author":"Chen Jun","year":"2022","unstructured":"Jun Chen, Han Guo, Kai Yi, Boyang Li, and Mohamed Elhoseiny. 2022. Visualgpt: Data-efficient adaptation of pretrained language models for image captioning. In CVPR. 18030--18040."},{"key":"e_1_3_2_1_11_1","volume-title":"Chen Change Loy, and Dahua Lin","author":"Chen Kai","year":"2019","unstructured":"Kai Chen, Jiaqi Wang, Jiangmiao Pang, Yuhang Cao, Yu Xiong, Xiaoxiao Li, Shuyang Sun, Wansen Feng, Ziwei Liu, Jiarui Xu, Zheng Zhang, Dazhi Cheng, Chenchen Zhu, Tianheng Cheng, Qijie Zhao, Buyu Li, Xin Lu, Rui Zhu, Yue Wu, Jifeng Dai, Jingdong Wang, Jianping Shi, Wanli Ouyang, Chen Change Loy, and Dahua Lin. 2019. MMDetection: Open MMLab Detection Toolbox and Benchmark. arXiv preprint arXiv:1906.07155 (2019)."},{"key":"e_1_3_2_1_12_1","volume-title":"Bevdistill: Cross-modal bev distillation for multi-view 3d object detection. arXiv preprint arXiv:2211.09386","author":"Chen Zehui","year":"2022","unstructured":"Zehui Chen, Zhenyu Li, Shiquan Zhang, Liangji Fang, Qinhong Jiang, and Feng Zhao. 2022. Bevdistill: Cross-modal bev distillation for multi-view 3d object detection. arXiv preprint arXiv:2211.09386 (2022)."},{"key":"e_1_3_2_1_13_1","first-page":"17864","article-title":"Per-pixel classification is not all you need for semantic segmentation","volume":"34","author":"Cheng Bowen","year":"2021","unstructured":"Bowen Cheng, Alex Schwing, and Alexander Kirillov. 2021. Per-pixel classification is not all you need for semantic segmentation. Advances in Neural Information Processing Systems, Vol. 34 (2021), 17864--17875.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00368"},{"key":"e_1_3_2_1_15_1","first-page":"16736","article-title":"Ratt: Recurrent attention to transient tasks for continual image captioning","volume":"33","author":"Chiaro Riccardo Del","year":"2020","unstructured":"Riccardo Del Chiaro, Bart\u0142omiej Twardowski, Andrew Bagdanov, and Joost Van de Weijer. 2020. Ratt: Recurrent attention to transient tasks for continual image captioning. Advances in Neural Information Processing Systems, Vol. 33 (2020), 16736--16748.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463022"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00403"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_19_1","volume-title":"Image captioning: Transforming objects into words. Advances in neural information processing systems","author":"Herdade Simao","year":"2019","unstructured":"Simao Herdade, Armin Kappeler, Kofi Boakye, and Joao Soares. 2019. Image captioning: Transforming objects into words. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295748"},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR). 17980--17989","author":"Hu Xiaowei","year":"2022","unstructured":"Xiaowei Hu, Zhe Gan, Jianfeng Wang, Zhengyuan Yang, Zicheng Liu, Yumao Lu, and Lijuan Wang. 2022. Scaling up vision-language pre-training for image captioning. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR). 17980--17989."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00395"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00473"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"Chengjia Jiang Tao Wang Sien Li Jinyang Wang Shirui Wang and Antonios Antoniou. 2023. Few-Shot Class-Incremental Semantic Segmentation via Pseudo-Labeling and Knowledge Distillation. In ISPDS. 192--197.","DOI":"10.1109\/ISPDS58840.2023.10235731"},{"volume-title":"Improving Replay-Based Continual Semantic Segmentation with Smart Data Selection","author":"Kalb Tobias","key":"e_1_3_2_1_25_1","unstructured":"Tobias Kalb, Bj\u00f6rn Mauthe, and J\u00fcrgen Beyerer. 2022. Improving Replay-Based Continual Semantic Segmentation with Smart Data Selection. In ITSC. IEEE, 1114--1121."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58565-5_41"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i1.25208"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2773081"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19827-4_21"},{"volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV). 2999--3007","author":"Lin T.","key":"e_1_3_2_1_30_1","unstructured":"T. Lin, P. Goyal, R. Girshick, K. He, and P. Dollar. 2017. Focal Loss for Dense Object Detection. In Proceedings of the IEEE\/CVF international conference on computer vision (ICCV). 2999--3007."},{"key":"e_1_3_2_1_31_1","volume-title":"Multi-task incremental learning for object detection. arXiv preprint arXiv:2002.05347","author":"Liu Xialei","year":"2020","unstructured":"Xialei Liu, Hao Yang, Avinash Ravichandran, Rahul Bhotika, and Stefano Soatto. 2020. Multi-task incremental learning for object detection. arXiv preprint arXiv:2002.05347 (2020)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_33_1","first-page":"5049","article-title":"Continual Barlow Twins: Continual Self-Supervised Learning for Remote Sensing Semantic Segmentation","volume":"16","author":"Marsocci Valerio","year":"2022","unstructured":"Valerio Marsocci and Simone Scardapane. 2022. Continual Barlow Twins: Continual Self-Supervised Learning for Remote Sensing Semantic Segmentation. IEEE J-STARS, Vol. 16 (2022), 5049--5060.","journal-title":"IEEE J-STARS"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0079-7421(08)60536-8"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00400"},{"volume-title":"V-Net: Fully Convolutional Neural Networks for Volumetric Medical Image Segmentation. In 2016 Fourth International Conference on 3D Vision (3DV). 565--571","author":"Milletari F.","key":"e_1_3_2_1_36_1","unstructured":"F. Milletari, N. Navab, and S. Ahmadi. 2016. V-Net: Fully Convolutional Neural Networks for Volumetric Medical Image Segmentation. In 2016 Fourth International Conference on 3D Vision (3DV). 565--571."},{"key":"e_1_3_2_1_37_1","volume-title":"Trung Tran, Tolcha Yalew, and Daeyoung Kim.","author":"Nguyen Giang","year":"2019","unstructured":"Giang Nguyen, Tae Joon Jun, Trung Tran, Tolcha Yalew, and Daeyoung Kim. 2019. Contcap: A scalable framework for continual image captioning. arXiv preprint arXiv:1909.08745 (2019)."},{"key":"e_1_3_2_1_38_1","volume-title":"Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311--318","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311--318."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01636"},{"key":"e_1_3_2_1_40_1","volume-title":"GAPS: Few-Shot Incremental Semantic Segmentation via Guided Copy-Paste Synthesis. CVPRW","author":"Qiu Ri-Zhao","year":"2023","unstructured":"Ri-Zhao Qiu, Peiyi Chen, Wangzhe Sun, Yu-Xiong Wang, and Kris Hauser. 2023. GAPS: Few-Shot Incremental Semantic Segmentation via Guided Copy-Paste Synthesis. CVPRW (2023)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00477"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01312"},{"key":"e_1_3_2_1_43_1","unstructured":"David Rolnick Arun Ahuja Jonathan Schwarz Timothy P. Lillicrap and Greg Wayne. 2019. Experience Replay for Continual Learning. In Advances in neural information processing systems."},{"key":"e_1_3_2_1_44_1","volume-title":"MiCro: Modeling Cross-Image Semantic Relationship Dependencies for Class-Incremental Semantic Segmentation in Remote Sensing Images","author":"Rong Xuee","year":"2023","unstructured":"Xuee Rong, Peijin Wang, Wenhui Diao, Yiran Yang, Wenxin Yin, Xuan Zeng, Hongqi Wang, and Xian Sun. 2023. MiCro: Modeling Cross-Image Semantic Relationship Dependencies for Class-Incremental Semantic Segmentation in Remote Sensing Images. IEEE Transactions on Geoscience and Remote Sensing (2023)."},{"key":"e_1_3_2_1_45_1","volume-title":"An overview of multi-task learning in deep neural networks. arXiv preprint arXiv:1706.05098","author":"Ruder Sebastian","year":"2017","unstructured":"Sebastian Ruder. 2017. An overview of multi-task learning in deep neural networks. arXiv preprint arXiv:1706.05098 (2017)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00697"},{"key":"e_1_3_2_1_47_1","volume-title":"Incremental Few-Shot Semantic Segmentation via Embedding Adaptive-Update and Hyper-class Representation. ACM MM","author":"Shi Guangchen","year":"2022","unstructured":"Guangchen Shi, Yirui Wu, J. Liu, Shaohua Wan, Wenhai Wang, and Tong Lu. 2022. Incremental Few-Shot Semantic Segmentation via Embedding Adaptive-Update and Hyper-class Representation. ACM MM (2022)."},{"key":"e_1_3_2_1_48_1","first-page":"8728","article-title":"Adashare: Learning what to share for efficient deep multi-task learning","volume":"33","author":"Sun Ximeng","year":"2020","unstructured":"Ximeng Sun, Rameswar Panda, Rogerio Feris, and Kate Saenko. 2020. Adashare: Learning what to share for efficient deep multi-task learning. Advances in Neural Information Processing Systems, Vol. 33 (2020), 8728--8740.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_49_1","volume-title":"European Conference on Computer Vision (ECCV). Springer, 369--384","author":"Tian Mu","year":"2022","unstructured":"Mu Tian, Qinzhu Yang, and Yi Gao. 2022. Multi-scale Multi-task Distillation for Incremental 3D Medical Image Segmentation. In European Conference on Computer Vision (ECCV). Springer, 369--384."},{"key":"e_1_3_2_1_50_1","volume-title":"Rethinking exemplars for continual semantic segmentation in endoscopy scenes: Entropy-based mini-batch pseudo-replay. Computers in Biology and Medicine","author":"Wang Guankun","year":"2023","unstructured":"Guankun Wang, Long Bai, Yanan Wu, Tong Chen, and Hongliang Ren. 2023. Rethinking exemplars for continual semantic segmentation in endoscopy scenes: Entropy-based mini-batch pseudo-replay. Computers in Biology and Medicine (2023), 107412."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01563"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25360"},{"volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR). 7204--7213","author":"Xiao Wen","key":"e_1_3_2_1_53_1","unstructured":"Jia--Wen Xiao, Chang--Bin Zhang, Jiekang Feng, Xialei Liu, Joost van de Weijer, and Ming--Ming Cheng. 2023. Endpoints Weight Fusion for Class Incremental Semantic Segmentation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR). 7204--7213."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2915033"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i9.26275"},{"key":"e_1_3_2_1_56_1","volume-title":"Challenge, Method and Application. arXiv preprint arXiv:2310.14277","author":"Yuan Bo","year":"2023","unstructured":"Bo Yuan and Danpei Zhao. 2023. A Survey on Continual Semantic Segmentation: Theory, Challenge, Method and Application. arXiv preprint arXiv:2310.14277 (2023)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3162471"},{"key":"e_1_3_2_1_58_1","volume-title":"Learning At a Glance: Towards Interpretable Data-Limited Continual Semantic Segmentation Via Semantic-Invariance Modelling","author":"Yuan Bo","year":"2024","unstructured":"Bo Yuan, Danpei Zhao, and Zhenwei Shi. 2024. Learning At a Glance: Towards Interpretable Data-Limited Continual Semantic Segmentation Via Semantic-Invariance Modelling. IEEE Transactions on Pattern Analysis and Machine Intelligence (2024), 1--16."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00692"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01831"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1093\/nsr\/nwx105"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2021.3070203"},{"key":"e_1_3_2_1_63_1","volume-title":"Mining Unseen Classes via Regional Objectness: A Simple Baseline for Incremental Segmentation. Advances in neural information processing systems","author":"Zhang Zekang","year":"2022","unstructured":"Zekang Zhang, Guangyu Gao, Zhiyuan Fang, Jianbo Jiao, and Yunchao Wei. 2022. Mining Unseen Classes via Regional Objectness: A Simple Baseline for Incremental Segmentation. Advances in neural information processing systems, Vol. 35 (2022), 24340--24353."},{"key":"e_1_3_2_1_64_1","first-page":"1","article-title":"Panoptic Perception: A Novel Task and Fine-Grained Dataset for Universal Remote Sensing Image Interpretation","volume":"62","author":"Zhao Danpei","year":"2024","unstructured":"Danpei Zhao, Bo Yuan, Ziqiang Chen, Tian Li, Zhuoran Liu, Wentao Li, and Yue Gao. 2024. Panoptic Perception: A Novel Task and Fine-Grained Dataset for Universal Remote Sensing Image Interpretation. IEEE Transactions on Geoscience and Remote Sensing, Vol. 62 (2024), 1--14.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3273574"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-53305-1_19"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680654","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3680654","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:57Z","timestamp":1750295877000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680654"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":66,"alternative-id":["10.1145\/3664647.3680654","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3680654","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}