{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:09:34Z","timestamp":1750219774284,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,12]],"date-time":"2023-06-12T00:00:00Z","timestamp":1686528000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,12]]},"DOI":"10.1145\/3591106.3592297","type":"proceedings-article","created":{"date-parts":[[2023,6,8]],"date-time":"2023-06-08T22:33:38Z","timestamp":1686263618000},"page":"472-480","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Learning with Adaptive Knowledge for Continual Image-Text Modeling"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8707-0180","authenticated-orcid":false,"given":"Yutian","family":"Luo","sequence":"first","affiliation":[{"name":"Renmin University of China, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6903-2962","authenticated-orcid":false,"given":"Yizhao","family":"Gao","sequence":"additional","affiliation":[{"name":"Renmin University of China, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0280-7724","authenticated-orcid":false,"given":"Zhiwu","family":"Lu","sequence":"additional","affiliation":[{"name":"Renmin University of China, China"}]}],"member":"320","published-online":{"date-parts":[[2023,6,12]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01219-9_9"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.753"},{"key":"e_1_3_2_2_3_1","first-page":"11817","article-title":"Gradient based sample selection for online continual learning","volume":"32","author":"Aljundi Rahaf","year":"2019","unstructured":"Rahaf Aljundi, Min Lin, Baptiste Goujaud, and Yoshua Bengio. 2019. Gradient based sample selection for online continual learning. Advances in Neural Information Processing Systems 32 (2019), 11817\u201311826.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_4_1","volume-title":"Pseudo-recursal: Solving the catastrophic forgetting problem in deep neural networks. arXiv preprint arXiv:1802.03875","author":"Atkinson Craig","year":"2018","unstructured":"Craig Atkinson, Brendan McCane, Lech Szymanski, and Anthony Robins. 2018. Pseudo-recursal: Solving the catastrophic forgetting problem in deep neural networks. arXiv preprint arXiv:1802.03875 (2018)."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00175"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01275"},{"key":"e_1_3_2_2_7_1","first-page":"15920","article-title":"Dark experience for general continual learning: a strong, simple baseline","volume":"33","author":"Buzzega Pietro","year":"2020","unstructured":"Pietro Buzzega, Matteo Boschini, Angelo Porrello, Davide Abati, and Simone Calderara. 2020. Dark experience for general continual learning: a strong, simple baseline. Advances in Neural Information Processing Systems 33 (2020), 15920\u201315930.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00938"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01252-6_33"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i8.16861"},{"key":"e_1_3_2_2_11_1","volume-title":"On tiny episodic memories in continual learning. arXiv preprint arXiv:1902.10486","author":"Chaudhry Arslan","year":"2019","unstructured":"Arslan Chaudhry, Marcus Rohrbach, Mohamed Elhoseiny, Thalaiyasingam Ajanthan, Puneet\u00a0K Dokania, Philip\u00a0HS Torr, and Marc\u2019Aurelio Ranzato. 2019. On tiny episodic memories in continual learning. arXiv preprint arXiv:1902.10486 (2019)."},{"key":"e_1_3_2_2_12_1","volume-title":"IMRAM: Iterative Matching With Recurrent Attention Memory for Cross-Modal Image-Text Retrieval. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 12652\u201312660","author":"Chen Hui","year":"2020","unstructured":"Hui Chen, Guiguang Ding, Xudong Liu, Zijia Lin, Ji Liu, and Jungong Han. 2020. IMRAM: Iterative Matching With Recurrent Attention Memory for Cross-Modal Image-Text Retrieval. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 12652\u201312660."},{"key":"e_1_3_2_2_13_1","volume-title":"International Conference on Machine Learning. 1597\u20131607","author":"Chen Ting","year":"2020","unstructured":"Ting Chen, Simon Kornblith, Mohammad Norouzi, and Geoffrey Hinton. 2020. A simple framework for contrastive learning of visual representations. In International Conference on Machine Learning. 1597\u20131607."},{"key":"e_1_3_2_2_14_1","first-page":"3366","article-title":"A continual learning survey: Defying forgetting in classification tasks","volume":"44","author":"De\u00a0Lange Matthias","year":"2021","unstructured":"Matthias De\u00a0Lange, Rahaf Aljundi, Marc Masana, Sarah Parisot, Xu Jia, Ale\u0161 Leonardis, Gregory Slabaugh, and Tinne Tuytelaars. 2021. A continual learning survey: Defying forgetting in classification tasks. TPAMI 44, 7 (2021), 3366\u20133385.","journal-title":"TPAMI"},{"key":"e_1_3_2_2_15_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_2_16_1","first-page":"22605","article-title":"Coot: Cooperative hierarchical transformer for video-text representation learning","volume":"33","author":"Ging Simon","year":"2020","unstructured":"Simon Ging, Mohammadreza Zolfaghari, Hamed Pirsiavash, and Thomas Brox. 2020. Coot: Cooperative hierarchical transformer for video-text representation learning. Advances in Neural Information Processing Systems 33 (2020), 22605\u201322618.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_2_18_1","volume-title":"WenLan: Bridging vision and language by large-scale multi-modal pre-training. arXiv preprint arXiv:2103.06561","author":"Huo Yuqi","year":"2021","unstructured":"Yuqi Huo, Manli Zhang, Guangzhen Liu, Haoyu Lu, Yizhao Gao, Guoxing Yang, Jingyuan Wen, Heng Zhang, Baogui Xu, Weihao Zheng, 2021. WenLan: Bridging vision and language by large-scale multi-modal pre-training. arXiv preprint arXiv:2103.06561 (2021)."},{"key":"e_1_3_2_2_19_1","volume-title":"International Conference on Machine Learning. 4904\u20134916","author":"Jia Chao","year":"2021","unstructured":"Chao Jia, Yinfei Yang, Ye Xia, Yi-Ting Chen, Zarana Parekh, Hieu Pham, Quoc Le, Yun-Hsuan Sung, Zhen Li, and Tom Duerig. 2021. Scaling up visual and vision-language representation learning with noisy text supervision. In International Conference on Machine Learning. 4904\u20134916."},{"key":"e_1_3_2_2_20_1","volume-title":"Guiding the Long-Short Term Memory Model for Image Caption Generation. In IEEE International Conference on Computer Vision. 2407\u20132415","author":"Jia Xu","year":"2015","unstructured":"Xu Jia, Efstratios Gavves, Basura Fernando, and Tinne Tuytelaars. 2015. Guiding the Long-Short Term Memory Model for Image Caption Generation. In IEEE International Conference on Computer Vision. 2407\u20132415."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00133"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1611835114"},{"key":"e_1_3_2_2_23_1","volume-title":"Continual classification learning using generative models. arXiv preprint arXiv:1810.10612","author":"Lavda Frantzeska","year":"2018","unstructured":"Frantzeska Lavda, Jason Ramapuram, Magda Gregorova, and Alexandros Kalousis. 2018. Continual classification learning using generative models. arXiv preprint arXiv:1810.10612 (2018)."},{"key":"e_1_3_2_2_24_1","volume-title":"Stacked Cross Attention for Image-Text Matching. ArXiv abs\/1803.08024","author":"Lee Kuang-Huei","year":"2018","unstructured":"Kuang-Huei Lee, Xi Chen, Gang Hua, Houdong Hu, and Xiaodong He. 2018. Stacked Cross Attention for Image-Text Matching. ArXiv abs\/1803.08024 (2018)."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00725"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6795"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2773081"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01226"},{"key":"e_1_3_2_2_30_1","first-page":"13","article-title":"Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks","volume":"32","author":"Lu Jiasen","year":"2019","unstructured":"Jiasen Lu, Dhruv Batra, Devi Parikh, and Stefan Lee. 2019. Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. Advances in Neural Information Processing Systems 32 (2019), 13\u201323.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_5"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00810"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00160"},{"key":"e_1_3_2_2_34_1","volume-title":"International Conference on Machine Learning. 8748\u20138763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, 2021. Learning transferable visual models from natural language supervision. In International Conference on Machine Learning. 8748\u20138763."},{"key":"e_1_3_2_2_35_1","volume-title":"Lifelong Generative Modeling. ArXiv abs\/1705.09847","author":"Ramapuram Jason","year":"2020","unstructured":"Jason Ramapuram, Magda Gregorov\u00e1, and Alexandros Kalousis. 2020. Lifelong Generative Modeling. ArXiv abs\/1705.09847 (2020)."},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.148"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.587"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2884462"},{"key":"e_1_3_2_2_39_1","volume-title":"International Conference on Machine Learning. 4548\u20134557","author":"Serra Joan","year":"2018","unstructured":"Joan Serra, Didac Suris, Marius Miron, and Alexandros Karatzoglou. 2018. Overcoming catastrophic forgetting with hard attention to the task. In International Conference on Machine Learning. 4548\u20134557."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1238"},{"key":"e_1_3_2_2_41_1","first-page":"2994","article-title":"Continual learning with deep generative replay","volume":"30","author":"Shin Hanul","year":"2017","unstructured":"Hanul Shin, Jung\u00a0Kwon Lee, Jaehong Kim, and Jiwon Kim. 2017. Continual learning with deep generative replay. Advances in Neural Information Processing Systems 30 (2017), 2994\u20133003.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463257"},{"key":"e_1_3_2_2_43_1","volume-title":"Lxmert: Learning cross-modality encoder representations from transformers. arXiv preprint arXiv:1908.07490","author":"Tan Hao","year":"2019","unstructured":"Hao Tan and Mohit Bansal. 2019. Lxmert: Learning cross-modality encoder representations from transformers. arXiv preprint arXiv:1908.07490 (2019)."},{"volume-title":"IEEE Conference on Computer Vision and Pattern Recognition. 3156\u20133164","author":"Vinyals Oriol","key":"e_1_3_2_2_44_1","unstructured":"Oriol Vinyals, Alexander Toshev, Samy Bengio, and D. Erhan. 2015. Show and tell: A neural image caption generator. In IEEE Conference on Computer Vision and Pattern Recognition. 3156\u20133164."},{"key":"e_1_3_2_2_45_1","first-page":"907","article-title":"Reinforced continual learning","volume":"31","author":"Xu Ju","year":"2018","unstructured":"Ju Xu and Zhanxing Zhu. 2018. Reinforced continual learning. Advances in Neural Information Processing Systems 31 (2018), 907\u2013916.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00166"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093365"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00877"}],"event":{"name":"ICMR '23: International Conference on Multimedia Retrieval","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Thessaloniki Greece","acronym":"ICMR '23"},"container-title":["Proceedings of the 2023 ACM International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592297","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3591106.3592297","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:37:30Z","timestamp":1750178250000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592297"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,12]]},"references-count":48,"alternative-id":["10.1145\/3591106.3592297","10.1145\/3591106"],"URL":"https:\/\/doi.org\/10.1145\/3591106.3592297","relation":{},"subject":[],"published":{"date-parts":[[2023,6,12]]},"assertion":[{"value":"2023-06-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}