{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T09:53:48Z","timestamp":1761126828165,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Migu Culture Technology Co."},{"name":"Beijing Natural Science Foundation","award":["L233008"],"award-info":[{"award-number":["L233008"]}]},{"name":"Research Funds of Renmin University of China","award":["21XNLG28"],"award-info":[{"award-number":["21XNLG28"]}]},{"name":"Outstanding Innovative Talents Cultivation Funded Programs 2024 of Renmin University of China"},{"name":"Huawei Poisson Lab"},{"DOI":"10.13039\/501100006374","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62276268"],"award-info":[{"award-number":["62276268"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1145\/3589334.3645711","type":"proceedings-article","created":{"date-parts":[[2024,5,8]],"date-time":"2024-05-08T07:08:13Z","timestamp":1715152093000},"page":"3952-3963","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Understanding Human Preferences: Towards More Personalized Video to Text Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-0312-782X","authenticated-orcid":false,"given":"Yihan","family":"Wu","sequence":"first","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2163-7401","authenticated-orcid":false,"given":"Ruihua","family":"Song","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0144-1775","authenticated-orcid":false,"given":"Xu","family":"Chen","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2306-7596","authenticated-orcid":false,"given":"Hao","family":"Jiang","sequence":"additional","affiliation":[{"name":"Poisson Lab, Huawei, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4214-7858","authenticated-orcid":false,"given":"Zhao","family":"Cao","sequence":"additional","affiliation":[{"name":"Poisson Lab, Huawei, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3371-6609","authenticated-orcid":false,"given":"Jin","family":"Yu","sequence":"additional","affiliation":[{"name":"Poisson Lab, Huawei, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,5,13]]},"reference":[{"unstructured":"Jean-Baptiste Alayrac Jeff Donahue Pauline Luc Antoine Miech Iain Barr Yana Hasson Karel Lenc Arthur Mensch Katherine Millican Malcolm Reynolds Roman Ring Eliza Rutherford Serkan Cabi Tengda Han Zhitao Gong Sina Samangooei Marianne Monteiro Jacob L. Menick Sebastian Borgeaud Andy Brock Aida Nematzadeh Sahand Sharifzadeh Mikolaj Binkowski Ricardo Barreira Oriol Vinyals Andrew Zisserman and Kar\u00e9 n Simonyan. 2022. Flamingo: a Visual Language Model for Few-Shot Learning. In NeurIPS. http:\/\/papers.nips.cc\/paper_files\/paper\/2022\/hash\/960a172bc7fbf0177ccccbb411a7d800-Abstract-Conference.html","key":"e_1_3_2_2_1_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_2_1","DOI":"10.1145\/3331184.3331254"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_3_1","DOI":"10.1145\/3077136.3080776"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_4_1","DOI":"10.1145\/2348283.2348470"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_5_1","DOI":"10.1109\/CVPR.2009.5206848"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_6_1","DOI":"10.3115\/v1\/W14-3348"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_7_1","DOI":"10.18653\/v1\/n19--1423"},{"key":"e_1_3_2_2_8_1","volume-title":"9th International Conference on Learning Representations, ICLR 2021","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3--7, 2021. OpenReview.net. https:\/\/openreview.net\/forum?id=YicbFdNTTy"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_9_1","DOI":"10.3233\/FAIA200320"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_10_1","DOI":"10.1109\/CVPR.2019.00210"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_11_1","DOI":"10.1007\/978--3-031-05981-0_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_12_1","DOI":"10.1007\/s11280-018-0558--1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_13_1","DOI":"10.1109\/CVPRW50498.2020.00487"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_14_1","DOI":"10.32473\/flairs.v34i1.128380"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_15_1","DOI":"10.48550\/arXiv.2301.12597"},{"key":"e_1_3_2_2_16_1","volume-title":"BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. In International Conference on Machine Learning, ICML 2022","volume":"12900","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven C. H. Hoi. 2022. BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. In International Conference on Machine Learning, ICML 2022, 17--23 July 2022, Baltimore, Maryland, USA (Proceedings of Machine Learning Research, Vol. 162), Kamalika Chaudhuri, Stefanie Jegelka, Le Song, Csaba Szepesv\u00e1 ri, Gang Niu, and Sivan Sabato (Eds.). PMLR, 12888--12900. https:\/\/proceedings.mlr.press\/v162\/li22n.html"},{"key":"e_1_3_2_2_17_1","volume-title":"Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81."},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_18_1","DOI":"10.1109\/CVPR52688.2022.01742"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_19_1","DOI":"10.2200\/S00416ED1V01Y201204HLT016"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_20_1","DOI":"10.1145\/3240323.3240365"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_21_1","DOI":"10.1007\/978--3-030--16142--2_32"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_22_1","DOI":"10.1609\/aaai.v33i01.33016810"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_23_1","DOI":"10.3115\/1073083.1073135"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_24_1","DOI":"10.1109\/CVPR.2017.681"},{"key":"e_1_3_2_2_25_1","volume-title":"UniWalk: Explainable and Accurate Recommendation for Rating and Network Data. CoRR","author":"Park Haekyu","year":"2017","unstructured":"Haekyu Park, Hyunsik Jeon, Junghwan Kim, Beunguk Ahn, and U Kang. 2017a. UniWalk: Explainable and Accurate Recommendation for Rating and Network Data. CoRR, Vol. abs\/1710.07134 (2017). showeprint[arXiv]1710.07134 http:\/\/arxiv.org\/abs\/1710.07134"},{"key":"e_1_3_2_2_26_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18--24","volume":"8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. In Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18--24 July 2021, Virtual Event (Proceedings of Machine Learning Research, Vol. 139). PMLR, 8748--8763. http:\/\/proceedings.mlr.press\/v139\/radford21a.html"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_27_1","DOI":"10.1145\/3018661.3018686"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_28_1","DOI":"10.1109\/CVPR.2019.01280"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_29_1","DOI":"10.1109\/CVPR.2015.7299087"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_30_1","DOI":"10.1145\/3209978.3210010"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_31_1","DOI":"10.1145\/3394171.3413890"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1155\/2021\/2630254","article-title":"Analysis of User Personalized Retrieval of Multimedia Digital Archives Dependent on BP Neural Network Algorithm","volume":"2021","author":"Wang Zhongke","year":"2021","unstructured":"Zhongke Wang. 2021. Analysis of User Personalized Retrieval of Multimedia Digital Archives Dependent on BP Neural Network Algorithm. Advances in Multimedia, Vol. 2021 (2021), 1--7.","journal-title":"Advances in Multimedia"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_33_1","DOI":"10.18653\/v1\/2021.maiworkshop-1.8"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_34_1","DOI":"10.24963\/ijcai.2019\/198"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_35_1","DOI":"10.1145\/3123266.3123427"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_36_1","DOI":"10.1109\/WI-IAT.2010.131"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_37_1","DOI":"10.1609\/aaai.v34i05.6503"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_38_1","DOI":"10.1109\/CVPR42600.2020.01329"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_39_1","DOI":"10.1145\/2623330.2623351"},{"doi-asserted-by":"crossref","unstructured":"Wayne Xin Zhao Shanlei Mu Yupeng Hou Zihan Lin Yushuo Chen Xingyu Pan Kaiyuan Li Yujie Lu Hui Wang Changxin Tian Yingqian Min Zhichao Feng Xinyan Fan Xu Chen Pengfei Wang Wendi Ji Yaliang Li Xiaoling Wang and Ji-Rong Wen. 2021. RecBole: Towards a Unified Comprehensive and Efficient Framework for Recommendation Algorithms. In CIKM. ACM 4653--4664.","key":"e_1_3_2_2_40_1","DOI":"10.1145\/3459637.3482016"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_41_1","DOI":"10.48550\/arXiv.2301.11367"},{"key":"e_1_3_2_2_42_1","volume-title":"Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592","author":"Zhu Deyao","year":"2023","unstructured":"Deyao Zhu, Jun Chen, Xiaoqian Shen, Xiang Li, and Mohamed Elhoseiny. 2023. Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592 (2023)."}],"event":{"sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"acronym":"WWW '24","name":"WWW '24: The ACM Web Conference 2024","location":"Singapore Singapore"},"container-title":["Proceedings of the ACM Web Conference 2024"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589334.3645711","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3589334.3645711","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:30:13Z","timestamp":1755822613000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589334.3645711"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":42,"alternative-id":["10.1145\/3589334.3645711","10.1145\/3589334"],"URL":"https:\/\/doi.org\/10.1145\/3589334.3645711","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]},"assertion":[{"value":"2024-05-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}