{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,16]],"date-time":"2026-07-16T05:19:08Z","timestamp":1784179148779,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":66,"publisher":"ACM","funder":[{"name":"Hong Kong Research Grants Council's Research Impact Fund","award":["No.R1015-23"],"award-info":[{"award-number":["No.R1015-23"]}]},{"name":"Collaborative Research Fund","award":["No.C1043-24GF"],"award-info":[{"award-number":["No.C1043-24GF"]}]},{"name":"General Research Fund","award":["No.11218325"],"award-info":[{"award-number":["No.11218325"]}]},{"name":"Institute of Digital Medicine of City University of Hong Kong","award":["No.9229503"],"award-info":[{"award-number":["No.9229503"]}]},{"name":"CCF-Tencent Open Fund"},{"name":"Tencent Rhino-Bird Focused Research Program"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,10]]},"DOI":"10.1145\/3746252.3761169","type":"proceedings-article","created":{"date-parts":[[2025,11,8]],"date-time":"2025-11-08T00:29:28Z","timestamp":1762561768000},"page":"3209-3219","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Empowering Large Language Model for Sequential Recommendation via Multimodal Embeddings and Semantic IDs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6051-8659","authenticated-orcid":false,"given":"Yuhao","family":"Wang","sequence":"first","affiliation":[{"name":"City University of Hong Kong, Hong Kong, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2697-7012","authenticated-orcid":false,"given":"Junwei","family":"Pan","sequence":"additional","affiliation":[{"name":"Tencent Inc., Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8294-0589","authenticated-orcid":false,"given":"Xinhang","family":"Li","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0073-0172","authenticated-orcid":false,"given":"Maolin","family":"Wang","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Hong Kong, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4065-1717","authenticated-orcid":false,"given":"Yuan","family":"Wang","sequence":"additional","affiliation":[{"name":"Tencent Inc., Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2595-4893","authenticated-orcid":false,"given":"Yue","family":"Liu","sequence":"additional","affiliation":[{"name":"Tencent Inc., Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2973-9167","authenticated-orcid":false,"given":"Dapeng","family":"Liu","sequence":"additional","affiliation":[{"name":"Tencent Inc., Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9658-5127","authenticated-orcid":false,"given":"Jie","family":"Jiang","sequence":"additional","affiliation":[{"name":"Tencent Inc., Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2926-4416","authenticated-orcid":false,"given":"Xiangyu","family":"Zhao","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Hong Kong, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,11,10]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al., 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3604915.3608857"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637871"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.5555\/3692070.3692741"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2018.00035"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/30.1-2.81"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01123"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3604915.3608779"},{"key":"e_1_3_2_1_9_1","volume-title":"CTRL: Connect Collaborative and Language Model for CTR Prediction. ACM Transactions on Recommender Systems","author":"Li Xiangyang","year":"2023","unstructured":"Xiangyang Li, Bo Chen, Lu Hou, and Ruiming Tang. 2023a. CTRL: Connect Collaborative and Language Model for CTR Prediction. ACM Transactions on Recommender Systems (2023)."},{"key":"e_1_3_2_1_10_1","volume-title":"E4srec: An elegant effective efficient extensible solution of large language models for sequential recommendation. arXiv preprint arXiv:2312.02443","author":"Li Xinhang","year":"2023","unstructured":"Xinhang Li, Chong Chen, Xiangyu Zhao, Yong Zhang, and Chunxiao Xing. 2023b. E4srec: An elegant effective efficient extensible solution of large language models for sequential recommendation. arXiv preprint arXiv:2312.02443 (2023)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3615137"},{"key":"e_1_3_2_1_12_1","volume-title":"An Empirical Study of Training ID-Agnostic Multi-modal Sequential Recommenders. arXiv preprint arXiv:2403.17372","author":"Li Youhua","year":"2024","unstructured":"Youhua Li, Hanwen Du, Yongxin Ni, Yuanqi He, Junchen Fu, Xiangyan Liu, and Qi Guo. 2024. An Empirical Study of Training ID-Agnostic Multi-modal Sequential Recommenders. arXiv preprint arXiv:2403.17372 (2024)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657690"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583339"},{"key":"e_1_3_2_1_15_1","volume-title":"Disentangled Representation with Cross Experts Covariance Loss for Multi-Domain Recommendation. arXiv preprint arXiv:2405.12706","author":"Lin Zhutian","year":"2024","unstructured":"Zhutian Lin, Junwei Pan, Haibin Yu, Xi Xiao, Ximei Wang, Zhixiang Feng, Shifeng Wen, Shudong Huang, Lei Xiao, and Jie Jiang. 2024. Disentangled Representation with Cross Experts Covariance Loss for Multi-Domain Recommendation. arXiv preprint arXiv:2405.12706 (2024)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591717"},{"key":"e_1_3_2_1_17_1","first-page":"1","article-title":"Multimodal recommender systems: A survey","volume":"57","author":"Liu Qidong","year":"2024","unstructured":"Qidong Liu, Jiaxi Hu, Yutian Xiao, Xiangyu Zhao, Jingtong Gao, Wanyu Wang, Qing Li, and Jiliang Tang. 2024a. Multimodal recommender systems: A survey. Comput. Surveys, Vol. 57, 2 (2024), 1-17.","journal-title":"Comput. Surveys"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i11.33327"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.52202\/079017-0839"},{"key":"e_1_3_2_1_20_1","volume-title":"2024 e. Large language models enhanced sequential recommendation for long-tail user and item. arXiv e-prints","author":"Liu Qidong","year":"2024","unstructured":"Qidong Liu, Xian Wu, Xiangyu Zhao, Yejing Wang, Zijian Zhang, Feng Tian, and Yefeng Zheng. 2024 e. Large language models enhanced sequential recommendation for long-tail user and item. arXiv e-prints (2024), arXiv-2405."},{"key":"e_1_3_2_1_21_1","volume-title":"2024 f. Large language model distilling medication recommendation model. arXiv preprint arXiv:2402.02803","author":"Liu Qidong","year":"2024","unstructured":"Qidong Liu, Xian Wu, Xiangyu Zhao, Yuanshao Zhu, Zijian Zhang, Feng Tian, and Yefeng Zheng. 2024 f. Large language model distilling medication recommendation model. arXiv preprint arXiv:2402.02803 (2024)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3726302.3729911"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583244"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627673.3679626"},{"key":"e_1_3_2_1_25_1","volume-title":"Bidirectional gated mamba for sequential recommendation. arXiv e-prints","author":"Liu Ziwei","year":"2024","unstructured":"Ziwei Liu, Qidong Liu, Yejing Wang, Wanyu Wang, Pengyue Jia, Maolin Wang, Zitao Liu, Yi Chang, and Xiangyu Zhao. 2024b. Bidirectional gated mamba for sequential recommendation. arXiv e-prints (2024), arXiv-2408."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i12.33336"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657829"},{"key":"e_1_3_2_1_28_1","volume-title":"International conference on machine learning. PMLR, 97-105","author":"Long Mingsheng","year":"2015","unstructured":"Mingsheng Long, Yue Cao, Jianmin Wang, and Michael Jordan. 2015. Learning transferable features with deep adaptation networks. In International conference on machine learning. PMLR, 97-105."},{"key":"e_1_3_2_1_29_1","unstructured":"Ilya Loshchilov Frank Hutter et al. 2017. Fixing weight decay regularization in adam. arXiv preprint arXiv:1711.05101 (2017)."},{"key":"e_1_3_2_1_30_1","volume-title":"QARM: Quantitative Alignment Multi-Modal Recommendation at Kuaishou. arXiv preprint arXiv:2411.11739","author":"Luo Xinchen","year":"2024","unstructured":"Xinchen Luo, Jiangxia Cao, Tianyu Sun, Jinkai Yu, Rui Huang, Wei Yuan, Hezheng Lin, Yichen Zheng, Shiyao Wang, Qigen Hu, et al., 2024. QARM: Quantitative Alignment Multi-Modal Recommendation at Kuaishou. arXiv preprint arXiv:2411.11739 (2024)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/2766462.2767755"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671607"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/1454008.1454012"},{"key":"e_1_3_2_1_34_1","volume-title":"International conference on machine learning. PMLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748-8763."},{"key":"e_1_3_2_1_35_1","first-page":"10299","article-title":"Recommender systems with generative retrieval","volume":"36","author":"Rajput Shashank","year":"2023","unstructured":"Shashank Rajput, Nikhil Mehta, Anima Singh, Raghunandan Hulikal Keshavan, Trung Vu, Lukasz Heldt, Lichan Hong, Yi Tay, Vinh Tran, Jonah Samost, et al., 2023. Recommender systems with generative retrieval. Advances in Neural Information Processing Systems, Vol. 36 (2023), 10299-10315.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_36_1","volume-title":"Equivalence of distance-based and RKHS-based statistics in hypothesis testing. The annals of statistics","author":"Sejdinovic Dino","year":"2013","unstructured":"Dino Sejdinovic, Bharath Sriperumbudur, Arthur Gretton, and Kenji Fukumizu. 2013. Equivalence of distance-based and RKHS-based statistics in hypothesis testing. The annals of statistics (2013), 2263-2291."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevE.60.3389"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0024-3795(00)00322-0"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i8.28749"},{"key":"e_1_3_2_1_40_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Sun Weiwei","year":"2024","unstructured":"Weiwei Sun, Lingyong Yan, Zheng Chen, Shuaiqiang Wang, Haichao Zhu, Pengjie Ren, Zhumin Chen, Dawei Yin, Maarten Rijke, and Zhaochun Ren. 2024. Learning to tokenize for generative retrieval. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_41_1","volume-title":"Rethinking large language model architectures for sequential recommendations. arXiv preprint arXiv:2402.09543","author":"Wang Hanbing","year":"2024","unstructured":"Hanbing Wang, Xiaorui Liu, Wenqi Fan, Xiangyu Zhao, Venkataramana Kini, Devendra Yadav, Fei Wang, Zhen Wen, Jiliang Tang, and Hui Liu. 2024b. Rethinking large language model architectures for sequential recommendations. arXiv preprint arXiv:2402.09543 (2024)."},{"key":"e_1_3_2_1_42_1","volume-title":"Saksham Singhal, Subhojit Som, et al.","author":"Wang Wenhui","year":"2022","unstructured":"Wenhui Wang, Hangbo Bao, Li Dong, Johan Bjorck, Zhiliang Peng, Qiang Liu, Kriti Aggarwal, Owais Khan Mohammed, Saksham Singhal, Subhojit Som, et al., 2022. Image as a foreign language: Beit pretraining for all vision and vision-language tasks. arXiv preprint arXiv:2208.10442 (2022)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627673.3679569"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627673.3680264"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591767"},{"key":"e_1_3_2_1_46_1","volume-title":"Yi Wong, Ziru Liu, Xiangyu Zhao, Yichao Wang, Bo Chen, Huifeng Guo, and Ruiming Tang.","author":"Wang Yuhao","year":"2023","unstructured":"Yuhao Wang, Ha Tsz Lam, Yi Wong, Ziru Liu, Xiangyu Zhao, Yichao Wang, Bo Chen, Huifeng Guo, and Ruiming Tang. 2023b. Multi-task deep recommender systems: A survey. arXiv preprint arXiv:2302.03525 (2023)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3616855.3635807"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3726302.3730059"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627673.3679692"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627673.3679743"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591750"},{"key":"e_1_3_2_1_52_1","volume-title":"NeurIPS 2024 Workshop: Self-Supervised Learning-Theory and Practice.","author":"Wu Aoqi","year":"2024","unstructured":"Aoqi Wu, Yifan Yang, Xufang Luo, Yuqing Yang, Chunyu Wang, Liang Hu, Xiyang Dai, Dongdong Chen, Chong Luo, Lili Qiu, et al., 2024. LLM2CLIP: Powerful Language Model Unlock Richer Visual Representation. In NeurIPS 2024 Workshop: Self-Supervised Learning-Theory and Practice."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591932"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557348"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE60146.2024.00067"},{"key":"e_1_3_2_1_56_1","volume-title":"Learning ID-free Item Representation with Token Crossing for Multimodal Recommendation. arXiv preprint arXiv:2410.19276","author":"Zhang Kangning","year":"2024","unstructured":"Kangning Zhang, Jiarui Jin, Yingjie Qin, Ruilong Su, Jianghao Lin, Yong Yu, and Weinan Zhang. 2024b. Learning ID-free Item Representation with Token Crossing for Multimodal Recommendation. arXiv preprint arXiv:2410.19276 (2024)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3690624.3709304"},{"key":"e_1_3_2_1_58_1","unstructured":"Taolin Zhang Junwei Pan Jinpeng Wang Yaohua Zha Tao Dai Bin Chen Ruisheng Luo Xiaoxiang Deng Yuan Wang Ming Yue et al. 2024d. Towards Scalable Semantic Representation for Recommendation. arXiv preprint arXiv:2410.09560 (2024)."},{"key":"e_1_3_2_1_59_1","volume-title":"Collm: Integrating collaborative embeddings into large language models for recommendation","author":"Zhang Yang","year":"2025","unstructured":"Yang Zhang, Fuli Feng, Jizhi Zhang, Keqin Bao, Qifan Wang, and Xiangnan He. 2025a. Collm: Integrating collaborative embeddings into large language models for recommendation. IEEE Transactions on Knowledge and Data Engineering (2025)."},{"key":"e_1_3_2_1_60_1","volume-title":"Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval. 893-902","author":"Zhang Zijian","year":"2024","unstructured":"Zijian Zhang, Shuchang Liu, Jiaao Yu, Qingpeng Cai, Xiangyu Zhao, Chunxu Zhang, Ziru Liu, Qidong Liu, Hongwei Zhao, Lantao Hu, et al., 2024c. M3oe: Multi-domain multi-task mixture-of experts recommendation framework. In Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval. 893-902."},{"key":"e_1_3_2_1_61_1","first-page":"44880","article-title":"KuaiSim: A comprehensive simulator for recommender systems","volume":"36","author":"Zhao Kesen","year":"2023","unstructured":"Kesen Zhao, Shuchang Liu, Qingpeng Cai, Xiangyu Zhao, Ziru Liu, Dong Zheng, Peng Jiang, and Kun Gai. 2023. KuaiSim: A comprehensive simulator for recommender systems. Advances in Neural Information Processing Systems, Vol. 36 (2023), 44880-44897.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557461"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240323.3240374"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412044"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219886"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE60146.2024.00118"}],"event":{"name":"CIKM '25: The 34th ACM International Conference on Information and Knowledge Management","location":"Seoul Republic of Korea","acronym":"CIKM '25","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the 34th ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746252.3761169","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T01:52:01Z","timestamp":1765504321000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746252.3761169"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,10]]},"references-count":66,"alternative-id":["10.1145\/3746252.3761169","10.1145\/3746252"],"URL":"https:\/\/doi.org\/10.1145\/3746252.3761169","relation":{},"subject":[],"published":{"date-parts":[[2025,11,10]]},"assertion":[{"value":"2025-11-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}