{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T15:52:13Z","timestamp":1775145133892,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T00:00:00Z","timestamp":1745280000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62272437, U24B20180, 62121002"],"award-info":[{"award-number":["62272437, U24B20180, 62121002"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,22]]},"DOI":"10.1145\/3696410.3714843","type":"proceedings-article","created":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T22:52:18Z","timestamp":1745362338000},"page":"264-274","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Personalized Image Generation with Large Multimodal Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5937-7289","authenticated-orcid":false,"given":"Yiyan","family":"Xu","sequence":"first","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5199-1428","authenticated-orcid":false,"given":"Wenjie","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7863-5183","authenticated-orcid":false,"given":"Yang","family":"Zhang","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2280-6521","authenticated-orcid":false,"given":"Biao","family":"Tang","sequence":"additional","affiliation":[{"name":"Meituan, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6211-4543","authenticated-orcid":false,"given":"Peng","family":"Yan","sequence":"additional","affiliation":[{"name":"Meituan, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5828-9842","authenticated-orcid":false,"given":"Fuli","family":"Feng","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8472-7992","authenticated-orcid":false,"given":"Xiangnan","family":"He","sequence":"additional","affiliation":[{"name":"MoE Key Lab of BIPC, University of Science and Technology of China, Hefei, China"}]}],"member":"320","published-online":{"date-parts":[[2025,4,22]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al. 2023. Gpt-4 technical report. arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","volume-title":"Tallrec: An effective and efficient tuning framework to align large language model with recommendation. In RecSys. ACM, 1007--1014.","author":"Bao Keqin","year":"2023","unstructured":"Keqin Bao, Jizhi Zhang, Yang Zhang, Wenjie Wang, Fuli Feng, and Xiangnan He. 2023. Tallrec: An effective and efficient tuning framework to align large language model with recommendation. In RecSys. ACM, 1007--1014."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"\u00c1d\u00e1m Tibor Czapp M\u00e1ty\u00e1s Jani B\u00e1lint Domi\u00e1n and Bal\u00e1zs Hidasi. 2024. Dynamic Product Image Generation and Recommendation at Scale for Personalized E-commerce. In RecSys. ACM 768--770.","DOI":"10.1145\/3640457.3688045"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1080\/09298215.2023.2166848"},{"key":"e_1_3_2_1_5_1","volume-title":"Gal Chechik, and Daniel Cohen-Or.","author":"Gal Rinon","year":"2023","unstructured":"Rinon Gal, Yuval Alaluf, Yuval Atzmon, Or Patashnik, Amit Haim Bermano, Gal Chechik, and Daniel Cohen-Or. 2023. An Image is Worth One Word: Personalizing Text-to-Image Generation using Textual Inversion. In ICLR. OpenReview.net."},{"key":"e_1_3_2_1_6_1","volume-title":"TOIS","volume":"42","author":"Gao Chongming","year":"2023","unstructured":"Chongming Gao, Shiqi Wang, Shijun Li, Jiawei Chen, Xiangnan He, Wenqiang Lei, Biao Li, Yuan Zhang, and Peng Jiang. 2023. CIRS: Bursting Filter Bubbles by Counterfactual Interactive Recommender System. TOIS, Vol. 42, 1, Article 14 (2023)."},{"key":"e_1_3_2_1_7_1","unstructured":"Yuying Ge Sijie Zhao Ziyun Zeng Yixiao Ge Chen Li Xintao Wang and Ying Shan. 2024. Making LLaMA SEE and Draw with SEED Tokenizer. In ICLR. OpenReview.net."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401063"},{"key":"e_1_3_2_1_9_1","unstructured":"Zecheng He Bo Sun Felix Juefei-Xu Haoyu Ma Ankit Ramchandani Vincent Cheung Siddharth Shah Anmol Kalia Harihar Subramanyam Alireza Zareian et al. 2024. Imagine yourself: Tuning-Free Personalized Image Generation. arXiv:2409.13346 (2024)."},{"key":"e_1_3_2_1_10_1","volume-title":"Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen.","author":"Hu Edward J","year":"2022","unstructured":"Edward J Hu, yelong shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2022. LoRA: Low-Rank Adaptation of Large Language Models. In ICLR. OpenReview.net."},{"key":"e_1_3_2_1_11_1","volume-title":"Yu-Chuan Su, Wenhu Chen, Yandong Li, Kihyuk Sohn, Yang Zhao, Xue Ben, Boqing Gong, William Cohen, et al.","author":"Hu Hexiang","year":"2024","unstructured":"Hexiang Hu, Kelvin CK Chan, Yu-Chuan Su, Wenhu Chen, Yandong Li, Kihyuk Sohn, Yang Zhao, Xue Ben, Boqing Gong, William Cohen, et al. 2024. Instruct-Imagen: Image generation with multi-modal instruction. In CVPR. IEEE, 4754--4763."},{"key":"e_1_3_2_1_12_1","volume-title":"Video-lavit: Unified video-language pre-training with decoupled visual-motional tokenization. arXiv:2402.03161","author":"Jin Yang","year":"2024","unstructured":"Yang Jin, Zhicheng Sun, Kun Xu, Liwei Chen, Hao Jiang, Quzhe Huang, Chengru Song, Yuliang Liu, Di Zhang, Yang Song, et al. 2024a. Video-lavit: Unified video-language pre-training with decoupled visual-motional tokenization. arXiv:2402.03161 (2024)."},{"key":"e_1_3_2_1_13_1","unstructured":"Yang Jin Kun Xu Kun Xu Liwei Chen Chao Liao Jianchao Tan Quzhe Huang Bin Chen Chengru Song Dai Meng Di Zhang Wenwu Ou Kun Gai and Yadong Mu. 2024b. Unified Language-Vision Pretraining in LLM with Dynamic Discrete Visual Tokenization. In ICLR. OpenReview.net."},{"key":"e_1_3_2_1_14_1","volume-title":"NeurIPS","volume":"36","author":"Koh Jing Yu","year":"2024","unstructured":"Jing Yu Koh, Daniel Fried, and Russ R Salakhutdinov. 2024. Generating images with multimodal language models. NeurIPS, Vol. 36 (2024)."},{"key":"e_1_3_2_1_15_1","unstructured":"Ishita Kumar Snigdha Viswanathan Sushrita Yerra Alireza Salemi Ryan A Rossi Franck Dernoncourt Hanieh Deilamsalehy Xiang Chen Ruiyi Zhang Shubham Agarwal et al. 2024. LongLaMP: A Benchmark for Personalized Long-form Text Generation. arXiv:2407.11016 (2024)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"e_1_3_2_1_17_1","volume-title":"NeurIPS","volume":"36","author":"Lam Max WY","year":"2024","unstructured":"Max WY Lam, Qiao Tian, Tang Li, Zongyu Yin, Siyuan Feng, Ming Tu, Yuliang Ji, Rui Xia, Mingbo Ma, Xuchen Song, et al. 2024. Efficient neural music generation. NeurIPS, Vol. 36 (2024)."},{"key":"e_1_3_2_1_18_1","volume-title":"ICML. PMLR","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In ICML. PMLR, 19730--19742."},{"key":"e_1_3_2_1_19_1","volume-title":"When stylegan meets stable diffusion: a w adapter for personalized image generation","author":"Li Xiaoming","unstructured":"Xiaoming Li, Xinyu Hou, and Chen Change Loy. 2024. When stylegan meets stable diffusion: a w adapter for personalized image generation. In CVPR. IEEE, 2187--2196."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671884"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3687481"},{"key":"e_1_3_2_1_22_1","volume-title":"NeurIPS","volume":"36","author":"Liu Haotian","year":"2024","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2024a. Visual instruction tuning. NeurIPS, Vol. 36 (2024)."},{"key":"e_1_3_2_1_23_1","volume-title":"Sora: A review on background, technology, limitations, and opportunities of large vision models. arXiv:2402.17177","author":"Liu Yixin","year":"2024","unstructured":"Yixin Liu, Kai Zhang, Yuan Li, Zhiling Yan, Chujie Gao, Ruoxi Chen, Zhengqing Yuan, Yue Huang, Hanchi Sun, Jianfeng Gao, et al. 2024b. Sora: A review on background, technology, limitations, and opportunities of large vision models. arXiv:2402.17177 (2024)."},{"key":"e_1_3_2_1_24_1","unstructured":"Chris J. Maddison Andriy Mnih and Yee Whye Teh. 2017. The Concrete Distribution: A Continuous Relaxation of Discrete Random Variables. In ICLR. OpenReview.net."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Navonil Majumder Chia-Yu Hung Deepanway Ghosal Wei-Ning Hsu Rada Mihalcea and Soujanya Poria. 2024. Tango 2: Aligning Diffusion-based Text-to-Audio Generative Models through Direct Preference Optimization. In MM. ACM.","DOI":"10.1145\/3664647.3681688"},{"key":"e_1_3_2_1_26_1","volume-title":"DINOv2: Learning Robust Visual Features without Supervision. TMLR","author":"Oquab Maxime","year":"2024","unstructured":"Maxime Oquab, Timoth\u00e9e Darcet, Th\u00e9o Moutakanni, Huy V. Vo, Marc Szafraniec, Vasil Khalidov, Pierre Fernandez, Daniel HAZIZA, Francisco Massa, Alaaeldin El-Nouby, Mido Assran, Nicolas Ballas, Wojciech Galuba, Russell Howes, Po-Yao Huang, Shang-Wen Li, Ishan Misra, Michael Rabbat, Vasu Sharma, Gabriel Synnaeve, Hu Xu, Herve Jegou, Julien Mairal, Patrick Labatut, Armand Joulin, and Piotr Bojanowski. 2024. DINOv2: Learning Robust Visual Features without Supervision. TMLR (2024)."},{"key":"e_1_3_2_1_27_1","volume-title":"Investigating Personalization Methods in Text to Music Generation","author":"Plitsis Manos","unstructured":"Manos Plitsis, Theodoros Kouzelis, Georgios Paraskevopoulos, Vassilis Katsouros, and Yannis Panagakis. 2024. Investigating Personalization Methods in Text to Music Generation. In ICASSP. IEEE, 1081--1085."},{"key":"e_1_3_2_1_28_1","volume-title":"SDXL: Improving Latent Diffusion Models for High-Resolution Image Synthesis. In ICLR. OpenReview.net.","author":"Podell Dustin","year":"2024","unstructured":"Dustin Podell, Zion English, Kyle Lacey, Andreas Blattmann, Tim Dockhorn, Jonas M\u00fcller, Joe Penna, and Robin Rombach. 2024. SDXL: Improving Latent Diffusion Models for High-Resolution Image Synthesis. In ICLR. OpenReview.net."},{"key":"e_1_3_2_1_29_1","volume-title":"Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al.","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In ICML. PMLR, 8748--8763."},{"key":"e_1_3_2_1_30_1","volume-title":"NeurIPS","volume":"36","author":"Rafailov Rafael","year":"2024","unstructured":"Rafael Rafailov, Archit Sharma, Eric Mitchell, Christopher D Manning, Stefano Ermon, and Chelsea Finn. 2024. Direct preference optimization: Your language model is secretly a reward model. NeurIPS, Vol. 36 (2024)."},{"key":"e_1_3_2_1_31_1","volume-title":"NeurIPS","volume":"36","author":"Rame Alexandre","year":"2024","unstructured":"Alexandre Rame, Guillaume Couairon, Corentin Dancette, Jean-Baptiste Gaya, Mustafa Shukor, Laure Soulier, and Matthieu Cord. 2024. Rewarded soups: towards pareto-optimal alignment by interpolating weights fine-tuned on diverse rewards. NeurIPS, Vol. 36 (2024)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"Robin Rombach Andreas Blattmann Dominik Lorenz Patrick Esser and Bj\u00f6rn Ommer. 2022. High-resolution image synthesis with latent diffusion models. In CVPR. 10684--10695.","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_33_1","volume-title":"Dreambooth: Fine tuning text-to-image diffusion models for subject-driven generation","author":"Ruiz Nataniel","year":"2023","unstructured":"Nataniel Ruiz, Yuanzhen Li, Varun Jampani, Yael Pritch, Michael Rubinstein, and Kfir Aberman. 2023. Dreambooth: Fine tuning text-to-image diffusion models for subject-driven generation. In CVPR. IEEE, 22500--22510."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Alireza Salemi Surya Kallumadi and Hamed Zamani. 2024a. Optimization methods for personalizing large language models through retrieval augmentation. In SIGIR. ACM 752--762.","DOI":"10.1145\/3626772.3657783"},{"key":"e_1_3_2_1_35_1","volume-title":"LaMP: When Large Language Models Meet Personalization. In ACL. ACL, 7370--7392","author":"Salemi Alireza","year":"2024","unstructured":"Alireza Salemi, Sheshera Mysore, Michael Bendersky, and Hamed Zamani. 2024b. LaMP: When Large Language Models Meet Personalization. In ACL. ACL, 7370--7392."},{"key":"e_1_3_2_1_36_1","volume-title":"PMG: Personalized Multimodal Generation with Large Language Models. In WWW. ACM, 3833--3843.","author":"Shen Xiaoteng","year":"2024","unstructured":"Xiaoteng Shen, Rui Zhang, Xiaoyan Zhao, Jieming Zhu, and Xi Xiao. 2024. PMG: Personalized Multimodal Generation with Large Language Models. In WWW. ACM, 3833--3843."},{"key":"e_1_3_2_1_37_1","volume-title":"Instantbooth: Personalized text-to-image generation without test-time finetuning","author":"Shi Jing","year":"2024","unstructured":"Jing Shi, Wei Xiong, Zhe Lin, and Hyun Joon Jung. 2024. Instantbooth: Personalized text-to-image generation without test-time finetuning. In CVPR. IEEE, 8543--8552."},{"key":"e_1_3_2_1_38_1","volume-title":"Flavian Vasile, Ga\u00ebtan Racic, and Ugo Tanielian.","author":"Shilova Veronika","year":"2023","unstructured":"Veronika Shilova, Ludovic Dos Santos, Flavian Vasile, Ga\u00ebtan Racic, and Ugo Tanielian. 2023. AdBooster: Personalized Ad Creative Generation using Stable Diffusion Outpainting. arXiv:2309.11507 (2023)."},{"key":"e_1_3_2_1_39_1","volume-title":"Personalized Pieces: Efficient Personalized Large Language Models through Collaborative Efforts. arXiv:2406.10471","author":"Tan Zhaoxuan","year":"2024","unstructured":"Zhaoxuan Tan, Zheyuan Liu, and Meng Jiang. 2024. Personalized Pieces: Efficient Personalized Large Language Models through Collaborative Efforts. arXiv:2406.10471 (2024)."},{"key":"e_1_3_2_1_40_1","unstructured":"Gemini Team Rohan Anil Sebastian Borgeaud Yonghui Wu Jean-Baptiste Alayrac Jiahui Yu Radu Soricut Johan Schalkwyk Andrew M Dai Anja Hauth et al. 2023. Gemini: a family of highly capable multimodal models. arXiv:2312.11805 (2023)."},{"key":"e_1_3_2_1_41_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Shanu Vashishtha Abhinav Prakash Lalitesh Morishetti Kaushiki Nag Yokila Arora Sushant Kumar and Kannan Achan. 2024. Chaining text-to-image and large language model: A novel approach for generating personalized e-commerce banners. In KDD. ACM 5825--5835.","DOI":"10.1145\/3637528.3671636"},{"key":"e_1_3_2_1_43_1","volume-title":"Attention is all you need. NeurIPS","author":"Vaswani A","year":"2017","unstructured":"A Vaswani. 2017. Attention is all you need. NeurIPS (2017)."},{"key":"e_1_3_2_1_44_1","volume-title":"Generative recommendation: Towards next-generation recommender paradigm. arXiv:2304.03516","author":"Wang Wenjie","year":"2023","unstructured":"Wenjie Wang, Xinyu Lin, Fuli Feng, Xiangnan He, and Tat-Seng Chua. 2023a. Generative recommendation: Towards next-generation recommender paradigm. arXiv:2304.03516 (2023)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"crossref","unstructured":"Wenjie Wang Yiyan Xu Fuli Feng Xinyu Lin Xiangnan He and Tat-Seng Chua. 2023b. Diffusion recommender model. In SIGIR. ACM 832--841.","DOI":"10.1145\/3539618.3591663"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"crossref","unstructured":"Xiang Wang Xiangnan He Meng Wang Fuli Feng and Tat-Seng Chua. 2019. Neural graph collaborative filtering. In SIGIR. ACM 165--174.","DOI":"10.1145\/3331184.3331267"},{"key":"e_1_3_2_1_47_1","volume-title":"Multiscale structural similarity for image quality assessment","author":"Wang Zhou","unstructured":"Zhou Wang, Eero P Simoncelli, and Alan C Bovik. 2003. Multiscale structural similarity for image quality assessment. In ACSSC. IEEE, 1398--1402."},{"key":"e_1_3_2_1_48_1","first-page":"1","article-title":"Personalized news recommendation: Methods and challenges","volume":"41","author":"Wu Chuhan","year":"2023","unstructured":"Chuhan Wu, Fangzhao Wu, Yongfeng Huang, and Xing Xie. 2023. Personalized news recommendation: Methods and challenges. TOIS, Vol. 41, 1 (2023), 1--50.","journal-title":"TOIS"},{"key":"e_1_3_2_1_49_1","first-page":"97","article-title":"Data mining with big data","volume":"26","author":"Wu Xindong","year":"2013","unstructured":"Xindong Wu, Xingquan Zhu, Gong-Qing Wu, and Wei Ding. 2013. Data mining with big data. TKDE, Vol. 26, 1 (2013), 97--107.","journal-title":"TKDE"},{"key":"e_1_3_2_1_50_1","volume-title":"Seeing and hearing: Open-domain visual-audio generation with diffusion latent aligners","author":"Xing Yazhou","unstructured":"Yazhou Xing, Yingqing He, Zeyue Tian, Xintao Wang, and Qifeng Chen. 2024. Seeing and hearing: Open-domain visual-audio generation with diffusion latent aligners. In CVPR. IEEE, 7151--7161."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"crossref","unstructured":"Yiyan Xu Wenjie Wang Fuli Feng Yunshan Ma Jizhi Zhang and Xiangnan He. 2024. Diffusion Models for Generative Outfit Recommendation. In SIGIR. ACM 1350--1359.","DOI":"10.1145\/3626772.3657719"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"crossref","unstructured":"Hao Yang Jianxin Yuan Shuai Yang Linhe Xu Shuo Yuan and Yifan Zeng. 2024b. A New Creative Generation Pipeline for Click-Through Rate with Stable Diffusion Model. In Companion WWW. ACM 180--189.","DOI":"10.1145\/3589335.3648315"},{"key":"e_1_3_2_1_53_1","volume-title":"NeurIPS","volume":"36","author":"Yang Yukang","year":"2024","unstructured":"Yukang Yang, Dongnan Gui, Yuhui Yuan, Weicong Liang, Haisong Ding, Han Hu, and Kai Chen. 2024a. Glyphcontrol: Glyph conditional control for visual text generation. NeurIPS, Vol. 36 (2024)."},{"key":"e_1_3_2_1_54_1","volume-title":"Personalized fashion design","author":"Yu Cong","unstructured":"Cong Yu, Yang Hu, Yan Chen, and Bing Zeng. 2019. Personalized fashion design. In ICCV. IEEE, 9046--9055."},{"key":"e_1_3_2_1_55_1","volume-title":"The unreasonable effectiveness of deep features as a perceptual metric","author":"Zhang Richard","unstructured":"Richard Zhang, Phillip Isola, Alexei A Efros, Eli Shechtman, and Oliver Wang. 2018. The unreasonable effectiveness of deep features as a perceptual metric. In CVPR. IEEE, 586--595."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"crossref","unstructured":"Jujia Zhao Wenjie Wang Yiyan Xu Teng Sun Fuli Feng and Tat-Seng Chua. 2024. Denoising diffusion recommender model. In SIGIR. ACM 1370--1379.","DOI":"10.1145\/3626772.3657825"},{"key":"e_1_3_2_1_57_1","first-page":"1","article-title":"Understanding or Manipulation","volume":"42","author":"Zhu Zhengbang","year":"2024","unstructured":"Zhengbang Zhu, Rongjun Qin, Junjie Huang, Xinyi Dai, Yang Yu, Yong Yu, and Weinan Zhang. 2024. Understanding or Manipulation: Rethinking Online Performance Gains of Modern Recommender Systems. TOIS, Vol. 42, 4 (2024), 1--32.","journal-title":"Rethinking Online Performance Gains of Modern Recommender Systems. TOIS"}],"event":{"name":"WWW '25: The ACM Web Conference 2025","location":"Sydney NSW Australia","acronym":"WWW '25","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714843","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3696410.3714843","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:42Z","timestamp":1750295922000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714843"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,22]]},"references-count":57,"alternative-id":["10.1145\/3696410.3714843","10.1145\/3696410"],"URL":"https:\/\/doi.org\/10.1145\/3696410.3714843","relation":{},"subject":[],"published":{"date-parts":[[2025,4,22]]},"assertion":[{"value":"2025-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}