{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T20:21:59Z","timestamp":1776889319124,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T00:00:00Z","timestamp":1745280000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"The Knowledge Innovation Program of Wuhan-Shuguang Project","award":["2023010201020226"],"award-info":[{"award-number":["2023010201020226"]}]},{"name":"The National Key R&D Program of China","award":["2022YFC3301000"],"award-info":[{"award-number":["2022YFC3301000"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,22]]},"DOI":"10.1145\/3696410.3714836","type":"proceedings-article","created":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T22:52:18Z","timestamp":1745362338000},"page":"2262-2275","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["CTR-Driven Advertising Image Generation with Multimodal Large Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7119-4792","authenticated-orcid":false,"given":"Xingye","family":"Chen","sequence":"first","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-8890-4956","authenticated-orcid":false,"given":"Wei","family":"Feng","sequence":"additional","affiliation":[{"name":"JD.COM, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1386-8381","authenticated-orcid":false,"given":"Zhenbang","family":"Du","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-4006-774X","authenticated-orcid":false,"given":"Weizhen","family":"Wang","sequence":"additional","affiliation":[{"name":"JD.COM, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-5508-3605","authenticated-orcid":false,"given":"Yanyin","family":"Chen","sequence":"additional","affiliation":[{"name":"JD.COM, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3451-6884","authenticated-orcid":false,"given":"Haohan","family":"Wang","sequence":"additional","affiliation":[{"name":"JD.COM, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3748-5980","authenticated-orcid":false,"given":"Linkai","family":"Liu","sequence":"additional","affiliation":[{"name":"Sun Yat-sen University, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7362-7897","authenticated-orcid":false,"given":"Yaoyu","family":"Li","sequence":"additional","affiliation":[{"name":"JD.COM, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8528-3529","authenticated-orcid":false,"given":"Jinyuan","family":"Zhao","sequence":"additional","affiliation":[{"name":"JD.COM, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1331-5020","authenticated-orcid":false,"given":"Yu","family":"Li","sequence":"additional","affiliation":[{"name":"JD.COM, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6391-4814","authenticated-orcid":false,"given":"Zheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"JD.COM, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-5518-7077","authenticated-orcid":false,"given":"Jingjing","family":"Lv","sequence":"additional","affiliation":[{"name":"JD.COM, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-6983-5213","authenticated-orcid":false,"given":"Junjie","family":"Shen","sequence":"additional","affiliation":[{"name":"JD.COM, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1379-5044","authenticated-orcid":false,"given":"Zhangang","family":"Lin","sequence":"additional","affiliation":[{"name":"JD.COM, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8555-2020","authenticated-orcid":false,"given":"Jingping","family":"Shao","sequence":"additional","affiliation":[{"name":"JD.COM, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1141-0454","authenticated-orcid":false,"given":"Yuanjie","family":"Shao","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6227-1346","authenticated-orcid":false,"given":"Xinge","family":"You","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2736-3920","authenticated-orcid":false,"given":"Changxin","family":"Gao","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9167-1496","authenticated-orcid":false,"given":"Nong","family":"Sang","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,4,22]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2023. GPT-4V(ision) System Card. https:\/\/openai.com\/index\/gpt-4v-system-card\/"},{"key":"e_1_3_2_1_2_1","unstructured":"2024. Claude3.5-sonnet. https:\/\/www.anthropic.com\/news\/claude-3--5-sonnet"},{"key":"e_1_3_2_1_3_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al. 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_4_1","unstructured":"Yuntao Bai Andy Jones Kamal Ndousse Amanda Askell Anna Chen Nova DasSarma Dawn Drain Stanislav Fort Deep Ganguli Tom Henighan et al. 2022. Training a helpful and harmless assistant with reinforcement learning from human feedback. arXiv preprint arXiv:2204.05862 (2022)."},{"key":"e_1_3_2_1_5_1","volume-title":"VirtualModel: Generating Object-ID-retentive Human-object Interaction Image by Diffusion Model for E-commerce Marketing. arXiv preprint arXiv:2405.09985","author":"Chen Binghui","year":"2024","unstructured":"Binghui Chen, Chongyang Zhong, Wangmeng Xiang, Yifeng Geng, and Xuansong Xie. 2024. VirtualModel: Generating Object-ID-retentive Human-object Interaction Image by Diffusion Model for E-commerce Marketing. arXiv preprint arXiv:2405.09985 (2024)."},{"key":"e_1_3_2_1_6_1","volume-title":"d.]. Automated Creative Optimization for E-Commerce Advertising. arXiv","author":"Chen J","year":"2021","unstructured":"J Chen, J Xu, G Jiang, T Ge, Z Zhang, D Lian, and K Zheng. [n. d.]. Automated Creative Optimization for E-Commerce Advertising. arXiv 2021. arXiv preprint arXiv:2103.00436 ([n. d.])."},{"key":"e_1_3_2_1_7_1","volume-title":"Xing","author":"Chiang Wei-Lin","year":"2023","unstructured":"Wei-Lin Chiang, Zhuohan Li, Zi Lin, Ying Sheng, Zhanghao Wu, Hao Zhang, Lianmin Zheng, Siyuan Zhuang, Yonghao Zhuang, Joseph E. Gonzalez, Ion Stoica, and Eric P. Xing. 2023. Vicuna: An Open-Source Chatbot Impressing GPT-4 with 90%* ChatGPT Quality. https:\/\/lmsys.org\/blog\/2023-03--30-vicuna\/"},{"key":"e_1_3_2_1_8_1","volume-title":"Deep reinforcement learning from human preferences. Advances in neural information processing systems 30","author":"Christiano Paul F","year":"2017","unstructured":"Paul F Christiano, Jan Leike, Tom Brown, Miljan Martic, Shane Legg, and Dario Amodei. 2017. Deep reinforcement learning from human preferences. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_9_1","volume-title":"Towards Reliable Advertising Image Generation Using Human Feedback. In European Conference on Computer Vision. Springer, 399--415","author":"Du Zhenbang","year":"2025","unstructured":"Zhenbang Du, Wei Feng, Haohan Wang, Yaoyu Li, Jingsen Wang, Jian Li, Zheng Zhang, Jingjing Lv, Xin Zhu, Junsheng Jin, et al . 2025. Towards Reliable Advertising Image Generation Using Human Feedback. In European Conference on Computer Vision. Springer, 399--415."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3272007"},{"key":"e_1_3_2_1_11_1","volume-title":"et al","author":"Aohan Zeng Team GLM","year":"2024","unstructured":"Team GLM, Aohan Zeng, Bin Xu, Bowen Wang, Chenhui Zhang, Da Yin, Diego Rojas, Guanyu Feng, Hanlin Zhao, Hanyu Lai, et al . 2024. ChatGLM: A Family of Large Language Models from GLM-130B to GLM-4 All Tools. arXiv preprint arXiv:2406.12793 (2024)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3422622"},{"key":"e_1_3_2_1_13_1","volume-title":"Optimizing prompts for text-to-image generation. Advances in Neural Information Processing Systems 36","author":"Hao Yaru","year":"2024","unstructured":"Yaru Hao, Zewen Chi, Li Dong, and Furu Wei. 2024. Optimizing prompts for text-to-image generation. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_14_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02644"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCIT.2017.8261198"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/2959100.2959134"},{"key":"e_1_3_2_1_18_1","volume-title":"Staging e-commerce products for online advertising using retrieval assisted image generation. arXiv preprint arXiv:2307.15326","author":"Ku Yueh-Ning","year":"2023","unstructured":"Yueh-Ning Ku, Mikhail Kuznetsov, Shaunak Mishra, and Paloma de Juan. 2023. Staging e-commerce products for online advertising using retrieval assisted image generation. arXiv preprint arXiv:2307.15326 (2023)."},{"key":"e_1_3_2_1_19_1","volume-title":"Vani D Naik, Smita Shiralli, VG Sunil, and Moula Husain.","author":"Kumar Rohit","year":"2015","unstructured":"Rohit Kumar, Sneha Manjunath Naik, Vani D Naik, Smita Shiralli, VG Sunil, and Moula Husain. 2015. Predicting clicks: CTR estimation of advertisements using logistic regression classifier. In 2015 IEEE international advance computing conference (IACC). IEEE, 1134--1138."},{"key":"e_1_3_2_1_20_1","volume-title":"Aligning text-to-image models using human feedback. arXiv preprint arXiv:2302.12192","author":"Lee Kimin","year":"2023","unstructured":"Kimin Lee, Hao Liu, Moonkyung Ryu, Olivia Watkins, Yuqing Du, Craig Boutilier, Pieter Abbeel, Mohammad Ghavamzadeh, and Shixiang Shane Gu. 2023. Aligning text-to-image models using human feedback. arXiv preprint arXiv:2302.12192 (2023)."},{"key":"e_1_3_2_1_21_1","volume-title":"Parrot: Pareto-optimal multi-reward reinforcement learning framework for text-to-image generation. arXiv preprint arXiv:2401.05675","author":"Lee Seung Hyun","year":"2024","unstructured":"Seung Hyun Lee, Yinxiao Li, Junjie Ke, Innfarn Yoo, Han Zhang, Jiahui Yu, Qifei Wang, Fei Deng, Glenn Entis, Junfeng He, et al. 2024. Parrot: Pareto-optimal multi-reward reinforcement learning framework for text-to-image generation. arXiv preprint arXiv:2401.05675 (2024)."},{"key":"e_1_3_2_1_22_1","volume-title":"LLaVA-OneVision: Easy Visual Task Transfer. arXiv preprint arXiv:2408.03326","author":"Li Bo","year":"2024","unstructured":"Bo Li, Yuanhan Zhang, Dong Guo, Renrui Zhang, Feng Li, Hao Zhang, Kaichen Zhang, Yanwei Li, Ziwei Liu, and Chunyuan Li. 2024. LLaVA-OneVision: Easy Visual Task Transfer. arXiv preprint arXiv:2408.03326 (2024)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3615028"},{"key":"e_1_3_2_1_24_1","unstructured":"Zhaochen Li Fengheng Li Wei Feng Honghe Zhu An Liu Yaoyu Li Zheng Zhang Jingjing Lv Xin Zhu Junjie Shen et al. 2023. Planning and Rendering: Towards End-to-End Product Poster Generation. arXiv preprint arXiv:2312.08822 (2023)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531855"},{"key":"e_1_3_2_1_26_1","volume-title":"Proceedings, Part V 13","author":"Lin Tsung-Yi","year":"2014","unstructured":"Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C Lawrence Zitnick. 2014. Microsoft coco: Common objects in context. In Computer Vision--ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6--12, 2014, Proceedings, Part V 13. Springer, 740--755."},{"key":"e_1_3_2_1_27_1","unstructured":"Haotian Liu Chunyuan Li Yuheng Li Bo Li Yuanhan Zhang Sheng Shen and Yong Jae Lee. 2024. LLaVA-NeXT: Improved reasoning OCR and world knowledge. https:\/\/llava-vl.github.io\/blog\/2024-01--30-llava-next\/"},{"key":"e_1_3_2_1_28_1","volume-title":"Visual instruction tuning. Advances in neural information processing systems 36","author":"Liu Haotian","year":"2024","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2024. Visual instruction tuning. Advances in neural information processing systems 36 (2024)."},{"key":"e_1_3_2_1_29_1","volume-title":"Kaipeng Zhang, and Yang You.","author":"Liu Yanqing","year":"2023","unstructured":"Yanqing Liu, Kai Wang, Wenqi Shao, Ping Luo, Yu Qiao, Mike Zheng Shou, Kaipeng Zhang, and Yang You. 2023. Mllms-augmented visual-language representation learning. arXiv preprint arXiv:2311.18765 (2023)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01117"},{"key":"e_1_3_2_1_31_1","volume-title":"International conference on machine learning. PMLR, 2285--2294","author":"MacGlashan James","year":"2017","unstructured":"James MacGlashan, Mark K Ho, Robert Loftin, Bei Peng, Guan Wang, David L Roberts, Matthew E Taylor, and Michael L Littman. 2017. Interactive learning from policy-dependent human feedback. In International conference on machine learning. PMLR, 2285--2294."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412720"},{"key":"e_1_3_2_1_33_1","volume-title":"InsertDiffusion: Identity Preserving Visualization of Objects through a Training-Free Diffusion Architecture. arXiv preprint arXiv:2407.10592","author":"Mueller Phillip","year":"2024","unstructured":"Phillip Mueller, Jannik Wiese, Ioan Craciun, and Lars Mikelsons. 2024. InsertDiffusion: Identity Preserving Visualization of Objects through a Training-Free Diffusion Architecture. arXiv preprint arXiv:2407.10592 (2024)."},{"key":"e_1_3_2_1_34_1","unstructured":"Long Ouyang Jeffrey Wu Xu Jiang Diogo Almeida Carroll Wainwright Pamela Mishkin Chong Zhang Sandhini Agarwal Katarina Slama Alex Ray et al. 2022. Training language models to follow instructions with human feedback. Advances in neural information processing systems 35 (2022) 27730--27744."},{"key":"e_1_3_2_1_35_1","volume-title":"Personalizing Reinforcement Learning from Human Feedback with Variational Preference Learning. arXiv preprint arXiv:2408.10075","author":"Poddar Sriyash","year":"2024","unstructured":"Sriyash Poddar, Yanming Wan, Hamish Ivison, Abhishek Gupta, and Natasha Jaques. 2024. Personalizing Reinforcement Learning from Human Feedback with Variational Preference Learning. arXiv preprint arXiv:2408.10075 (2024)."},{"key":"e_1_3_2_1_36_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_37_1","volume-title":"Direct preference optimization: Your language model is secretly a reward model. Advances in Neural Information Processing Systems 36","author":"Rafailov Rafael","year":"2024","unstructured":"Rafael Rafailov, Archit Sharma, Eric Mitchell, Christopher D Manning, Stefano Ermon, and Chelsea Finn. 2024. Direct preference optimization: Your language model is secretly a reward model. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_39_1","volume-title":"Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347","author":"Schulman John","year":"2017","unstructured":"John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, and Oleg Klimov. 2017. Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)."},{"key":"e_1_3_2_1_40_1","volume-title":"Distributional preference learning: Understanding and accounting for hidden context in RLHF. arXiv preprint arXiv:2312.08358","author":"Siththaranjan Anand","year":"2023","unstructured":"Anand Siththaranjan, Cassidy Laidlaw, and Dylan Hadfield-Menell. 2023. Distributional preference learning: Understanding and accounting for hidden context in RLHF. arXiv preprint arXiv:2312.08358 (2023)."},{"key":"e_1_3_2_1_41_1","volume-title":"Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502","author":"Song Jiaming","year":"2020","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2020. Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)."},{"key":"e_1_3_2_1_42_1","first-page":"3008","article-title":"Learning to summarize with human feedback","volume":"33","author":"Stiennon Nisan","year":"2020","unstructured":"Nisan Stiennon, Long Ouyang, Jeffrey Wu, Daniel Ziegler, Ryan Lowe, Chelsea Voss, Alec Radford, Dario Amodei, and Paul F Christiano. 2020. Learning to summarize with human feedback. Advances in Neural Information Processing Systems 33 (2020), 3008--3021.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_43_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, et al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10888013"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449910"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548763"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531909"},{"key":"e_1_3_2_1_48_1","volume-title":"CREATER: CTR-driven advertising text generation with controlled pre-training and contrastive fine-tuning. arXiv preprint arXiv:2205.08943","author":"Wei Penghui","year":"2022","unstructured":"Penghui Wei, Xuanhua Yang, Shaoguo Liu, Liang Wang, and Bo Zheng. 2022. CREATER: CTR-driven advertising text generation with controlled pre-training and contrastive fine-tuning. arXiv preprint arXiv:2205.08943 (2022)."},{"key":"e_1_3_2_1_49_1","volume-title":"Better aligning text-to-image models with human preference. arXiv preprint arXiv:2303.14420 1, 3","author":"Wu Xiaoshi","year":"2023","unstructured":"Xiaoshi Wu, Keqiang Sun, Feng Zhu, Rui Zhao, and Hongsheng Li. 2023. Better aligning text-to-image models with human preference. arXiv preprint arXiv:2303.14420 1, 3 (2023)."},{"key":"e_1_3_2_1_50_1","volume-title":"et al","author":"Yang An","year":"2024","unstructured":"An Yang, Baosong Yang, Binyuan Hui, Bo Zheng, Bowen Yu, Chang Zhou, Chengpeng Li, Chengyuan Li, Dayiheng Liu, Fei Huang, et al . 2024. Qwen2 technical report. arXiv preprint arXiv:2407.10671 (2024)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589335.3648315"},{"key":"e_1_3_2_1_52_1","volume-title":"International Journal of Computer Vision","author":"Zang Yuhang","year":"2024","unstructured":"Yuhang Zang, Wei Li, Jun Han, Kaiyang Zhou, and Chen Change Loy. 2024. Contextual object detection with multimodal large language models. International Journal of Computer Vision (2024), 1--19."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3661350"},{"key":"e_1_3_2_1_55_1","volume-title":"Beyond one-preference-for-all: Multi-objective direct preference optimization. arXiv preprint arXiv:2310.03708","author":"Zhou Zhanhui","year":"2023","unstructured":"Zhanhui Zhou, Jie Liu, Chao Yang, Jing Shao, Yu Liu, Xiangyu Yue, Wanli Ouyang, and Yu Qiao. 2023. Beyond one-preference-for-all: Multi-objective direct preference optimization. arXiv preprint arXiv:2310.03708 (2023)."},{"key":"e_1_3_2_1_56_1","volume-title":"Fine-tuning language models from human preferences. arXiv preprint arXiv:1909.08593","author":"Ziegler Daniel M","year":"2019","unstructured":"Daniel M Ziegler, Nisan Stiennon, Jeffrey Wu, Tom B Brown, Alec Radford, Dario Amodei, Paul Christiano, and Geoffrey Irving. 2019. Fine-tuning language models from human preferences. arXiv preprint arXiv:1909.08593 (2019)."}],"event":{"name":"WWW '25: The ACM Web Conference 2025","location":"Sydney NSW Australia","acronym":"WWW '25","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714836","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3696410.3714836","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:42Z","timestamp":1750295922000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714836"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,22]]},"references-count":56,"alternative-id":["10.1145\/3696410.3714836","10.1145\/3696410"],"URL":"https:\/\/doi.org\/10.1145\/3696410.3714836","relation":{},"subject":[],"published":{"date-parts":[[2025,4,22]]},"assertion":[{"value":"2025-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}