{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T16:23:46Z","timestamp":1778171026272,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,8]],"date-time":"2024-10-08T00:00:00Z","timestamp":1728345600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Natural Science Foundation of China","award":["U21B2026"],"award-info":[{"award-number":["U21B2026"]}]},{"name":"Natural Science Foundation of China","award":["62372260"],"award-info":[{"award-number":["62372260"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,8]]},"DOI":"10.1145\/3640457.3688075","type":"proceedings-article","created":{"date-parts":[[2024,10,8]],"date-time":"2024-10-08T15:39:28Z","timestamp":1728401968000},"page":"33-42","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":25,"title":["Large Language Models as Evaluators for Recommendation Explanations"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0985-6636","authenticated-orcid":false,"given":"Xiaoyu","family":"Zhang","sequence":"first","affiliation":[{"name":"Tsinghua University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2837-1358","authenticated-orcid":false,"given":"Yishan","family":"Li","sequence":"additional","affiliation":[{"name":"Tsinghua University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8875-1850","authenticated-orcid":false,"given":"Jiayin","family":"Wang","sequence":"additional","affiliation":[{"name":"Tsinghua Univeristy, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-4596-0047","authenticated-orcid":false,"given":"Bowen","family":"Sun","sequence":"additional","affiliation":[{"name":"Tsinghua Univeristy, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5604-7527","authenticated-orcid":false,"given":"Weizhi","family":"Ma","sequence":"additional","affiliation":[{"name":"Tsinghua University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9733-0521","authenticated-orcid":false,"given":"Peijie","family":"Sun","sequence":"additional","affiliation":[{"name":"Tsinghua University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3158-1920","authenticated-orcid":false,"given":"Min","family":"Zhang","sequence":"additional","affiliation":[{"name":"Tsinghua University, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,8]]},"reference":[{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401032"},{"key":"e_1_3_2_1_3_1","volume-title":"Language Models are Few-Shot Learners","author":"Brown Tom","year":"1877","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared\u00a0D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, Sandhini Agarwal, Ariel Herbert-Voss, Gretchen Krueger, Tom Henighan, Rewon Child, Aditya Ramesh, Daniel Ziegler, Jeffrey Wu, Clemens Winter, Chris Hesse, Mark Chen, Eric Sigler, Mateusz Litwin, Scott Gray, Benjamin Chess, Jack Clark, Christopher Berner, Sam McCandlish, Alec Radford, Ilya Sutskever, and Dario Amodei. 2020. Language Models are Few-Shot Learners., 1877\u20131901\u00a0pages. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/1457c0d6bfcb4967418bfb8ac142f64a-Paper.pdf"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641289"},{"key":"e_1_3_2_1_5_1","volume-title":"Generate Natural Language Explanations for Recommendation. CoRR abs\/2101.03392","author":"Chen Hanxiong","year":"2021","unstructured":"Hanxiong Chen, Xu Chen, Shaoyun Shi, and Yongfeng Zhang. 2021. Generate Natural Language Explanations for Recommendation. CoRR abs\/2101.03392 (2021)."},{"key":"e_1_3_2_1_6_1","unstructured":"Xu Chen Yongfeng Zhang and Ji-Rong Wen. 2022. Measuring \"Why\" in Recommender Systems: a Comprehensive Survey on the Evaluation of Explainable Recommendation. arxiv:2202.06466\u00a0[cs.IR]"},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the Twenty-Ninth International Conference on International Joint Conferences on Artificial Intelligence. 2994\u20133000","author":"Chen Zhongxia","year":"2021","unstructured":"Zhongxia Chen, Xiting Wang, Xing Xie, Mehul Parsana, Akshay Soni, Xiang Ao, and Enhong Chen. 2021. Towards explainable conversational recommendation. In Proceedings of the Twenty-Ninth International Conference on International Joint Conferences on Artificial Intelligence. 2994\u20133000."},{"key":"e_1_3_2_1_9_1","volume-title":"\u00a0T. Martins","author":"Freitag Markus","year":"2022","unstructured":"Markus Freitag, Ricardo Rei, Nitika Mathur, Chi-kiu Lo, Craig Stewart, Eleftherios Avramidis, Tom Kocmi, George Foster, Alon Lavie, and Andr\u00e9 F.\u00a0T. Martins. 2022. Results of WMT22 Metrics Shared Task: Stop Using BLEU \u2013 Neural Metrics Are Better and More Robust. In Proceedings of the Seventh Conference on Machine Translation (WMT), Philipp Koehn, Lo\u00efc Barrault, Ond\u0159ej Bojar, Fethi Bougares, Rajen Chatterjee, Marta\u00a0R. Costa-juss\u00e0, Christian Federmann, Mark Fishel, Alexander Fraser, Markus Freitag, Yvette Graham, Roman Grundkiewicz, Paco Guzman, Barry Haddow, Matthias Huck, Antonio Jimeno\u00a0Yepes, Tom Kocmi, Andr\u00e9 Martins, Makoto Morishita, Christof Monz, Masaaki Nagata, Toshiaki Nakazawa, Matteo Negri, Aur\u00e9lie N\u00e9v\u00e9ol, Mariana Neves, Martin Popel, Marco Turchi, and Marcos Zampieri (Eds.). Association for Computational Linguistics, Abu Dhabi, United Arab Emirates (Hybrid), 46\u201368. https:\/\/aclanthology.org\/2022.wmt-1.2"},{"key":"e_1_3_2_1_10_1","unstructured":"Jinlan Fu See-Kiong Ng Zhengbao Jiang and Pengfei Liu. 2023. GPTScore: Evaluate as You Desire. arxiv:2302.04166\u00a0[cs.CL]"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2305016120"},{"key":"e_1_3_2_1_12_1","unstructured":"Xingwei He Zhenghao Lin Yeyun Gong A-Long Jin Hang Zhang Chen Lin Jian Jiao Siu\u00a0Ming Yiu Nan Duan and Weizhu Chen. 2024. AnnoLLM: Making Large Language Models to Be Better Crowdsourced Annotators. arxiv:2303.16854\u00a0[cs.CL]"},{"key":"e_1_3_2_1_13_1","unstructured":"Tom Kocmi and Christian Federmann. 2023. Large Language Models Are State-of-the-Art Evaluators of Translation Quality. arxiv:2302.14520\u00a0[cs.CL]"},{"key":"e_1_3_2_1_14_1","volume-title":"Advances in Neural Information Processing Systems, S.\u00a0Koyejo, S.\u00a0Mohamed, A.\u00a0Agarwal, D.\u00a0Belgrave, K.\u00a0Cho, and A.\u00a0Oh (Eds.). Vol.\u00a035. Curran Associates","author":"Kojima Takeshi","year":"2022","unstructured":"Takeshi Kojima, Shixiang\u00a0(Shane) Gu, Machel Reid, Yutaka Matsuo, and Yusuke Iwasawa. 2022. Large Language Models are Zero-Shot Reasoners. In Advances in Neural Information Processing Systems, S.\u00a0Koyejo, S.\u00a0Mohamed, A.\u00a0Agarwal, D.\u00a0Belgrave, K.\u00a0Cho, and A.\u00a0Oh (Eds.). Vol.\u00a035. Curran Associates, Inc., 22199\u201322213. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/8bb0d291acd4acf06ef112099c16f326-Paper-Conference.pdf"},{"key":"e_1_3_2_1_15_1","unstructured":"Yuxuan Lei Jianxun Lian Jing Yao Xu Huang Defu Lian and Xing Xie. 2023. RecExplainer: Aligning Large Language Models for Recommendation Model Interpretability. arxiv:2311.10947\u00a0[cs.IR]"},{"key":"e_1_3_2_1_16_1","unstructured":"Lei Li Yongfeng Zhang and Li Chen. 2021. Personalized Transformer for Explainable Recommendation. arxiv:2105.11601\u00a0[cs.IR]"},{"key":"e_1_3_2_1_17_1","volume-title":"Personalized prompt learning for explainable recommendation. arXiv preprint arXiv:2202.07371","author":"Li Lei","year":"2022","unstructured":"Lei Li, Yongfeng Zhang, and Li Chen. 2022. Personalized prompt learning for explainable recommendation. arXiv preprint arXiv:2202.07371 (2022)."},{"key":"e_1_3_2_1_18_1","volume-title":"ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out. Association for Computational Linguistics, Barcelona, Spain, 74\u201381. https:\/\/aclanthology.org\/W04-1013"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Yang Liu Dan Iter Yichong Xu Shuohang Wang Ruochen Xu and Chenguang Zhu. 2023. G-Eval: NLG Evaluation using GPT-4 with Better Human Alignment. arxiv:2303.16634\u00a0[cs.CL]","DOI":"10.18653\/v1\/2023.emnlp-main.153"},{"key":"e_1_3_2_1_20_1","unstructured":"Yuxuan Liu Tianchi Yang Shaohan Huang Zihan Zhang Haizhen Huang Furu Wei Weiwei Deng Feng Sun and Qi Zhang. 2023. Calibrating LLM-Based Evaluator. arxiv:2309.13308\u00a0[cs.CL]"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3565480"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2010.127"},{"key":"e_1_3_2_1_24_1","volume-title":"BPR: Bayesian Personalized Ranking from Implicit Feedback. CoRR abs\/1205.2618","author":"Rendle Steffen","year":"2012","unstructured":"Steffen Rendle, Christoph Freudenthaler, Zeno Gantner, and Lars Schmidt-Thieme. 2012. BPR: Bayesian Personalized Ranking from Implicit Feedback. CoRR abs\/1205.2618 (2012)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Ohad Rubin Jonathan Herzig and Jonathan Berant. 2022. Learning To Retrieve Prompts for In-Context Learning. arxiv:2112.08633\u00a0[cs.CL]","DOI":"10.18653\/v1\/2022.naacl-main.191"},{"key":"e_1_3_2_1_26_1","volume-title":"BLEURT: Learning Robust Metrics for Text Generation. arxiv:2004.04696\u00a0[cs.CL]","author":"Sellam Thibault","year":"2020","unstructured":"Thibault Sellam, Dipanjan Das, and Ankur\u00a0P. Parikh. 2020. BLEURT: Learning Robust Metrics for Text Generation. arxiv:2004.04696\u00a0[cs.CL]"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Richard Shin Christopher\u00a0H. Lin Sam Thomson Charles Chen Subhro Roy Emmanouil\u00a0Antonios Platanios Adam Pauls Dan Klein Jason Eisner and Benjamin\u00a0Van Durme. 2021. Constrained Language Models Yield Few-Shot Semantic Parsers. arxiv:2104.08768\u00a0[cs.CL]","DOI":"10.18653\/v1\/2021.emnlp-main.608"},{"key":"e_1_3_2_1_28_1","volume-title":"Evaluation Center","author":"L Stufflebeam","unstructured":"Daniel\u00a0L Stufflebeam 1974. Meta-evaluation. Evaluation Center, College of Education, Western Michigan University Kalamazoo."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589335.3648297"},{"key":"e_1_3_2_1_30_1","volume-title":"Neighborhood-Enhanced Supervised Contrastive Learning for Collaborative Filtering","author":"Sun Peijie","year":"2023","unstructured":"Peijie Sun, Le Wu, Kun Zhang, Xiangzhi Chen, and Meng Wang. 2023. Neighborhood-Enhanced Supervised Contrastive Learning for Collaborative Filtering. IEEE Transactions on Knowledge and Data Engineering (2023)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380164"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3483611"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Zhen Tan Alimohammad Beigi Song Wang Ruocheng Guo Amrita Bhattacharjee Bohan Jiang Mansooreh Karami Jundong Li Lu Cheng and Huan Liu. 2024. Large Language Models for Data Annotation: A Survey. arxiv:2402.13446\u00a0[cs.CL]","DOI":"10.18653\/v1\/2024.emnlp-main.54"},{"key":"e_1_3_2_1_34_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian\u00a0Canton Ferrer Moya Chen Guillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller Cynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou Hakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev Punit\u00a0Singh Koura Marie-Anne Lachaux Thibaut Lavril Jenya Lee Diana Liskovich Yinghai Lu Yuning Mao Xavier Martinet Todor Mihaylov Pushkar Mishra Igor Molybog Yixin Nie Andrew Poulton Jeremy Reizenstein Rashi Rungta Kalyan Saladi Alan Schelten Ruan Silva Eric\u00a0Michael Smith Ranjan Subramanian Xiaoqing\u00a0Ellen Tan Binh Tang Ross Taylor Adina Williams Jian\u00a0Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang Angela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic Sergey Edunov and Thomas Scialom. 2023. Llama 2: Open Foundation and Fine-Tuned Chat Models. arxiv:2307.09288\u00a0[cs.CL]"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Jiaan Wang Yunlong Liang Fandong Meng Zengkui Sun Haoxiang Shi Zhixu Li Jinan Xu Jianfeng Qu and Jie Zhou. 2023. Is ChatGPT a Good NLG Evaluator? A Preliminary Study. arxiv:2303.04048\u00a0[cs.CL]","DOI":"10.18653\/v1\/2023.newsum-1.1"},{"key":"e_1_3_2_1_36_1","unstructured":"Yaqing Wang Jiepu Jiang Mingyang Zhang Cheng Li Yi Liang Qiaozhu Mei and Michael Bendersky. 2023. Automated Evaluation of Personalized Text Generation using Large Language Models. arxiv:2310.11593\u00a0[cs.CL]"},{"key":"e_1_3_2_1_37_1","volume-title":"Fei Xia, Ed Chi, Quoc\u00a0V Le, and Denny Zhou.","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, brian ichter, Fei Xia, Ed Chi, Quoc\u00a0V Le, and Denny Zhou. 2022. Chain-of-Thought Prompting Elicits Reasoning in Large Language Models. In Advances in Neural Information Processing Systems, S.\u00a0Koyejo, S.\u00a0Mohamed, A.\u00a0Agarwal, D.\u00a0Belgrave, K.\u00a0Cho, and A.\u00a0Oh (Eds.). Vol.\u00a035. Curran Associates, Inc., 24824\u201324837. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/9d5609613524ecf4f15af0f7b31abca4-Paper-Conference.pdf"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512269"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3474240"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3605357"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000066"}],"event":{"name":"RecSys '24: 18th ACM Conference on Recommender Systems","location":"Bari Italy","acronym":"RecSys '24","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGAI ACM Special Interest Group on Artificial Intelligence","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval","SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["18th ACM Conference on Recommender Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640457.3688075","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3640457.3688075","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:29Z","timestamp":1750294709000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640457.3688075"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,8]]},"references-count":39,"alternative-id":["10.1145\/3640457.3688075","10.1145\/3640457"],"URL":"https:\/\/doi.org\/10.1145\/3640457.3688075","relation":{},"subject":[],"published":{"date-parts":[[2024,10,8]]},"assertion":[{"value":"2024-10-08","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}