{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T19:11:05Z","timestamp":1757617865611,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","funder":[{"name":"Hong Kong Baptist University IG-FNRA Project","award":["RC-FNRA-IG\/21-22\/SCI\/01"],"award-info":[{"award-number":["RC-FNRA-IG\/21-22\/SCI\/01"]}]},{"name":"Key Research Partnership Scheme","award":["KRPS\/23-24\/02"],"award-info":[{"award-number":["KRPS\/23-24\/02"]}]},{"name":"NSFC\/RGC Joint Research Scheme","award":["N\\\\_HKBU214\/24"],"award-info":[{"award-number":["N\\\\_HKBU214\/24"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,9,22]]},"DOI":"10.1145\/3705328.3748167","type":"proceedings-article","created":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T10:48:44Z","timestamp":1757155724000},"page":"746-754","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Exploring the Potential of LLMs for Serendipity Evaluation in Recommender Systems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-3233-2913","authenticated-orcid":false,"given":"Li","family":"Kang","sequence":"first","affiliation":[{"name":"Hong Kong Baptist University, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1427-4139","authenticated-orcid":false,"given":"Yuhan","family":"Zhao","sequence":"additional","affiliation":[{"name":"Hong Kong Baptist University, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5842-838X","authenticated-orcid":false,"given":"Li","family":"Chen","sequence":"additional","affiliation":[{"name":"Hong Kong Baptist University, Hong Kong, China"}]}],"member":"320","published-online":{"date-parts":[[2025,9,7]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Josh Achiam Adler et\u00a0al. 2023. GPT-4 Technical Report. arXiv (2023)."},{"key":"e_1_3_3_1_3_2","unstructured":"Zahra Ashktorab Michael Desmond Qian Pan James\u00a0M Johnson Martin\u00a0Santillan Cooper Elizabeth\u00a0M Daly Rahul Nair Tejaswini Pedapati Swapnaja Achintalwar and Werner Geyer. 2024. Aligning Human and LLM Judgments: Insights from EvalAssist on Task-Specific Evaluations and AI-assisted Assessment Strategy Preferences. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.00873 (2024)."},{"key":"e_1_3_3_1_4_2","unstructured":"Anna Bavaresco Raffaella Bernardi Leonardo Bertolazzi Desmond Elliott Raquel Fern\u00e1ndez Albert Gatt Esam Ghaleb Mario Giulianelli Michael Hanna Alexander Koller et\u00a0al. 2024. LLMs Instead of Human Judges? A Large-Scale Empirical Study Across 20 NLP Evaluation Tasks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.18403 (2024)."},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1145\/3640457.3688017"},{"key":"e_1_3_3_1_6_2","first-page":"240","volume-title":"WWW","author":"Chen Li","year":"2019","unstructured":"Li Chen, Yonghua Yang, Ningxia Wang, Keping Yang, and Quan Yuan. 2019. How Serendipity Improves User Satisfaction with Recommendations? A Large-Scale User Evaluation. In WWW. 240\u2013250."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"crossref","unstructured":"Zhe Fu and Xi Niu. 2023. Modeling Users\u2019 Curiosity in Recommender Systems. TKDD 18 1 (2023) 1\u201323.","DOI":"10.1145\/3617598"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3664190.3672521"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","unstructured":"Zhe Fu Xi Niu and Mary\u00a0Lou Maher. 2023. Deep Learning Models for Serendipity Recommendations: A Survey and New Perspectives. Comput. Surveys 56 1 (2023) 1\u201326.","DOI":"10.1145\/3605145"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"crossref","unstructured":"Zhe Fu Xi Niu Xiangcheng Wu and Ruhani Rahman. 2025. A Deep Learning Model for Cross-Domain Serendipity Recommendations. TORS 3 3 (2025) 1\u201321.","DOI":"10.1145\/3690654"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591787"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"Samuel\u00a0D Gosling Peter\u00a0J Rentfrow and William\u00a0B Swann\u00a0Jr. 2003. A Very Brief Measure of the Big-Five Personality Domains. Journal of Research in personality 37 6 (2003) 504\u2013528.","DOI":"10.1016\/S0092-6566(03)00046-1"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3604915.3608851"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"crossref","unstructured":"Todd\u00a0B Kashdan Matthew\u00a0W Gallagher Paul\u00a0J Silvia Beate\u00a0P Winterstein William\u00a0E Breen Daniel Terhar and Michael\u00a0F Steger. 2009. The Curiosity And Exploration Inventory-II: Development Factor Structure And Psychometrics. Journal of research in personality 43 6 (2009) 987\u2013998.","DOI":"10.1016\/j.jrp.2009.04.011"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/3167132.3167276"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3576840.3578310"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/3627508.3638342"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"crossref","unstructured":"Denis Kotkov Jari Veijalainen and Shuaiqiang Wang. 2020. How Does Serendipity Affect Diversity in Recommender Systems? A Serendipity-Oriented Greedy Algorithm. Computing 102 (2020) 393\u2013411.","DOI":"10.1007\/s00607-018-0687-5"},{"key":"e_1_3_3_1_19_2","unstructured":"Haitao Li Qian Dong Junjie Chen Huixue Su Yujia Zhou Qingyao Ai Ziyi Ye and Yiqun Liu. 2024. LLMs-as-Judges: A Comprehensive Survey on LLM-Based Evaluation Methods. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.05579 (2024)."},{"key":"e_1_3_3_1_20_2","unstructured":"Jiaqi Li Mengmeng Wang Zilong Zheng and Muhan Zhang. 2023. LooGLE: Can Long-Context Language Models Understand Long Contexts? arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.04939 (2023)."},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/3383313.3412238"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380100"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"crossref","unstructured":"Xiang Liu Peijie Dong Xuming Hu and Xiaowen Chu. 2024. Longgenbench: Long-context Generation Benchmark. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.04199 (2024).","DOI":"10.18653\/v1\/2024.findings-emnlp.48"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3269268"},{"key":"e_1_3_3_1_25_2","unstructured":"Bhrij Patel Souradip Chakraborty Wesley\u00a0A Suttle Mengdi Wang Amrit\u00a0Singh Bedi and Dinesh Manocha. 2024. AIME: AI System Optimization via Multiple LLM Evaluators. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.03131 (2024)."},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/2043932.2043962"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"crossref","unstructured":"Yu Tokutake and Kazushi Okamoto. 2024. Can Large Language Models Assess Serendipity in Recommender Systems? JACIII 28 6 (2024) 1263\u20131272.","DOI":"10.20965\/jaciii.2024.p1263"},{"key":"e_1_3_3_1_28_2","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et\u00a0al. 2023. Llama 2: Open Foundation and Fine-Tuned Chat Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.09288 (2023)."},{"key":"e_1_3_3_1_29_2","unstructured":"Yu-Min Tseng Wei-Lin Chen Chung-Chi Chen and Hsin-Hsi Chen. 2024. Are Expert-Level Language Models Expert-Level Annotators? arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.03254 (2024)."},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/3640457.3688161"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"crossref","unstructured":"Lei Wang Jingsen Zhang Hao Yang Zhi-Yuan Chen Jiakai Tang Zeyu Zhang Xu Chen Yankai Lin Hao Sun Ruihua Song et\u00a0al. 2025. User Behavior Simulation with Large Language Model-Based Agents. TOIS 43 2 (2025) 1\u201337.","DOI":"10.1145\/3708985"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"crossref","unstructured":"Ningxia Wang and Li Chen. 2023. How Do Item Features and User Characteristics Affect Users\u2019 Perceptions of Recommendation Serendipity? A Cross-Domain Analysis. UMUAI 33 3 (2023) 727\u2013765.","DOI":"10.1007\/s11257-022-09350-x"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3340631.3394863"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.820"},{"key":"e_1_3_3_1_35_2","unstructured":"Yunjia Xi Muyan Weng Wen Chen Chao Yi Dian Chen Gaoyang Guo Mao Zhang Jian Wu Yuning Jiang Qingwen Liu et\u00a0al. 2025. Bursting Filter Bubble: Enhancing Serendipity Recommendations with Aligned Large Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.13539 (2025)."},{"key":"e_1_3_3_1_36_2","unstructured":"An Yang Baosong Yang Beichen Zhang Binyuan Hui Bo Zheng Bowen Yu Chengyuan Li Dayiheng Liu Fei Huang Haoran Wei et\u00a0al. 2024. Qwen2.5 Technical Report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.15115 (2024)."},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482394"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","DOI":"10.1145\/3640457.3688075"},{"key":"e_1_3_3_1_39_2","unstructured":"Jun Zhao Zhihao Zhang Luhui Gao Qi Zhang Tao Gui and Xuanjing Huang. 2024. Llama Beyond English: An Empirical Study on Language Capability Transfer. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.01055 (2024)."},{"key":"e_1_3_3_1_40_2","first-page":"315","volume-title":"SIGIR","author":"Zhao Pengfei","year":"2016","unstructured":"Pengfei Zhao and Dik\u00a0Lun Lee. 2016. How Much Novelty Is Relevant? It Depends on Your Curiosity. In SIGIR. 315\u2013324."},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i12.33462"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/3640457.3688149"},{"key":"e_1_3_3_1_43_2","first-page":"1726","volume-title":"WWW","author":"Zhu Lixi","year":"2024","unstructured":"Lixi Zhu, Xiaowen Huang, and Jitao Sang. 2024. How Reliable Is Your Simulator? Analysis on the Limitations of Current LLM-Based User Simulators for Conversational Recommendation. In WWW. 1726\u20131732."},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"crossref","unstructured":"Reza\u00a0Jafari Ziarani and Reza Ravanmehr. 2021. Serendipity in Recommender Systems: A Systematic Literature Review. Journal of Computer Science and Technology 36 (2021) 375\u2013396.","DOI":"10.1007\/s11390-020-0135-9"}],"event":{"name":"RecSys '25: Nineteenth ACM Conference on Recommender Systems","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction","SIGAI ACM Special Interest Group on Artificial Intelligence","SIGIR ACM Special Interest Group on Information Retrieval","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Prague Czech Republic","acronym":"RecSys '25"},"container-title":["Proceedings of the Nineteenth ACM Conference on Recommender Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3705328.3748167","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T11:47:46Z","timestamp":1757159266000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3705328.3748167"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,7]]},"references-count":43,"alternative-id":["10.1145\/3705328.3748167","10.1145\/3705328"],"URL":"https:\/\/doi.org\/10.1145\/3705328.3748167","relation":{},"subject":[],"published":{"date-parts":[[2025,9,7]]},"assertion":[{"value":"2025-09-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}