{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T10:00:08Z","timestamp":1775815208700,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,13]]},"DOI":"10.1145\/3726302.3730080","type":"proceedings-article","created":{"date-parts":[[2025,7,14]],"date-time":"2025-07-14T01:18:36Z","timestamp":1752455916000},"page":"75-84","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Search-Based Interaction For Conversation Recommendation via Generative Reward Model Based Simulated User"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3685-3606","authenticated-orcid":false,"given":"Xiaolei","family":"Wang","sequence":"first","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8038-6897","authenticated-orcid":false,"given":"Chunxuan","family":"Xia","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0480-5593","authenticated-orcid":false,"given":"Junyi","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Computer Science, National University of Singapore, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3864-4964","authenticated-orcid":false,"given":"Fanzhe","family":"Meng","sequence":"additional","affiliation":[{"name":"School of Information and Software Engineering, University of Electronic Science and Technology of China, Chengdu, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-2084-3088","authenticated-orcid":false,"given":"Lei","family":"Huang","sequence":"additional","affiliation":[{"name":"Meituan, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0080-8988","authenticated-orcid":false,"given":"Jinpeng","family":"Wang","sequence":"additional","affiliation":[{"name":"Meituan, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8333-6196","authenticated-orcid":false,"given":"Wayne Xin","family":"Zhao","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9777-9676","authenticated-orcid":false,"given":"Ji-Rong","family":"Wen","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,7,13]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Marah Abdin Jyoti Aneja Harkirat Behl S\u00e9bastien Bubeck Ronen Eldan Suriya Gunasekar Michael Harrison Russell J Hewett Mojan Javaheripi Piero Kauffmann et al. 2024. Phi-4 technical report. arXiv preprint arXiv:2412.08905 (2024)."},{"key":"e_1_3_2_1_2_1","first-page":"51","article-title":"Towards a Taxonomy of User Feedback Intents for Conversational Recommendations","volume":"2431","author":"Cai Wanling","year":"2019","unstructured":"Wanling Cai and Li Chen. 2019. Towards a Taxonomy of User Feedback Intents for Conversational Recommendations. RecSys (Late-Breaking Results), Vol. 2431 (2019), 51-55.","journal-title":"RecSys (Late-Breaking Results)"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340631.3394856"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641289"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11257-011-9108-6"},{"key":"e_1_3_2_1_6_1","volume-title":"Training deep nets with sublinear memory cost. arXiv preprint arXiv:1604.06174","author":"Chen Tianqi","year":"2016","unstructured":"Tianqi Chen, Bing Xu, Chiyuan Zhang, and Carlos Guestrin. 2016. Training deep nets with sublinear memory cost. arXiv preprint arXiv:1604.06174 (2016)."},{"key":"e_1_3_2_1_7_1","unstructured":"Karl Cobbe Vineet Kosaraju Mohammad Bavarian Mark Chen Heewoo Jun Lukasz Kaiser Matthias Plappert Jerry Tworek Jacob Hilton Reiichiro Nakano et al. 2021. Training verifiers to solve math word problems. arXiv preprint arXiv:2110.14168 (2021)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657755"},{"key":"e_1_3_2_1_9_1","volume-title":"The Twelfth International Conference on Learning Representations.","author":"Dao Tri","unstructured":"Tri Dao. [n.d.]. FlashAttention-2: Faster Attention with Better Parallelism and Work Partitioning. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_10_1","unstructured":"Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Amy Yang Angela Fan et al. 2024. The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aiopen.2021.06.002"},{"key":"e_1_3_2_1_12_1","volume-title":"Inspired: Toward sociable recommendation dialog systems. arXiv preprint arXiv:2009.14306","author":"Hayati Shirley Anugrah","year":"2020","unstructured":"Shirley Anugrah Hayati, Dongyeop Kang, Qingxiaoyang Zhu, Weiyan Shi, and Zhou Yu. 2020. Inspired: Toward sociable recommendation dialog systems. arXiv preprint arXiv:2009.14306 (2020)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3614949"},{"key":"e_1_3_2_1_14_1","volume-title":"Reindex-Then-Adapt: Improving Large Language Models for Conversational Recommendation. arXiv preprint arXiv:2405.12119","author":"He Zhankui","year":"2024","unstructured":"Zhankui He, Zhouhang Xie, Harald Steck, Dawen Liang, Rahul Jha, Nathan Kallus, and Julian McAuley. 2024. Reindex-Then-Adapt: Improving Large Language Models for Conversational Recommendation. arXiv preprint arXiv:2405.12119 (2024)."},{"key":"e_1_3_2_1_15_1","volume-title":"Concept-An Evaluation Protocol on Conversation Recommender Systems with System-and User-centric Factors. arXiv preprint arXiv:2404.03304","author":"Huang Chen","year":"2024","unstructured":"Chen Huang, Peixin Qin, Yang Deng, Wenqiang Lei, Jiancheng Lv, and Tat-Seng Chua. 2024. Concept-An Evaluation Protocol on Conversation Recommender Systems with System-and User-centric Factors. arXiv preprint arXiv:2404.03304 (2024)."},{"key":"e_1_3_2_1_16_1","unstructured":"Aaron Hurst Adam Lerer Adam P Goucher Adam Perelman Aditya Ramesh Aidan Clark AJ Ostrow Akila Welihinda Alan Hayes Alec Radford et al. 2024. Gpt-4o system card. arXiv preprint arXiv:2410.21276 (2024)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3453154"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472307.3484164"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3336191.3371769"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403258"},{"key":"e_1_3_2_1_21_1","volume-title":"Hannes Schulz, Vincent Michalski, Laurent Charlin, and Chris Pal.","author":"Li Raymond","year":"2018","unstructured":"Raymond Li, Samira Ebrahimi Kahou, Hannes Schulz, Vincent Michalski, Laurent Charlin, and Chris Pal. 2018. Towards deep conversational recommendations. Advances in neural information processing systems, Vol. 31 (2018)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3446427"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.167"},{"key":"e_1_3_2_1_24_1","volume-title":"Step. In The Twelfth International Conference on Learning Representations.","author":"Lightman Hunter","unstructured":"Hunter Lightman, Vineet Kosaraju, Yuri Burda, Harrison Edwards, Bowen Baker, Teddy Lee, Jan Leike, John Schulman, Ilya Sutskever, and Karl Cobbe. [n.d.]. Let's Verify Step by Step. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_25_1","volume-title":"Rafael Rafailov, Chase Blagden, Nathan Lile, Louis Castricato, Jan-Philipp Fr\u00e4nken, Chelsea Finn, and Alon Albalak.","author":"Mahan Dakota","year":"2024","unstructured":"Dakota Mahan, Duy Van Phung, Rafael Rafailov, Chase Blagden, Nathan Lile, Louis Castricato, Jan-Philipp Fr\u00e4nken, Chelsea Finn, and Alon Albalak. 2024. Generative reward models. arXiv preprint arXiv:2410.12832 (2024)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606763"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3406703"},{"key":"e_1_3_2_1_28_1","volume-title":"Scaling llm test-time compute optimally can be more effective than scaling model parameters. arXiv preprint arXiv:2408.03314","author":"Snell Charlie","year":"2024","unstructured":"Charlie Snell, Jaehoon Lee, Kelvin Xu, and Aviral Kumar. 2024. Scaling llm test-time compute optimally can be more effective than scaling model parameters. arXiv preprint arXiv:2408.03314 (2024)."},{"key":"e_1_3_2_1_29_1","volume-title":"Forty-first International Conference on Machine Learning.","author":"Wan Ziyu","year":"2024","unstructured":"Ziyu Wan, Xidong Feng, Muning Wen, Stephen Marcus McAleer, Ying Wen, Weinan Zhang, and Jun Wang. 2024. Alphazero-like tree-search can guide large language model decoding and training. In Forty-first International Conference on Machine Learning."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.621"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599387"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539382"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3640457.3688191"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-naacl.4"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3640457.3688146"},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers). 1490-1504","author":"He Zhankui","year":"2024","unstructured":"Se-eun Yoon, Zhankui He, Jessica Echterhoff, and Julian McAuley. 2024. Evaluating Large Language Models as Generative User Simulators for Conversational Recommendation. In Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers). 1490-1504."},{"key":"e_1_3_2_1_37_1","volume-title":"The 4th Workshop on Mathematical Reasoning and AI at NeurIPS'24","author":"Zhang Lunjun","unstructured":"Lunjun Zhang, Arian Hosseini, Hritik Bansal, Mehran Kazemi, Aviral Kumar, and Rishabh Agarwal. [n.d.]. Generative Verifiers: Reward Modeling as Next-Token Prediction. In The 4th Workshop on Mathematical Reasoning and AI at NeurIPS'24."},{"key":"e_1_3_2_1_38_1","unstructured":"Shengyu Zhang Linfeng Dong Xiaoya Li Sen Zhang Xiaofei Sun Shuhe Wang Jiwei Li Runyi Hu Tianwei Zhang Fei Wu et al. 2023a. Instruction tuning for large language models: A survey. arXiv preprint arXiv:2308.10792 (2023)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539597.3570426"},{"key":"e_1_3_2_1_40_1","unstructured":"Wayne Xin Zhao Kun Zhou Junyi Li Tianyi Tang Xiaolei Wang Yupeng Hou Yingqian Min Beichen Zhang Junjie Zhang Zican Dong et al. 2023a. A Survey of Large Language Models. arXiv preprint arXiv:2303.18223 (2023)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3604915.3608812"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-demo.22"},{"key":"e_1_3_2_1_43_1","volume-title":"Human-Involved User Simulator Framework for Conversational Recommender Systems. arXiv preprint arXiv:2405.08035","author":"Zhu Lixi","year":"2024","unstructured":"Lixi Zhu, Xiaowen Huang, and Jitao Sang. 2024. A LLM-based Controllable, Scalable, Human-Involved User Simulator Framework for Conversational Recommender Systems. arXiv preprint arXiv:2405.08035 (2024)."}],"event":{"name":"SIGIR '25: The 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","location":"Padua Italy","acronym":"SIGIR '25","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3726302.3730080","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T18:32:42Z","timestamp":1755887562000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3726302.3730080"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,13]]},"references-count":43,"alternative-id":["10.1145\/3726302.3730080","10.1145\/3726302"],"URL":"https:\/\/doi.org\/10.1145\/3726302.3730080","relation":{},"subject":[],"published":{"date-parts":[[2025,7,13]]},"assertion":[{"value":"2025-07-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}