{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:40:01Z","timestamp":1755866401363,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,13]]},"DOI":"10.1145\/3726302.3730025","type":"proceedings-article","created":{"date-parts":[[2025,7,14]],"date-time":"2025-07-14T01:25:28Z","timestamp":1752456328000},"page":"1098-1108","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["LLM-based Search Assistant with Holistically Guided MCTS for Intricate Information Seeking"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0562-9911","authenticated-orcid":false,"given":"Ruiyang","family":"Ren","sequence":"first","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-5760-9285","authenticated-orcid":false,"given":"Yuhao","family":"Wang","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0480-5593","authenticated-orcid":false,"given":"Junyi","family":"Li","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-7111-6089","authenticated-orcid":false,"given":"Jinhao","family":"Jiang","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8333-6196","authenticated-orcid":false,"given":"Wayne Xin","family":"Zhao","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5199-1428","authenticated-orcid":false,"given":"Wenjie","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6097-7807","authenticated-orcid":false,"given":"Tat-Seng","family":"Chua","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,7,13]]},"reference":[{"volume-title":"The Twelfth International Conference on Learning Representations.","author":"Asai Akari","key":"e_1_3_2_1_1_1","unstructured":"Akari Asai, Zeqiu Wu, Yizhong Wang, Avirup Sil, and Hannaneh Hajishirzi. [n.d.]. Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCIAIG.2012.2186810"},{"key":"e_1_3_2_1_3_1","volume-title":"AlphaMath Almost Zero: process Supervision without process. arXiv preprint arXiv:2405.03553","author":"Chen Guoxin","year":"2024","unstructured":"Guoxin Chen, Minpeng Liao, Chengxi Li, and Kai Fan. 2024a. AlphaMath Almost Zero: process Supervision without process. arXiv preprint arXiv:2405.03553 (2024)."},{"key":"e_1_3_2_1_4_1","volume-title":"Mindsearch: Mimicking human minds elicits deep ai searcher. arXiv preprint arXiv:2407.20183","author":"Chen Zehui","year":"2024","unstructured":"Zehui Chen, Kuikun Liu, Qiuchen Wang, Jiangning Liu, Wenwei Zhang, Kai Chen, and Feng Zhao. 2024b. Mindsearch: Mimicking human minds elicits deep ai searcher. arXiv preprint arXiv:2407.20183 (2024)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00370"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSSC.1968.300136"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.coling-main.580"},{"key":"e_1_3_2_1_8_1","volume-title":"AQA: Adaptive question answering in a society of LLMs via contextual multi-armed bandit. arXiv preprint arXiv:2409.13447","author":"Hoveyda Mohanna","year":"2024","unstructured":"Mohanna Hoveyda, Arjen P de Vries, Maarten de Rijke, Harrie Oosterhuis, and Faegheh Hasibi. 2024. AQA: Adaptive question answering in a society of LLMs via contextual multi-armed bandit. arXiv preprint arXiv:2409.13447 (2024)."},{"key":"e_1_3_2_1_9_1","volume-title":"Yang Song, and Tao Zhang.","author":"Jiang Jinhao","year":"2024","unstructured":"Jinhao Jiang, Jiayi Chen, Junyi Li, Ruiyang Ren, Shijie Wang, Wayne Xin Zhao, Yang Song, and Tao Zhang. 2024a. RAG-Star: Enhancing Deliberative Reasoning with Retrieval Augmented Verification and Refinement. arXiv preprint arXiv:2412.12881 (2024)."},{"key":"e_1_3_2_1_10_1","volume-title":"Wayne Xin Zhao, et al","author":"Jiang Jinhao","year":"2024","unstructured":"Jinhao Jiang, Zhipeng Chen, Yingqian Min, Jie Chen, Xiaoxue Cheng, Jiapeng Wang, Yiru Tang, Haoxiang Sun, Jia Deng, Wayne Xin Zhao, et al., 2024b. Technical Report: Enhancing LLM Reasoning with Reward-guided Tree Search. arXiv preprint arXiv:2411.11694 (2024)."},{"key":"e_1_3_2_1_11_1","volume-title":"fast and slow. Farrar, Straus and Giroux","author":"Kahneman Daniel","year":"2011","unstructured":"Daniel Kahneman. 2011. Thinking, fast and slow. Farrar, Straus and Giroux (2011)."},{"key":"e_1_3_2_1_12_1","volume-title":"Xi Chen, Amirreza Kazemi, Qianyi Sun, Boxing Chen, Dong Li, Xu He, Quan He, Feng Wen, et al.","author":"Kang Jikun","year":"2024","unstructured":"Jikun Kang, Xin Zhe Li, Xi Chen, Amirreza Kazemi, Qianyi Sun, Boxing Chen, Dong Li, Xu He, Quan He, Feng Wen, et al., 2024. Mindstar: Enhancing math reasoning in pre-trained llms at inference time. arXiv preprint arXiv:2405.16265 (2024)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_2_1_14_1","volume-title":"Decomposed Prompting: A Modular Approach for Solving Complex Tasks. In The Eleventh International Conference on Learning Representations.","author":"Khot Tushar","year":"2023","unstructured":"Tushar Khot, Harsh Trivedi, Matthew Finlayson, Yao Fu, Kyle Richardson, Peter Clark, and Ashish Sabharwal. 2023. Decomposed Prompting: A Modular Approach for Solving Complex Tasks. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_2_1_15_1","volume-title":"Bandit Based Monte-Carlo Planning. In Machine Learning: ECML 2006, 17th European Conference on Machine Learning, Berlin, Germany, September 18-22, 2006, Proceedings (Lecture Notes in Computer Science","volume":"293","author":"Kocsis Levente","year":"2006","unstructured":"Levente Kocsis and Csaba Szepesv\u00e1ri. 2006. Bandit Based Monte-Carlo Planning. In Machine Learning: ECML 2006, 17th European Conference on Machine Learning, Berlin, Germany, September 18-22, 2006, Proceedings (Lecture Notes in Computer Science, Vol. 4212), Johannes F\u00fcrnkranz, Tobias Scheffer, and Myra Spiliopoulou (Eds.). Springer, 282-293."},{"key":"e_1_3_2_1_16_1","volume-title":"Zero-Shot Multi-Hop Question Answering via Monte-Carlo Tree Search with Large Language Models. arXiv preprint arXiv:2409.19382","author":"Lee Seongmin","year":"2024","unstructured":"Seongmin Lee, Jaewook Shin, Youngjin Ahn, Seokin Seo, Ohjoon Kwon, and Kee-Eung Kim. 2024. Zero-Shot Multi-Hop Question Answering via Monte-Carlo Tree Search with Large Language Models. arXiv preprint arXiv:2409.19382 (2024)."},{"key":"e_1_3_2_1_17_1","volume-title":"The Twelfth International Conference on Learning Representations, ICLR 2024","author":"Lightman Hunter","year":"2024","unstructured":"Hunter Lightman, Vineet Kosaraju, Yuri Burda, Harrison Edwards, Bowen Baker, Teddy Lee, Jan Leike, John Schulman, Ilya Sutskever, and Karl Cobbe. 2024. Let's Verify Step by Step. In The Twelfth International Conference on Learning Representations, ICLR 2024, Vienna, Austria, May 7-11, 2024. OpenReview.net."},{"key":"e_1_3_2_1_18_1","volume-title":"Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74-81.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74-81."},{"key":"e_1_3_2_1_19_1","unstructured":"Liangchen Luo Yinxiao Liu Rosanne Liu Samrat Phatale Harsh Lara Yunxuan Li Lei Shu Yun Zhu Lei Meng Jiao Sun et al. 2024. Improve Mathematical Reasoning in Language Models by Automated Process Supervision. arXiv preprint arXiv:2406.06592 (2024)."},{"key":"e_1_3_2_1_20_1","volume-title":"Webgpt: Browser-assisted question-answering with human feedback. arXiv preprint arXiv:2112.09332","author":"Nakano Reiichiro","year":"2021","unstructured":"Reiichiro Nakano, Jacob Hilton, Suchir Balaji, Jeff Wu, Long Ouyang, Christina Kim, Christopher Hesse, Shantanu Jain, Vineet Kosaraju, William Saunders, et al., 2021. Webgpt: Browser-assisted question-answering with human feedback. arXiv preprint arXiv:2112.09332 (2021)."},{"key":"e_1_3_2_1_21_1","volume-title":"Agent q: Advanced reasoning and learning for autonomous ai agents. arXiv preprint arXiv:2408.07199","author":"Putta Pranav","year":"2024","unstructured":"Pranav Putta, Edmund Mills, Naman Garg, Sumeet Motwani, Chelsea Finn, Divyansh Garg, and Rafael Rafailov. 2024. Agent q: Advanced reasoning and learning for autonomous ai agents. arXiv preprint arXiv:2408.07199 (2024)."},{"key":"e_1_3_2_1_22_1","volume-title":"Infogent: An agent-based framework for web information aggregation. arXiv preprint arXiv:2410.19054","author":"Reddy Revanth Gangi","year":"2024","unstructured":"Revanth Gangi Reddy, Sagnik Mukherjee, Jeonghwan Kim, Zhenhailong Wang, Dilek Hakkani-Tur, and Heng Ji. 2024. Infogent: An agent-based framework for web information aggregation. arXiv preprint arXiv:2410.19054 (2024)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.50"},{"key":"e_1_3_2_1_24_1","volume-title":"Proceedings of the 31st International Conference on Computational Linguistics. 3697-3715","author":"Ren Ruiyang","year":"2025","unstructured":"Ruiyang Ren, Yuhao Wang, Yingqi Qu, Wayne Xin Zhao, Jing Liu, Hua Wu, Ji-Rong Wen, and Haifeng Wang. 2025. Investigating the Factual Knowledge Boundary of Large Language Models with Retrieval Augmentation. In Proceedings of the 31st International Conference on Computational Linguistics. 3697-3715."},{"key":"e_1_3_2_1_25_1","volume-title":"Rewarding Progress: Scaling Automated Process Verifiers for LLM Reasoning. arXiv preprint arXiv:2410.08146","author":"Setlur Amrith","year":"2024","unstructured":"Amrith Setlur, Chirag Nagpal, Adam Fisch, Xinyang Geng, Jacob Eisenstein, Rishabh Agarwal, Alekh Agarwal, Jonathan Berant, and Aviral Kumar. 2024. Rewarding Progress: Scaling Automated Process Verifiers for LLM Reasoning. arXiv preprint arXiv:2410.08146 (2024)."},{"key":"e_1_3_2_1_26_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Shinn Noah","year":"2024","unstructured":"Noah Shinn, Federico Cassano, Ashwin Gopinath, Karthik Narasimhan, and Shunyu Yao. 2024. Reflexion: Language agents with verbal reinforcement learning. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_27_1","volume-title":"Julian Schrittwieser, Ioannis Antonoglou, Veda Panneershelvam, Marc Lanctot, et al.","author":"Silver David","year":"2016","unstructured":"David Silver, Aja Huang, Chris J Maddison, Arthur Guez, Laurent Sifre, George Van Den Driessche, Julian Schrittwieser, Ioannis Antonoglou, Veda Panneershelvam, Marc Lanctot, et al., 2016b. Mastering the game of Go with deep neural networks and tree search. nature, Vol. 529, 7587 (2016), 484-489."},{"key":"e_1_3_2_1_28_1","volume-title":"Mastering the game of Go with deep neural networks and tree search. Nat","author":"Silver David","year":"2016","unstructured":"David Silver, Aja Huang, Chris J. Maddison, Arthur Guez, Laurent Sifre, George van den Driessche, Julian Schrittwieser, Ioannis Antonoglou, Vedavyas Panneershelvam, Marc Lanctot, Sander Dieleman, Dominik Grewe, John Nham, Nal Kalchbrenner, Ilya Sutskever, Timothy P. Lillicrap, Madeleine Leach, Koray Kavukcuoglu, Thore Graepel, and Demis Hassabis. 2016a. Mastering the game of Go with deep neural networks and tree search. Nat., Vol. 529, 7587 (2016), 484-489."},{"key":"e_1_3_2_1_29_1","unstructured":"David Silver Thomas Hubert Julian Schrittwieser Ioannis Antonoglou Matthew Lai Arthur Guez Marc Lanctot Laurent Sifre Dharshan Kumaran Thore Graepel et al. 2017. Mastering chess and shogi by self-play with a general reinforcement learning algorithm. arXiv preprint arXiv:1712.01815 (2017)."},{"key":"e_1_3_2_1_30_1","volume-title":"Scaling llm test-time compute optimally can be more effective than scaling model parameters. arXiv preprint arXiv:2408.03314","author":"Snell Charlie","year":"2024","unstructured":"Charlie Snell, Jaehoon Lee, Kelvin Xu, and Aviral Kumar. 2024. Scaling llm test-time compute optimally can be more effective than scaling model parameters. arXiv preprint arXiv:2408.03314 (2024)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/1076034.1076074"},{"key":"e_1_3_2_1_32_1","volume-title":"The bitter lesson. Incomplete Ideas (blog)","author":"Sutton Richard","year":"2019","unstructured":"Richard Sutton. 2019. The bitter lesson. Incomplete Ideas (blog), Vol. 13, 1 (2019), 38."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1059"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00475"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.557"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2406.14283"},{"key":"e_1_3_2_1_37_1","volume-title":"Improving multi-step reasoning for llms with deliberative planning. arXiv preprint arXiv:2406.14283","author":"Wang Chaojie","year":"2024","unstructured":"Chaojie Wang, Yanchen Deng, Zhiyi Lyu, Liang Zeng, Jujie He, Shuicheng Yan, and Bo An. 2024b. Q*: Improving multi-step reasoning for llms with deliberative planning. arXiv preprint arXiv:2406.14283 (2024)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.585"},{"volume-title":"Self-Consistency Improves Chain of Thought Reasoning in Language Models. In The Eleventh International Conference on Learning Representations.","author":"Wang Xuezhi","key":"e_1_3_2_1_39_1","unstructured":"Xuezhi Wang, Jason Wei, Dale Schuurmans, Quoc V Le, Ed H Chi, Sharan Narang, Aakanksha Chowdhery, and Denny Zhou. [n.d.]. Self-Consistency Improves Chain of Thought Reasoning in Language Models. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_2_1_40_1","volume-title":"Jing Liu, and Ji-Rong Wen.","author":"Wang Yuhao","year":"2024","unstructured":"Yuhao Wang, Ruiyang Ren, Junyi Li, Wayne Xin Zhao, Jing Liu, and Ji-Rong Wen. 2024c. REAR: A Relevance-Aware Retrieval-Augmented Framework for Open-Domain Question Answering. arXiv preprint arXiv:2402.17497 (2024)."},{"key":"e_1_3_2_1_41_1","volume-title":"Denny Zhou, et al.","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc V Le, Denny Zhou, et al., 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems, Vol. 35 (2022), 24824-24837."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645363"},{"volume-title":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing. 2369-2380","author":"Yang Zhilin","key":"e_1_3_2_1_43_1","unstructured":"Zhilin Yang, Peng Qi, Saizheng Zhang, Yoshua Bengio, William Cohen, Ruslan Salakhutdinov, and Christopher D. Manning. 2018. HotpotQA: A Dataset for Diverse, Explainable Multi-hop Question Answering. In Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing. 2369-2380."},{"key":"e_1_3_2_1_44_1","volume-title":"ReAct: Synergizing Reasoning and Acting in Language Models. In International Conference on Learning Representations (ICLR).","author":"Yao Shunyu","year":"2023","unstructured":"Shunyu Yao, Jeffrey Zhao, Dian Yu, Nan Du, Izhak Shafran, Karthik Narasimhan, and Yuan Cao. 2023. ReAct: Synergizing Reasoning and Acting in Language Models. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_45_1","first-page":"25476","volume-title":"Mastering Atari Games with Limited Data. In Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021","author":"Ye Weirui","year":"2021","unstructured":"Weirui Ye, Shaohuai Liu, Thanard Kurutach, Pieter Abbeel, and Yang Gao. 2021. Mastering Atari Games with Limited Data. In Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6-14, 2021, virtual. 25476-25488."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2403.09629"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"crossref","unstructured":"Di Zhang Jianbo Wu Jingdi Lei Tong Che Jiatong Li Tong Xie Xiaoshui Huang Shufei Zhang Marco Pavone Yuqiang Li et al. 2024a. LLaMA-Berry: Pairwise Optimization for O1-like Olympiad-Level Mathematical Reasoning. arXiv preprint arXiv:2410.02884 (2024).","DOI":"10.18653\/v1\/2025.naacl-long.375"},{"key":"e_1_3_2_1_48_1","volume-title":"Llm self-training via process reward guided tree search. arXiv preprint arXiv:2406.03816","author":"Zhang Dan","year":"2024","unstructured":"Dan Zhang, Sining Zhoubian, Ziniu Hu, Yisong Yue, Yuxiao Dong, and Jie Tang. 2024b. Rest-mcts*: Llm self-training via process reward guided tree search. arXiv preprint arXiv:2406.03816 (2024)."},{"key":"e_1_3_2_1_49_1","unstructured":"Wayne Xin Zhao Kun Zhou Junyi Li Tianyi Tang Xiaolei Wang Yupeng Hou Yingqian Min Beichen Zhang Junjie Zhang Zican Dong et al. 2023. A survey of large language models. arXiv preprint arXiv:2303.18223 (2023)."},{"volume-title":"Forty-first International Conference on Machine Learning.","author":"Zhou Andy","key":"e_1_3_2_1_50_1","unstructured":"Andy Zhou, Kai Yan, Michal Shlapentokh-Rothman, Haohan Wang, and Yu-Xiong Wang. [n.d.]. Language Agent Tree Search Unifies Reasoning, Acting, and Planning in Language Models. In Forty-first International Conference on Machine Learning."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-short.2"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-022-10228-y"}],"event":{"name":"SIGIR '25: The 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Padua Italy","acronym":"SIGIR '25"},"container-title":["Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3726302.3730025","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:15:33Z","timestamp":1755864933000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3726302.3730025"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,13]]},"references-count":52,"alternative-id":["10.1145\/3726302.3730025","10.1145\/3726302"],"URL":"https:\/\/doi.org\/10.1145\/3726302.3730025","relation":{},"subject":[],"published":{"date-parts":[[2025,7,13]]},"assertion":[{"value":"2025-07-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}