{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,4]],"date-time":"2026-07-04T08:20:10Z","timestamp":1783153210076,"version":"3.54.6"},"publisher-location":"New York, NY, USA","reference-count":71,"publisher":"ACM","funder":[{"name":"National Natural Science Foundation of China","award":["62272467"],"award-info":[{"award-number":["62272467"]}]},{"name":"the China Postdoctoral Science Foundation","award":["2025T180440"],"award-info":[{"award-number":["2025T180440"]}]},{"name":"the Outstanding Innovative Talents Cultivation Funded Programs 2026 of Renmin University of China","award":["None"],"award-info":[{"award-number":["None"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,13]]},"DOI":"10.1145\/3774904.3792301","type":"proceedings-article","created":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T21:54:39Z","timestamp":1775771679000},"page":"2126-2137","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Toward Generalized Web Agent Training: A Deep Dive into Entropy-Balanced Reinforcement Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2318-0281","authenticated-orcid":false,"given":"Guanting","family":"Dong","sequence":"first","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2008-343X","authenticated-orcid":false,"given":"Licheng","family":"Bao","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-9327-9173","authenticated-orcid":false,"given":"Zhongyuan","family":"Wang","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2698-4750","authenticated-orcid":false,"given":"Kangzhi","family":"Zhao","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0708-418X","authenticated-orcid":false,"given":"Xiaoxi","family":"Li","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4808-1534","authenticated-orcid":false,"given":"Jiajie","family":"Jin","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0571-2239","authenticated-orcid":false,"given":"Jinghan","family":"Yang","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4499-7581","authenticated-orcid":false,"given":"Hangyu","family":"Mao","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0413-8058","authenticated-orcid":false,"given":"Fuzheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3636-3618","authenticated-orcid":false,"given":"Kun","family":"Gai","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8550-279X","authenticated-orcid":false,"given":"Guorui","family":"Zhou","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9432-3251","authenticated-orcid":false,"given":"Yutao","family":"Zhu","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9777-9676","authenticated-orcid":false,"given":"Ji-Rong","family":"Wen","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9781-948X","authenticated-orcid":false,"given":"Zhicheng","family":"Dou","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,4,12]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2402.03216"},{"key":"e_1_3_2_1_2_1","unstructured":"Kaiyuan Chen Yixin Ren Yang Liu Xiaobo Hu Haotong Tian Tianbao Xie Fangfu Liu Haoye Zhang Hongzhang Liu Yuan Gong et al. 2025c. xbench: Tracking Agents Productivity Scaling with Profession-Aligned Real-World Evaluations. arXiv preprint arXiv:2506.13651 (2025)."},{"key":"e_1_3_2_1_3_1","unstructured":"Mingyang Chen Tianpeng Li Haoze Sun Yijie Zhou Chenzheng Zhu Haofen Wang Jeff Z. Pan Wen Zhang Huajun Chen Fan Yang Zenan Zhou and Weipeng Chen. 2025b. ReSearch: Learning to Reason with Search for LLMs via Reinforcement Learning. arXiv:cs.AI\/2503.19470 https:\/\/arxiv.org\/abs\/2503.19470"},{"key":"e_1_3_2_1_4_1","volume-title":"Revisiting RAG Ensemble: A Theoretical and Mechanistic Analysis of Multi-RAG System Collaboration. arXiv preprint arXiv:2508.13828","author":"Chen Yifei","year":"2025","unstructured":"Yifei Chen, Guanting Dong, Yutao Zhu, and Zhicheng Dou. 2025a. Revisiting RAG Ensemble: A Theoretical and Mechanistic Analysis of Multi-RAG System Collaboration. arXiv preprint arXiv:2508.13828 (2025)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2506.14758"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2501.17161"},{"key":"e_1_3_2_1_7_1","volume-title":"Training Verifiers to Solve Math Word Problems. arXiv preprint arXiv:2110.14168","author":"Cobbe Karl","year":"2021","unstructured":"Karl Cobbe, Vineet Kosaraju, Mohammad Bavarian, Mark Chen, Heewoo Jun, Lukasz Kaiser, Matthias Plappert, Jerry Tworek, Jacob Hilton, Reiichiro Nakano, Christopher Hesse, and John Schulman. 2021. Training Verifiers to Solve Math Word Problems. arXiv preprint arXiv:2110.14168 (2021)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2505.16410"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2406.13542"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2507.19849"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2406.18676"},{"key":"e_1_3_2_1_12_1","unstructured":"Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Amy Yang Angela Fan et al. 2024. The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.5555\/3001460.3001507"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2505.10978"},{"key":"e_1_3_2_1_15_1","volume-title":"Group-in-Group Policy Optimization for LLM Agent Training. arXiv preprint arXiv:2505.10978","author":"Feng Lang","year":"2025","unstructured":"Lang Feng, Zhenghai Xue, Tingcong Liu, and Bo An. 2025b. Group-in-Group Policy Optimization for LLM Agent Training. arXiv preprint arXiv:2505.10978 (2025)."},{"key":"e_1_3_2_1_16_1","unstructured":"Jiaxuan Gao Wei Fu Minyang Xie Shusheng Xu Chuyi He Zhiyu Mei Banghua Zhu and Yi Wu. 2025. Beyond Ten Turns: Unlocking Long-Horizon Agentic Search with Large-Scale Asynchronous RL. arXiv:cs.CL\/2508.07976 https:\/\/arxiv.org\/abs\/2508.07976"},{"key":"e_1_3_2_1_17_1","volume-title":"ToRA: A Tool-Integrated Reasoning Agent for Mathematical Problem Solving. In The Twelfth International Conference on Learning Representations, ICLR 2024","author":"Gou Zhibin","year":"2024","unstructured":"Zhibin Gou, Zhihong Shao, Yeyun Gong, Yelong Shen, Yujiu Yang, Minlie Huang, Nan Duan, and Weizhu Chen. 2024. ToRA: A Tool-Integrated Reasoning Agent for Mathematical Problem Solving. In The Twelfth International Conference on Learning Representations, ICLR 2024, Vienna, Austria, May 7-11, 2024. OpenReview.net. https:\/\/openreview.net\/forum?id=Ep0TtjVoap"},{"key":"e_1_3_2_1_18_1","unstructured":"Daya Guo Dejian Yang Haowei Zhang Junxiao Song Ruoyu Zhang Runxin Xu Qihao Zhu Shirong Ma Peiyi Wang Xiao Bi et al. 2025. Deepseek-r1: Incentivizing reasoning capability in llms via reinforcement learning. arXiv preprint arXiv:2501.12948 (2025)."},{"key":"e_1_3_2_1_19_1","volume-title":"REALM: Retrieval-Augmented Language Model Pre-Training. CoRR","author":"Guu Kelvin","year":"2020","unstructured":"Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat, and Ming-Wei Chang. 2020. REALM: Retrieval-Augmented Language Model Pre-Training. CoRR, Vol. abs\/2002.08909 (2020). arXiv:2002.08909 https:\/\/arxiv.org\/abs\/2002.08909"},{"key":"e_1_3_2_1_20_1","volume-title":"Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks 1, NeurIPS Datasets and Benchmarks 2021","author":"Hendrycks Dan","year":"2021","unstructured":"Dan Hendrycks, Collin Burns, Saurav Kadavath, Akul Arora, Steven Basart, Eric Tang, Dawn Song, and Jacob Steinhardt. 2021. Measuring Mathematical Problem Solving With the MATH Dataset. In Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks 1, NeurIPS Datasets and Benchmarks 2021, December 2021, virtual, Joaquin Vanschoren and Sai-Kit Yeung (Eds.). https:\/\/datasets-benchmarks-proceedings.neurips.cc\/paper\/2021\/hash\/be83ab3ecd0db773eb2dc1b0a17836a1-Abstract-round2.html"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2020.COLING-MAIN.580"},{"key":"e_1_3_2_1_22_1","volume-title":"Reinforce: A simple and efficient approach for aligning large language models. arXiv preprint arXiv:2501.03262","author":"Hu Jian","year":"2025","unstructured":"Jian Hu. 2025. Reinforce: A simple and efficient approach for aligning large language models. arXiv preprint arXiv:2501.03262 (2025)."},{"key":"e_1_3_2_1_23_1","unstructured":"Aaron Hurst Adam Lerer Adam P Goucher Adam Perelman Aditya Ramesh Aidan Clark AJ Ostrow Akila Welihinda Alan Hayes Alec Radford et al. 2024. Gpt-4o system card. arXiv preprint arXiv:2410.21276 (2024)."},{"key":"e_1_3_2_1_24_1","unstructured":"Yuxiang Ji Ziyu Ma Yong Wang Guanhua Chen Xiangxiang Chu and Liaoni Wu. 2025. Tree Search for LLM Agent Reinforcement Learning. arXiv:cs.LG\/2509.21240 https:\/\/arxiv.org\/abs\/2509.21240"},{"key":"e_1_3_2_1_25_1","unstructured":"Dongfu Jiang Yi Lu Zhuofeng Li Zhiheng Lyu Ping Nie Haozhe Wang Alex Su Hui Chen Kai Zou Chao Du et al. 2025. VerlTool: Towards Holistic Agentic Reinforcement Learning with Tool Use. arXiv preprint arXiv:2509.01055 (2025)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2503.09516"},{"key":"e_1_3_2_1_27_1","unstructured":"Jiajie Jin Xiaoxi Li Guanting Dong Yuyao Zhang Yutao Zhu Yang Zhao Hongjin Qian and Zhicheng Dou. 2025a. Decoupled Planning and Execution: A Hierarchical Reasoning Framework for Deep Search. arXiv:cs.AI\/2507.02652 https:\/\/arxiv.org\/abs\/2507.02652"},{"key":"e_1_3_2_1_28_1","unstructured":"Satyapriya Krishna Kalpesh Krishna Anhad Mohananey Steven Schwarcz Adam Stambler Shyam Upadhyay and Manaal Faruqui. 2024. Fact Fetch and Reason: A Unified Evaluation of Retrieval-Augmented Generation. arXiv:cs.CL\/2409.12941 https:\/\/arxiv.org\/abs\/2409.12941"},{"key":"e_1_3_2_1_29_1","volume-title":"Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. In Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020","author":"Lewis Patrick","year":"2020","unstructured":"Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fcttler, Mike Lewis, Wen-tau Yih, Tim Rockt\u00e4schel, Sebastian Riedel, and Douwe Kiela. 2020a. Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. In Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual, Hugo Larochelle, Marc'Aurelio Ranzato, Raia Hadsell, Maria-Florina Balcan, and Hsuan-Tien Lin (Eds.). https:\/\/proceedings.neurips.cc\/paper\/2020\/hash\/6b493230205f780e1bc26945df7481e5-Abstract.html"},{"key":"e_1_3_2_1_30_1","volume-title":"Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. In Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020","author":"Lewis Patrick S. H.","year":"2020","unstructured":"Patrick S. H. Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fcttler, Mike Lewis, Wen-tau Yih, Tim Rockt\u00e4schel, Sebastian Riedel, and Douwe Kiela. 2020b. Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. In Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual, Hugo Larochelle, Marc'Aurelio Ranzato, Raia Hadsell, Maria-Florina Balcan, and Hsuan-Tien Lin (Eds.). https:\/\/proceedings.neurips.cc\/paper\/2020\/hash\/6b493230205f780e1bc26945df7481e5-Abstract.html"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2407.04078"},{"key":"e_1_3_2_1_32_1","unstructured":"Kuan Li Zhongwang Zhang Huifeng Yin Liwen Zhang Litu Ou Jialong Wu Wenbiao Yin Baixuan Li Zhengwei Tao Xinyu Wang Weizhou Shen Junkai Zhang Dingchu Zhang Xixi Wu Yong Jiang Ming Yan Pengjun Xie Fei Huang and Jingren Zhou. 2025 e. WebSailor: Navigating Super-human Reasoning for Web Agent. arXiv:cs.CL\/2507.02592 https:\/\/arxiv.org\/abs\/2507.02592"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2501.05366"},{"key":"e_1_3_2_1_34_1","unstructured":"Xiaoxi Li Wenxiang Jiao Jiarui Jin Guanting Dong Jiajie Jin Yinuo Wang Hao Wang Yutao Zhu Ji-Rong Wen Yuan Lu and Zhicheng Dou. 2025c. DeepAgent: A General Reasoning Agent with Scalable Toolsets. arXiv:cs.AI\/2510.21618 https:\/\/arxiv.org\/abs\/2510.21618"},{"key":"e_1_3_2_1_35_1","volume-title":"WebThinker: Empowering Large Reasoning Models with Deep Research Capability. arXiv preprint arXiv:2504.21776","author":"Li Xiaoxi","year":"2025","unstructured":"Xiaoxi Li, Jiajie Jin, Guanting Dong, Hongjin Qian, Yutao Zhu, Yongkang Wu, Ji-Rong Wen, and Zhicheng Dou. 2025d. WebThinker: Empowering Large Reasoning Models with Deep Research Capability. arXiv preprint arXiv:2504.21776 (2025)."},{"key":"e_1_3_2_1_36_1","volume-title":"Treepo: Bridging the gap of policy optimization and efficacy and inference efficiency with heuristic tree-based modeling. arXiv preprint arXiv:2508.17445","author":"Li Yizhi","year":"2025","unstructured":"Yizhi Li, Qingshui Gu, Zhoufutu Wen, Ziniu Li, Tianshun Xing, Shuyue Guo, Tianyu Zheng, Xin Zhou, Xingwei Qu, Wangchunshu Zhou, et al., 2025b. Treepo: Bridging the gap of policy optimization and efficacy and inference efficiency with heuristic tree-based modeling. arXiv preprint arXiv:2508.17445 (2025)."},{"key":"e_1_3_2_1_37_1","volume-title":"The Twelfth International Conference on Learning Representations, ICLR 2024","author":"Lightman Hunter","year":"2024","unstructured":"Hunter Lightman, Vineet Kosaraju, Yuri Burda, Harrison Edwards, Bowen Baker, Teddy Lee, Jan Leike, John Schulman, Ilya Sutskever, and Karl Cobbe. 2024. Let's Verify Step by Step. In The Twelfth International Conference on Learning Representations, ICLR 2024, Vienna, Austria, May 7-11, 2024. OpenReview.net. https:\/\/openreview.net\/forum?id=v8L0pN6EOi"},{"key":"e_1_3_2_1_38_1","unstructured":"Zihe Liu Jiashun Liu Yancheng He Weixun Wang Jiaheng Liu Ling Pan Xinyu Hu Shaopan Xiong Ju Huang Jian Hu et al. 2025. Part I: Tricks or traps? A deep dive into RL for LLM reasoning. arXiv preprint arXiv:2508.08221 (2025)."},{"key":"e_1_3_2_1_39_1","volume-title":"The Twelfth International Conference on Learning Representations, ICLR 2024","author":"Mialon Gr\u00e9goire","year":"2024","unstructured":"Gr\u00e9goire Mialon, Cl\u00e9mentine Fourrier, Thomas Wolf, Yann LeCun, and Thomas Scialom. 2024. GAIA: a benchmark for General AI Assistants. In The Twelfth International Conference on Learning Representations, ICLR 2024, Vienna, Austria, May 7-11, 2024. OpenReview.net. https:\/\/openreview.net\/forum?id=fibxvahvs3"},{"key":"e_1_3_2_1_40_1","unstructured":"MiniMax. 2025. MiniMax-M1: Scaling Test-Time Compute Efficiently with Lightning Attention. arXiv:cs.CL\/2506.13585 https:\/\/arxiv.org\/abs\/2506.13585"},{"key":"e_1_3_2_1_41_1","unstructured":"OpenAI. 2024. Learning to Reason with LLMs. https:\/\/openai.com\/index\/learning-to-reason-with-llms"},{"key":"e_1_3_2_1_42_1","volume-title":"Mohamed Shaaban, John Ling, Sean Shi, et al.","author":"Phan Long","year":"2025","unstructured":"Long Phan, Alice Gatti, Ziwen Han, Nathaniel Li, Josephina Hu, Hugh Zhang, Chen Bo Calvin Zhang, Mohamed Shaaban, John Ling, Sean Shi, et al., 2025. Humanity's last exam. arXiv preprint arXiv:2501.14249 (2025)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2023.FINDINGS-EMNLP.378"},{"key":"e_1_3_2_1_44_1","unstructured":"Qwen: An Yang Baosong Yang Beichen Zhang Binyuan Hui Bo Zheng Bowen Yu Chengyuan Li Dayiheng Liu Fei Huang Haoran Wei Huan Lin Jian Yang Jianhong Tu Jianwei Zhang Jianxin Yang Jiaxi Yang Jingren Zhou Junyang Lin Kai Dang Keming Lu Keqin Bao Kexin Yang Le Yu Mei Li Mingfeng Xue Pei Zhang Qin Zhu Rui Men Runji Lin Tianhao Li Tingyu Xia Xingzhang Ren Xuancheng Ren Yang Fan Yang Su Yichang Zhang Yu Wan Yuqiong Liu Zeyu Cui Zhenru Zhang and Zihan Qiu. 2024. Qwen2.5 Technical Report. arXiv:cs.CL\/2412.15115 https:\/\/arxiv.org\/abs\/2412.15115"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2402.03300"},{"key":"e_1_3_2_1_46_1","volume-title":"HybridFlow: A Flexible and Efficient RLHF Framework. arXiv preprint arXiv: 2409.19256","author":"Sheng Guangming","year":"2024","unstructured":"Guangming Sheng, Chi Zhang, Zilingfeng Ye, Xibin Wu, Wang Zhang, Ru Zhang, Yanghua Peng, Haibin Lin, and Chuan Wu. 2024. HybridFlow: A Flexible and Efficient RLHF Framework. arXiv preprint arXiv: 2409.19256 (2024)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2503.05592"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2505.17005"},{"key":"e_1_3_2_1_49_1","unstructured":"Liangcai Su Zhen Zhang Guangyu Li Zhuo Chen Chenxi Wang Maojia Song Xinyu Wang Kuan Li Jialong Wu Xuanzhong Chen et al. 2025b. Scaling Agents via Continual Pre-training. arXiv preprint arXiv:2509.13310 (2025)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2508.07629"},{"key":"e_1_3_2_1_51_1","unstructured":"Kimi Team Yifan Bai Yiping Bao Guanduo Chen Jiahao Chen Ningxin Chen Ruijue Chen Yanru Chen Yuankun Chen Yutian Chen et al. 2025a. Kimi k2: Open agentic intelligence. arXiv preprint arXiv:2507.20534 (2025)."},{"key":"e_1_3_2_1_52_1","unstructured":"Kimi Team Angang Du Bofei Gao Bowei Xing Changjiu Jiang Cheng Chen Cheng Li Chenjun Xiao Chenzhuang Du Chonghua Liao et al. 2025b. Kimi k1. 5: Scaling reinforcement learning with llms. arXiv preprint arXiv:2501.12599 (2025)."},{"key":"e_1_3_2_1_53_1","unstructured":"Qwen Team. 2024a. QwQ: Reflect Deeply on the Boundaries of the Unknown. https:\/\/qwenlm.github.io\/blog\/qwq-32b-preview\/"},{"key":"e_1_3_2_1_54_1","volume-title":"Qwq: Reflect deeply on the boundaries of the unknown. Hugging Face","author":"Team Qwen","year":"2024","unstructured":"Qwen Team. 2024b. Qwq: Reflect deeply on the boundaries of the unknown. Hugging Face (2024)."},{"key":"e_1_3_2_1_55_1","volume-title":"The information bottleneck method. arXiv preprint physics\/0004057","author":"Tishby Naftali","year":"2000","unstructured":"Naftali Tishby, Fernando C Pereira, and William Bialek. 2000. The information bottleneck method. arXiv preprint physics\/0004057 (2000)."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00475"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2506.01939"},{"key":"e_1_3_2_1_58_1","volume-title":"Simon Shaolei Du, and Yelong Shen","author":"Wang Yiping","year":"2025","unstructured":"Yiping Wang, Qing Yang, Zhiyuan Zeng, Liliang Ren, Lucas Liu, Baolin Peng, Hao Cheng, Xuehai He, Kuan Wang, Jianfeng Gao, Weizhu Chen, Shuohang Wang, Simon Shaolei Du, and Yelong Shen. 2025a. Reinforcement Learning for Reasoning in Large Language Models with One Training Example. arXiv preprint arXiv:2504.20571 (2025)."},{"key":"e_1_3_2_1_59_1","unstructured":"Jialong Wu Baixuan Li Runnan Fang Wenbiao Yin Liwen Zhang Zhengwei Tao Dingchu Zhang Zekun Xi Yong Jiang Pengjun Xie Fei Huang and Jingren Zhou. 2025a. WebDancer: Towards Autonomous Information Seeking Agency. arXiv:cs.CL\/2505.22648 https:\/\/arxiv.org\/abs\/2505.22648"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2501.07572"},{"key":"e_1_3_2_1_61_1","unstructured":"Jialong Wu Wenbiao Yin Yong Jiang Zhenglin Wang Zekun Xi Runnan Fang Deyu Zhou Pengjun Xie and Fei Huang. 2025d. WebWalker: Benchmarking LLMs in Web Traversal. arXiv:cs.CL\/2501.07572 https:\/\/arxiv.org\/abs\/2501.07572"},{"key":"e_1_3_2_1_62_1","volume-title":"ReSum: Unlocking Long-Horizon Search Intelligence via Context Summarization. arXiv preprint arXiv:2509.13313","author":"Wu Xixi","year":"2025","unstructured":"Xixi Wu, Kuan Li, Yida Zhao, Liwen Zhang, Litu Ou, Huifeng Yin, Zhongwang Zhang, Yong Jiang, Pengjun Xie, Fei Huang, Minhao Cheng, Shuai Wang, Hong Cheng, and Jingren Zhou. 2025b. ReSum: Unlocking Long-Horizon Search Intelligence via Context Summarization. arXiv preprint arXiv:2509.13313 (2025)."},{"key":"e_1_3_2_1_63_1","volume-title":"SimpleTIR: End-to-End Reinforcement Learning for Multi-Turn Tool-Integrated Reasoning. arXiv preprint arXiv:2509.02479","author":"Xue Zhenghai","year":"2025","unstructured":"Zhenghai Xue, Longtao Zheng, Qian Liu, Yingru Li, Xiaosen Zheng, Zejun Ma, and Bo An. 2025. SimpleTIR: End-to-End Reinforcement Learning for Multi-Turn Tool-Integrated Reasoning. arXiv preprint arXiv:2509.02479 (2025)."},{"key":"e_1_3_2_1_64_1","unstructured":"An Yang Anfeng Li Baosong Yang Beichen Zhang Binyuan Hui Bo Zheng Bowen Yu Chang Gao Chengen Huang Chenxu Lv Chujie Zheng Dayiheng Liu Fan Zhou Fei Huang Feng Hu Hao Ge Haoran Wei Huan Lin Jialong Tang Jian Yang Jianhong Tu Jianwei Zhang Jian Yang Jiaxi Yang Jingren Zhou Jingren Zhou Junyang Lin Kai Dang Keqin Bao Kexin Yang Le Yu Lianghao Deng Mei Li Mingfeng Xue Mingze Li Pei Zhang Peng Wang Qin Zhu Rui Men Ruize Gao Shixuan Liu Shuang Luo Tianhao Li Tianyi Tang Wenbiao Yin Xingzhang Ren Xinyu Wang Xinyu Zhang Xuancheng Ren Yang Fan Yang Su Yichang Zhang Yinger Zhang Yu Wan Yuqiong Liu Zekun Wang Zeyu Cui Zhenru Zhang Zhipeng Zhou and Zihan Qiu. 2025. Qwen3 Technical Report. CoRR Vol. abs\/2505.09388 (2025). https:\/\/doi.org\/10.48550\/ARXIV.2505.09388 arXiv:2505.09388"},{"key":"e_1_3_2_1_65_1","volume-title":"React: Synergizing reasoning and acting in language models. arXiv preprint arXiv:2210.03629","author":"Yao Shunyu","year":"2022","unstructured":"Shunyu Yao, Jeffrey Zhao, Dian Yu, Nan Du, Izhak Shafran, Karthik Narasimhan, and Yuan Cao. 2022. React: Synergizing reasoning and acting in language models. arXiv preprint arXiv:2210.03629 (2022)."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2503.14476"},{"key":"e_1_3_2_1_67_1","unstructured":"Guibin Zhang Hejia Geng Xiaohang Yu Zhenfei Yin Zaibin Zhang Zelin Tan Heng Zhou Zhongzhi Li Xiangyuan Xue Yijiang Li et al. 2025. The Landscape of Agentic Reinforcement Learning for LLMs: A Survey. arXiv preprint arXiv:2509.02547 (2025)."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2309.01219"},{"key":"e_1_3_2_1_69_1","unstructured":"Tianyu Zheng Tianshun Xing Qingshui Gu Taoran Liang Xingwei Qu Xin Zhou Yizhi Li Zhoufutu Wen Chenghua Lin Wenhao Huang et al. 2025a. First Return Entropy-Eliciting Explore. arXiv preprint arXiv:2507.07017 (2025)."},{"key":"e_1_3_2_1_70_1","unstructured":"Tianyu Zheng Tianshun Xing Qingshui Gu Taoran Liang Xingwei Qu Xin Zhou Yizhi Li Zhoufutu Wen Chenghua Lin Wenhao Huang et al. 2025b. First return entropy-eliciting explore. arXiv preprint arXiv:2507.07017 (2025)."},{"key":"e_1_3_2_1_71_1","volume-title":"An information bottleneck perspective for effective noise filtering on retrieval-augmented generation. arXiv preprint arXiv:2406.01549","author":"Zhu Kun","year":"2024","unstructured":"Kun Zhu, Xiaocheng Feng, Xiyuan Du, Yuxuan Gu, Weijiang Yu, Haotian Wang, Qianglong Chen, Zheng Chu, Jingchang Chen, and Bing Qin. 2024. An information bottleneck perspective for effective noise filtering on retrieval-augmented generation. arXiv preprint arXiv:2406.01549 (2024)."}],"event":{"name":"WWW '26: The ACM Web Conference 2026","location":"Dubai United Arab Emirates","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM Web Conference 2026"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3774904.3792301","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,7,4]],"date-time":"2026-07-04T08:03:55Z","timestamp":1783152235000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3774904.3792301"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,12]]},"references-count":71,"alternative-id":["10.1145\/3774904.3792301","10.1145\/3774904"],"URL":"https:\/\/doi.org\/10.1145\/3774904.3792301","relation":{},"subject":[],"published":{"date-parts":[[2026,4,12]]},"assertion":[{"value":"2026-04-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}