{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,16]],"date-time":"2026-07-16T05:17:04Z","timestamp":1784179024658,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","funder":[{"name":"National Key Research and Development Program of China","award":["2023YFA1008704"],"award-info":[{"award-number":["2023YFA1008704"]}]},{"DOI":"10.13039\/501100006374","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62472426, 62376275"],"award-info":[{"award-number":["62472426, 62376275"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,13]]},"DOI":"10.1145\/3726302.3730102","type":"proceedings-article","created":{"date-parts":[[2025,7,14]],"date-time":"2025-07-14T01:38:52Z","timestamp":1752457132000},"page":"1251-1261","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["ReARTeR: Retrieval-Augmented Reasoning with Trustworthy Process Rewarding"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6109-4704","authenticated-orcid":false,"given":"Zhongxiang","family":"Sun","sequence":"first","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0061-5769","authenticated-orcid":false,"given":"Qipeng","family":"Wang","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, 
China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5676-4339","authenticated-orcid":false,"given":"Weijie","family":"Yu","sequence":"additional","affiliation":[{"name":"School of Information Technology and Management, University of International Business and Economics, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5923-3429","authenticated-orcid":false,"given":"Xiaoxue","family":"Zang","sequence":"additional","affiliation":[{"name":"Kuaishou Technology Co., Ltd., Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3822-2815","authenticated-orcid":false,"given":"Kai","family":"Zheng","sequence":"additional","affiliation":[{"name":"Kuaishou Technology Co., Ltd., Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7170-111X","authenticated-orcid":false,"given":"Jun","family":"Xu","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7397-5632","authenticated-orcid":false,"given":"Xiao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1714-5527","authenticated-orcid":false,"given":"Yang","family":"Song","sequence":"additional","affiliation":[{"name":"Kuaishou Technology Co., Ltd., Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-9801-9292","authenticated-orcid":false,"given":"Han","family":"Li","sequence":"additional","affiliation":[{"name":"Kuaishou Technology Co., Ltd., Beijing, 
China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,7,13]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al. 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCIAIG.2012.2186810"},{"key":"e_1_3_2_1_3_1","volume-title":"Progressive Multimodal Reasoning via Active Retrieval. arXiv preprint arXiv:2412.14835","author":"Dong Guanting","year":"2024","unstructured":"Guanting Dong, Chenghao Zhang, Mengjie Deng, Yutao Zhu, Zhicheng Dou, and Ji-Rong Wen. 2024. Progressive Multimodal Reasoning via Active Retrieval. arXiv preprint arXiv:2412.14835 (2024)."},{"key":"e_1_3_2_1_4_1","unstructured":"Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Amy Yang Angela Fan et al. 2024. The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)."},{"key":"e_1_3_2_1_5_1","volume-title":"Kto: Model alignment as prospect theoretic optimization. arXiv preprint arXiv:2402.01306","author":"Ethayarajh Kawin","year":"2024","unstructured":"Kawin Ethayarajh, Winnie Xu, Niklas Muennighoff, Dan Jurafsky, and Douwe Kiela. 2024. Kto: Model alignment as prospect theoretic optimization. arXiv preprint arXiv:2402.01306 (2024)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671470"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00370"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.coling-main.580"},{"key":"e_1_3_2_1_9_1","volume-title":"LoRA: Low-Rank Adaptation of Large Language Models. 
In International Conference on Learning Representations.","author":"Hu Edward J","unstructured":"Edward J Hu, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, Weizhu Chen, et al. [n.,d.]. LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_10_1","volume-title":"Large Language Models Cannot Self-Correct Reasoning Yet. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=IkmD3fKBPQ","author":"Huang Jie","year":"2024","unstructured":"Jie Huang, Xinyun Chen, Swaroop Mishra, Huaixiu Steven Zheng, Adams Wei Yu, Xinying Song, and Denny Zhou. 2024. Large Language Models Cannot Self-Correct Reasoning Yet. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=IkmD3fKBPQ"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.389"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.389"},{"key":"e_1_3_2_1_13_1","volume-title":"Longllmlingua: Accelerating and enhancing llms in long context scenarios via prompt compression. arXiv preprint arXiv:2310.06839","author":"Jiang Huiqiang","year":"2023","unstructured":"Huiqiang Jiang, Qianhui Wu, Xufang Luo, Dongsheng Li, Chin-Yew Lin, Yuqing Yang, and Lili Qiu. 2023. Longllmlingua: Accelerating and enhancing llms in long context scenarios via prompt compression. arXiv preprint arXiv:2310.06839 (2023)."},{"key":"e_1_3_2_1_14_1","volume-title":"FlashRAG: A Modular Toolkit for Efficient Retrieval-Augmented Generation Research. arXiv preprint arXiv:2405.13576","author":"Jin Jiajie","year":"2024","unstructured":"Jiajie Jin, Yutao Zhu, Xinyu Yang, Chenghao Zhang, and Zhicheng Dou. 2024. FlashRAG: A Modular Toolkit for Efficient Retrieval-Augmented Generation Research. 
arXiv preprint arXiv:2405.13576 (2024)."},{"key":"e_1_3_2_1_15_1","volume-title":"The Twelfth International Conference on Learning Representations.","author":"Kim Jaehyung","unstructured":"Jaehyung Kim, Jaehyun Nam, Sangwoo Mo, Jongjin Park, Sang-Woo Lee, Minjoon Seo, Jung-Woo Ha, and Jinwoo Shin. [n.,d.]. SuRe: Summarizing Retrievals using Answer Candidates for Open-domain QA of LLMs. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_16_1","volume-title":"The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=w4DW6qkRmt","author":"Kim Jaehyung","year":"2024","unstructured":"Jaehyung Kim, Jaehyun Nam, Sangwoo Mo, Jongjin Park, Sang-Woo Lee, Minjoon Seo, Jung-Woo Ha, and Jinwoo Shin. 2024. SuRe: Summarizing Retrievals using Answer Candidates for Open-domain QA of LLMs. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=w4DW6qkRmt"},{"key":"e_1_3_2_1_17_1","volume-title":"LLMs-as-Judges: A Comprehensive Survey on LLM-based Evaluation Methods. arXiv preprint arXiv:2412.05579","author":"Li Haitao","year":"2024","unstructured":"Haitao Li, Qian Dong, Junjie Chen, Huixue Su, Yujia Zhou, Qingyao Ai, Ziyi Ye, and Yiqun Liu. 2024a. LLMs-as-Judges: A Comprehensive Survey on LLM-based Evaluation Methods. arXiv preprint arXiv:2412.05579 (2024)."},{"key":"e_1_3_2_1_18_1","volume-title":"Can We Further Elicit Reasoning in LLMs? Critic-Guided Planning with Retrieval-Augmentation for Solving Challenging Tasks. arXiv preprint arXiv:2410.01428","author":"Li Xingxuan","year":"2024","unstructured":"Xingxuan Li, Weiwen Xu, Ruochen Zhao, Fangkai Jiao, Shafiq Joty, and Lidong Bing. 2024b. Can We Further Elicit Reasoning in LLMs? Critic-Guided Planning with Retrieval-Augmentation for Solving Challenging Tasks. 
arXiv preprint arXiv:2410.01428 (2024)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.391"},{"key":"e_1_3_2_1_20_1","volume-title":"Let's verify step by step. arXiv preprint arXiv:2305.20050","author":"Lightman Hunter","year":"2023","unstructured":"Hunter Lightman, Vineet Kosaraju, Yura Burda, Harri Edwards, Bowen Baker, Teddy Lee, Jan Leike, John Schulman, Ilya Sutskever, and Karl Cobbe. 2023. Let's verify step by step. arXiv preprint arXiv:2305.20050 (2023)."},{"key":"e_1_3_2_1_21_1","volume-title":"Skywork-Reward: Bag of Tricks for Reward Modeling in LLMs. arXiv preprint arXiv:2410.18451","author":"Liu Chris Yuhao","year":"2024","unstructured":"Chris Yuhao Liu, Liang Zeng, Jiacai Liu, Rui Yan, Jujie He, Chaojie Wang, Shuicheng Yan, Yang Liu, and Yahui Zhou. 2024. Skywork-Reward: Bag of Tricks for Reward Modeling in LLMs. arXiv preprint arXiv:2410.18451 (2024)."},{"key":"e_1_3_2_1_22_1","unstructured":"Liangchen Luo Yinxiao Liu Rosanne Liu Samrat Phatale Harsh Lara Yunxuan Li Lei Shu Yun Zhu Lei Meng Jiao Sun et al. 2024. Improve Mathematical Reasoning in Language Models by Automated Process Supervision. arXiv preprint arXiv:2406.06592 (2024)."},{"key":"e_1_3_2_1_23_1","volume-title":"Reft: Reasoning with reinforced fine-tuning. arXiv preprint arXiv:2401.08967","author":"Luong Trung Quoc","year":"2024","unstructured":"Trung Quoc Luong, Xinbo Zhang, Zhanming Jie, Peng Sun, Xiaoran Jin, and Hang Li. 2024. Reft: Reasoning with reinforced fine-tuning. arXiv preprint arXiv:2401.08967 (2024)."},{"key":"e_1_3_2_1_24_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Madaan Aman","year":"2024","unstructured":"Aman Madaan, Niket Tandon, Prakhar Gupta, Skyler Hallinan, Luyu Gao, Sarah Wiegreffe, Uri Alon, Nouha Dziri, Shrimai Prabhumoye, Yiming Yang, et al. 2024. Self-refine: Iterative refinement with self-feedback. Advances in Neural Information Processing Systems, Vol. 
36 (2024)."},{"key":"e_1_3_2_1_25_1","unstructured":"Skywork o1 Team. 2024. Skywork-o1 Open Series. https:\/\/huggingface.co\/Skywork. https:\/\/huggingface.co\/Skywork"},{"key":"e_1_3_2_1_26_1","unstructured":"OpenAI. 2024. Introducing Deep Research. https:\/\/openai.com\/index\/introducing-deep-research\/"},{"key":"e_1_3_2_1_27_1","volume-title":"Iterative reasoning preference optimization. arXiv preprint arXiv:2404.19733","author":"Pang Richard Yuanzhe","year":"2024","unstructured":"Richard Yuanzhe Pang, Weizhe Yuan, Kyunghyun Cho, He He, Sainbayar Sukhbaatar, and Jason Weston. 2024. Iterative reasoning preference optimization. arXiv preprint arXiv:2404.19733 (2024)."},{"key":"e_1_3_2_1_28_1","volume-title":"Measuring and narrowing the compositionality gap in language models. arXiv preprint arXiv:2210.03350","author":"Press Ofir","year":"2022","unstructured":"Ofir Press, Muru Zhang, Sewon Min, Ludwig Schmidt, Noah A Smith, and Mike Lewis. 2022. Measuring and narrowing the compositionality gap in language models. arXiv preprint arXiv:2210.03350 (2022)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.378"},{"key":"e_1_3_2_1_30_1","volume-title":"Fan Yang, and Mao Yang.","author":"Qi Zhenting","year":"2024","unstructured":"Zhenting Qi, Mingyuan Ma, Jiahang Xu, Li Lyna Zhang, Fan Yang, and Mao Yang. 2024. Mutual reasoning makes smaller llms stronger problem-solvers. arXiv preprint arXiv:2408.06195 (2024)."},{"key":"e_1_3_2_1_31_1","volume-title":"Enhancing retrieval-augmented large language models with iterative retrieval-generation synergy. arXiv preprint arXiv:2305.15294","author":"Shao Zhihong","year":"2023","unstructured":"Zhihong Shao, Yeyun Gong, Yelong Shen, Minlie Huang, Nan Duan, and Weizhu Chen. 2023. Enhancing retrieval-augmented large language models with iterative retrieval-generation synergy. 
arXiv preprint arXiv:2305.15294 (2023)."},{"key":"e_1_3_2_1_32_1","volume-title":"A survey of controllable learning: Methods and applications in information retrieval. arXiv preprint arXiv:2407.06083","author":"Shen Chenglei","year":"2024","unstructured":"Chenglei Shen, Xiao Zhang, Teng Shi, Changshuo Zhang, Guofu Xie, and Jun Xu. 2024. A survey of controllable learning: Methods and applications in information retrieval. arXiv preprint arXiv:2407.06083 (2024)."},{"key":"e_1_3_2_1_33_1","volume-title":"Replug: Retrieval-augmented black-box language models. arXiv preprint arXiv:2301.12652","author":"Shi Weijia","year":"2023","unstructured":"Weijia Shi, Sewon Min, Michihiro Yasunaga, Minjoon Seo, Rich James, Mike Lewis, Luke Zettlemoyer, and Wen-tau Yih. 2023. Replug: Retrieval-augmented black-box language models. arXiv preprint arXiv:2301.12652 (2023)."},{"key":"e_1_3_2_1_34_1","unstructured":"Avi Singh John D Co-Reyes Rishabh Agarwal Ankesh Anand Piyush Patil Xavier Garcia Peter J Liu James Harrison Jaehoon Lee Kelvin Xu et al. 2023. Beyond human data: Scaling self-training for problem-solving with language models. arXiv preprint arXiv:2312.06585 (2023)."},{"key":"e_1_3_2_1_35_1","volume-title":"Scaling llm test-time compute optimally can be more effective than scaling model parameters. arXiv preprint arXiv:2408.03314","author":"Snell Charlie","year":"2024","unstructured":"Charlie Snell, Jaehoon Lee, Kelvin Xu, and Aviral Kumar. 2024. Scaling llm test-time compute optimally can be more effective than scaling model parameters. arXiv preprint arXiv:2408.03314 (2024)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3696410.3714800"},{"key":"e_1_3_2_1_37_1","volume-title":"The Thirteenth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=ztzZDzgfrh","author":"Sun ZhongXiang","year":"2025","unstructured":"ZhongXiang Sun, Xiaoxue Zang, Kai Zheng, Jun Xu, Xiao Zhang, Weijie Yu, Yang Song, and Han Li. 2025 b. 
ReDeEP: Detecting Hallucination in Retrieval-Augmented Generation via Mechanistic Interpretability. In The Thirteenth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=ztzZDzgfrh"},{"key":"e_1_3_2_1_38_1","unstructured":"Richard Sutton. 2019. The Bitter Lesson. http:\/\/incompleteideas.net\/IncIdeas\/BitterLesson.html Incomplete Ideas (blog) 13(1):38."},{"key":"e_1_3_2_1_39_1","volume-title":"Reinforcement learning: An introduction","author":"Sutton Richard S","unstructured":"Richard S Sutton and Andrew G Barto. 2018. Reinforcement learning: An introduction. MIT press."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/203330.203343"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00475"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.557"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.410"},{"key":"e_1_3_2_1_44_1","volume-title":"Text embeddings by weakly-supervised contrastive pre-training. arXiv preprint arXiv:2212.03533","author":"Wang Liang","year":"2022","unstructured":"Liang Wang, Nan Yang, Xiaolong Huang, Binxing Jiao, Linjun Yang, Daxin Jiang, Rangan Majumder, and Furu Wei. 2022. Text embeddings by weakly-supervised contrastive pre-training. arXiv preprint arXiv:2212.03533 (2022)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.510"},{"key":"e_1_3_2_1_46_1","volume-title":"Self-knowledge guided retrieval augmentation for large language models. arXiv preprint arXiv:2310.05002","author":"Wang Yile","year":"2023","unstructured":"Yile Wang, Peng Li, Maosong Sun, and Yang Liu. 2023. Self-knowledge guided retrieval augmentation for large language models. arXiv preprint arXiv:2310.05002 (2023)."},{"key":"e_1_3_2_1_47_1","volume-title":"Multi-step problem solving through a verifier: An empirical analysis on model-induced process supervision. 
arXiv preprint arXiv:2402.02658","author":"Wang Zihan","year":"2024","unstructured":"Zihan Wang, Yunxuan Li, Yuexin Wu, Liangchen Luo, Le Hou, Hongkun Yu, and Jingbo Shang. 2024b. Multi-step problem solving through a verifier: An empirical analysis on model-induced process supervision. arXiv preprint arXiv:2402.02658 (2024)."},{"key":"e_1_3_2_1_48_1","volume-title":"Enhancing Mathematical Reasoning in LLMs by Stepwise Correction. arXiv preprint arXiv:2410.12934","author":"Wu Zhenyu","year":"2024","unstructured":"Zhenyu Wu, Qingkai Zeng, Zhihan Zhang, Zhaoxuan Tan, Chao Shen, and Meng Jiang. 2024. Enhancing Mathematical Reasoning in LLMs by Stepwise Correction. arXiv preprint arXiv:2410.12934 (2024)."},{"key":"e_1_3_2_1_49_1","volume-title":"Monte Carlo Tree Search Boosts Reasoning via Iterative Preference Learning. arXiv preprint arXiv:2405.00451","author":"Xie Yuxi","year":"2024","unstructured":"Yuxi Xie, Anirudh Goyal, Wenyue Zheng, Min-Yen Kan, Timothy P Lillicrap, Kenji Kawaguchi, and Michael Shieh. 2024. Monte Carlo Tree Search Boosts Reasoning via Iterative Preference Learning. arXiv preprint arXiv:2405.00451 (2024)."},{"key":"e_1_3_2_1_50_1","unstructured":"Fangyuan Xu Weijia Shi and Eunsol Choi. 2024. RECOMP: Improving Retrieval-Augmented LMs with Context Compression and Selective Augmentation. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=mlJLVigNHp"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1259"},{"key":"e_1_3_2_1_52_1","volume-title":"ReAct: Synergizing Reasoning and Acting in Language Models. In International Conference on Learning Representations (ICLR).","author":"Yao Shunyu","year":"2023","unstructured":"Shunyu Yao, Jeffrey Zhao, Dian Yu, Nan Du, Izhak Shafran, Karthik Narasimhan, and Yuan Cao. 2023. ReAct: Synergizing Reasoning and Acting in Language Models. 
In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_53_1","volume-title":"Inference scaling for long-context retrieval augmented generation. arXiv preprint arXiv:2410.04343","author":"Yue Zhenrui","year":"2024","unstructured":"Zhenrui Yue, Honglei Zhuang, Aijun Bai, Kai Hui, Rolf Jagerman, Hansi Zeng, Zhen Qin, Dong Wang, Xuanhui Wang, and Michael Bendersky. 2024. Inference scaling for long-context retrieval augmented generation. arXiv preprint arXiv:2410.04343 (2024)."},{"key":"e_1_3_2_1_54_1","volume-title":"Llm self-training via process reward guided tree search. arXiv preprint arXiv:2406.03816","author":"Zhang Dan","year":"2024","unstructured":"Dan Zhang, Sining Zhoubian, Ziniu Hu, Yisong Yue, Yuxiao Dong, and Jie Tang. 2024b. Rest-mcts*: Llm self-training via process reward guided tree search. arXiv preprint arXiv:2406.03816 (2024)."},{"key":"e_1_3_2_1_55_1","volume-title":"OpenRFT: Adapting Reasoning Foundation Model for Domain-specific Tasks with Reinforcement Fine-Tuning. arXiv preprint arXiv:2412.16849","author":"Zhang Yuxiang","year":"2024","unstructured":"Yuxiang Zhang, Yuqi Yang, Jiangming Shu, Yuhang Wang, Jinlin Xiao, and Jitao Sang. 2024a. OpenRFT: Adapting Reasoning Foundation Model for Domain-specific Tasks with Reinforcement Fine-Tuning. arXiv preprint arXiv:2412.16849 (2024)."},{"key":"e_1_3_2_1_56_1","volume-title":"Marco-o1: Towards open reasoning models for open-ended solutions. arXiv preprint arXiv:2411.14405","author":"Zhao Yu","year":"2024","unstructured":"Yu Zhao, Huifeng Yin, Bo Zeng, Hao Wang, Tianqi Shi, Chenyang Lyu, Longyue Wang, Weihua Luo, and Kaifu Zhang. 2024. Marco-o1: Towards open reasoning models for open-ended solutions. 
arXiv preprint arXiv:2411.14405 (2024)."}],"event":{"name":"SIGIR '25: The 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","location":"Padua Italy","acronym":"SIGIR '25","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3726302.3730102","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:07:13Z","timestamp":1755864433000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3726302.3730102"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,13]]},"references-count":56,"alternative-id":["10.1145\/3726302.3730102","10.1145\/3726302"],"URL":"https:\/\/doi.org\/10.1145\/3726302.3730102","relation":{},"subject":[],"published":{"date-parts":[[2025,7,13]]},"assertion":[{"value":"2025-07-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}