{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T10:00:51Z","timestamp":1775815251156,"version":"3.50.1"},"reference-count":247,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62436009"],"award-info":[{"award-number":["62436009"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. 
Intell."],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1109\/tpami.2025.3637037","type":"journal-article","created":{"date-parts":[[2025,11,25]],"date-time":"2025-11-25T18:28:44Z","timestamp":1764095324000},"page":"3335-3354","source":"Crossref","is-referenced-by-count":8,"title":["From System 1 to System 2: A Survey of Reasoning Large Language Models"],"prefix":"10.1109","volume":"48","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4280-431X","authenticated-orcid":false,"given":"Duzhen","family":"Zhang","sequence":"first","affiliation":[{"name":"Mohamed bin Zayed University of Artificial Intelligence, Abu Dhabi, UAE"}]},{"given":"Zhong-Zhi","family":"Li","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"given":"Ming-Liang","family":"Zhang","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"given":"Jiaxin","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Strathclyde, Glasgow, U.K."}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3825-5474","authenticated-orcid":false,"given":"Zengyan","family":"Liu","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Kowloon Tong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3955-7272","authenticated-orcid":false,"given":"Yuxuan","family":"Yao","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Kowloon Tong, Hong Kong"}]},{"given":"Haotian","family":"Xu","sequence":"additional","affiliation":[{"name":"Xiaohongshu Inc, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9124-2467","authenticated-orcid":false,"given":"Junhao","family":"Zheng","sequence":"additional","affiliation":[{"name":"South China University of Technology, Guangzhou, China"}]},{"given":"Xiuyi","family":"Chen","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, 
Beijing, China"}]},{"given":"Yingying","family":"Zhang","sequence":"additional","affiliation":[{"name":"East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6412-9140","authenticated-orcid":false,"given":"Fei","family":"Yin","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8545-4447","authenticated-orcid":false,"given":"Jiahua","family":"Dong","sequence":"additional","affiliation":[{"name":"Mohamed bin Zayed University of Artificial Intelligence, Abu Dhabi, UAE"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6232-5957","authenticated-orcid":false,"given":"Zhijiang","family":"Guo","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Kowloon Tong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0345-9899","authenticated-orcid":false,"given":"Le","family":"Song","sequence":"additional","affiliation":[{"name":"Mohamed bin Zayed University of Artificial Intelligence, Abu Dhabi, UAE"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6743-4175","authenticated-orcid":false,"given":"Cheng-Lin","family":"Liu","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-emnlp.42"},{"key":"ref2","first-page":"15476","article-title":"STaR: Self-taught reasoner bootstrapping reasoning with reasoning","volume-title":"Proc. Int. Conf. Neural Inf. Process. 
Syst.","author":"Zelikman"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1111\/j.2044-8295.1984.tb01915.x"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1257\/000282803322655392"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.67"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.294"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.507"},{"key":"ref8","article-title":"Meta prompting for AGI systems","author":"Zhang","year":"2023"},{"key":"ref9","article-title":"Hello GPT-4o","year":"2024"},{"key":"ref10","article-title":"DeepSeek-V3 technical report","author":"Liu","year":"2024"},{"key":"ref11","article-title":"Learning to reason with LLMs","year":"2024"},{"key":"ref12","article-title":"OpenAI o3-mini","year":"2025"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-025-09422-z"},{"key":"ref14","article-title":"Training verifiers to solve math word problems","author":"Cobbe","year":"2021"},{"key":"ref15","first-page":"22199","article-title":"Large language models are zero-shot reasoners","volume-title":"Proc. Int. Conf. Neural Inf. Process. 
Syst.","author":"Kojima"},{"key":"ref16","article-title":"Thinking like an expert: Multimodal hypergraph-of-thought (HoT) reasoning to boost foundation modals","author":"Yao","year":"2023"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.108"},{"key":"ref18","article-title":"A comparative study on reasoning patterns of OpenAI\u2019s o1 Model","author":"Wu","year":"2024"},{"key":"ref19","article-title":"Towards system 2 reasoning in LLMs: Learning how to think with meta chain-of-though","author":"Xiang","year":"2025"},{"key":"ref20","article-title":"O1 replication journey: A strategic progress report\u2013part 1","author":"Qin","year":"2024"},{"key":"ref21","article-title":"O1 replication journey\u2013Part 2: Surpassing O1-preview through simple distillation, big progress or bitter lesson?","author":"Huang","year":"2024"},{"key":"ref22","article-title":"O1 replication journey\u2013Part 3: Inference-time scaling for medical reasoning","author":"Huang","year":"2025"},{"key":"ref23","article-title":"Imitate, explore, and self-improve: A reproduction report on slow-thinking reasoning systems","author":"Min","year":"2024"},{"key":"ref24","article-title":"RedStar: Does scaling long-CoT data unlock better slow-reasoning systems?","author":"Xu","year":"2025"},{"key":"ref25","article-title":"Scaling of search and learning: A roadmap to reproduce o1 from reinforcement learning perspective","author":"Zeng","year":"2024"},{"key":"ref26","article-title":"Test-time computing: From system-1 thinking to system-2 thinking","author":"Ji","year":"2025"},{"key":"ref27","article-title":"Reasoning language models: A blueprint","author":"Besta","year":"2025"},{"key":"ref28","article-title":"LLM as a mastermind: A survey of strategic reasoning with large language 
models","author":"Zhang","year":"2024"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1016\/j.patter.2025.101370"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1810.04805"},{"key":"ref31","article-title":"Improving language understanding by generative pre-training","author":"Radford","year":"2018"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1186\/s40537-016-0059-y"},{"key":"ref33","article-title":"A survey on Big Data privacy using hadoop architecture","volume":"17","author":"Jain","year":"2017","journal-title":"Int. J. Comput. Sci. Netw. Secur."},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1186\/s40537-018-0124-9"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.31577\/cai_2020_3_537"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.7286"},{"key":"ref37","article-title":"Language models are unsupervised multitask learners","author":"Radford","year":"2019","journal-title":"OpenAI"},{"key":"ref38","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Brown"},{"key":"ref39","first-page":"1","article-title":"PaLM: Scaling language modeling with pathways","volume":"24","author":"Chowdhery","year":"2023","journal-title":"J. Mach. Learn. Res."},{"key":"ref40","article-title":"LLaMA: Open and efficient foundation language models","author":"Touvron","year":"2023"},{"key":"ref41","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Radford"},{"key":"ref42","first-page":"8821","article-title":"Zero-shot text-to-image generation","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Ramesh"},{"key":"ref43","article-title":"GPT-4 technical report","year":"2023"},{"key":"ref44","article-title":"LLaMA 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.70"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.64"},{"key":"ref47","article-title":"A prompt pattern catalog to enhance prompt engineering with ChatGPT","author":"White","year":"2023"},{"key":"ref48","article-title":"Retrieval-augmented generation for large language models: A survey","author":"Gao","year":"2023"},{"key":"ref49","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Wei"},{"key":"ref50","volume-title":"Symbolic Logic","author":"Lewis","year":"1959"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1145\/79204.79210"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1016\/0167-9236(88)90128-5"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1145\/800025.1198360"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1016\/B978-044450813-3\/50004-7"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1016\/b978-1-4832-1446-7.50018-2"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/TCIAIG.2012.2186810"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2011.03.007"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-022-10228-y"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1038\/nature14539"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref64","d
oi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref66","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ng"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.59556\/japi.72.0518"},{"key":"ref68","article-title":"Technical report: Enhancing LLM reasoning with reward-guided tree search","author":"Jiang","year":"2024"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-emnlp.1140"},{"key":"ref70","article-title":"Forest-of-thought: scaling test-time compute for enhancing LLM reasoning","volume-title":"Proc. Int. Conf. Mach. Learn","author":"Bi"},{"key":"ref71","article-title":"Self-consistency improves chain of thought reasoning in language models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang"},{"key":"ref72","article-title":"Mutual reasoning makes smaller LLMs stronger problem-solvers","author":"Qi","year":"2024"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-acl.1247"},{"key":"ref74","article-title":"Beyond examples: High-level automated reasoning paradigm in in-context learning via MCTS","author":"Wu","year":"2024"},{"key":"ref75","article-title":"Mulberry: Empowering MLLM with o1-like reasoning and reflection via collective Monte Carlo tree search","author":"Yao","year":"2024"},{"key":"ref76","article-title":"g1: Using Llama-3.1 70b on Groq to create o1-like reasoning chains","author":"Klieger","year":"2024"},{"key":"ref77","article-title":"Thinking claude","author":"Lyu","year":"2024"},{"key":"ref78","article-title":"QwQ: Reflect deeply on the boundaries of the unknown","author":"Team","year":"2024","journal-title":"Hugging Face"},{"key":"ref79","article-title":"Sky-T1: Train your own O1 preview model within 
$450","author":"Team","year":"2025"},{"key":"ref80","article-title":"Virgo: A preliminary exploration on reproducing o1-like MLLM","author":"Du","year":"2025"},{"key":"ref81","first-page":"95095","article-title":"Measuring multimodal mathematical reasoning with math-vision dataset","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","volume":"37","author":"Wang","year":"2024"},{"key":"ref82","article-title":"Slow perception: Let\u2019s perceive geometric figures step-by-step","author":"Wei","year":"2024"},{"key":"ref83","article-title":"Gemini 2.5: Our most intelligent AI model","author":"DeepMind","year":"2025"},{"key":"ref84","article-title":"Claude 3.7 Sonnet and Claude code","year":"2025"},{"key":"ref85","article-title":"AlphaCode 2 technical report","author":"AlphaCode Team","year":"2023"},{"key":"ref86","article-title":"Kimi k1. 5: Scaling reinforcement learning with LLMs","author":"Team","year":"2025"},{"key":"ref87","article-title":"7B model and 8K examples: Emerging reasoning with reinforcement learning is both effective and efficient","author":"Zeng","year":"2025"},{"key":"ref88","article-title":"R1-V: Reinforcing super generalization ability in vision-language models with less than $3","author":"Chen","year":"2025"},{"key":"ref89","article-title":"Monte Carlo tree search boosts reasoning via iterative preference learning","author":"Xie","year":"2024"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2025\/965"},{"key":"ref91","article-title":"MC-NEST - Enhancing mathematical reasoning in large language models with a Monte Carlo Nash equilibrium self-refine tree","author":"Rabby","year":"2024"},{"key":"ref92","article-title":"SPaR: Self-play with tree-search refinement to improve instruction-following in large language models","volume-title":"Proc. Int. Conf. Learn. 
Representations","author":"Cheng","year":"2025"},{"key":"ref93","article-title":"Marco-o1: Towards open reasoning models for open-ended solutions","author":"Zhao","year":"2024"},{"key":"ref94","article-title":"HuatuoGPT-o1, Towards medical complex reasoning with LLMs","author":"Chen","year":"2024"},{"key":"ref95","first-page":"11809","article-title":"Tree of thoughts: Deliberate problem solving with large language models","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Yao"},{"key":"ref96","article-title":"Tree search for language model agents","author":"Koh","year":"2025","journal-title":"Trans. Mach. Learn. Res."},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.emnlp-main.410"},{"key":"ref98","first-page":"49890","article-title":"Alphazero-like tree-search can guide large language model decoding and training","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wan"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.emnlp-main.276"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.naacl-long.189"},{"key":"ref101","first-page":"60429","article-title":"Generating code world models with large language models guided by Monte Carlo tree search","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Dainese"},{"key":"ref102","first-page":"31967","article-title":"Large language models as commonsense knowledge for large-scale task planning","volume-title":"Proc. Int. Conf. Neural Inf. Process. 
Syst.","author":"Zhao"},{"key":"ref103","article-title":"Accessing GPT-4 level mathematical olympiad solutions via Monte Carlo tree self-refine with LLaMa-3 8B","author":"Zhang","year":"2024"},{"key":"ref104","article-title":"MindStar: Enhancing math reasoning in pre-trained LLMs at inference time","author":"Kang","year":"2024"},{"key":"ref105","article-title":"GPT-guided Monte Carlo tree search for symbolic regression in financial fraud detection","author":"Kadam","year":"2024"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.naacl-long.375"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.52202\/079017-2066"},{"key":"ref108","article-title":"Don\u2019t throw away your value model! Generating more preferable text with value-guided Monte-Carlo tree search decoding","volume-title":"Proc. 1st Conf. Lang. Model.","author":"Liu"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-025-01164-x"},{"key":"ref110","article-title":"Towards intrinsic self-correction enhancement in Monte Carlo tree search boosted reasoning via iterative preference learning","author":"Jiang","year":"2024"},{"key":"ref111","article-title":"No train still gain. Unleash mathematical reasoning of large language models with Monte Carlo tree search guided by energy function","author":"Xu","year":"2023"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-023-05240-w"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.291"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.510"},{"key":"ref115","first-page":"79935","article-title":"AutoPSV: Automated process-supervised verifier","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Lu"},{"key":"ref116","article-title":"Free process rewards without process labels","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Yuan"},{"key":"ref117","article-title":"Outcome-refining process supervision for code generation","author":"Yu","year":"2024"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.52202\/079017-2066"},{"key":"ref119","article-title":"Improve mathematical reasoning in language models by automated process supervision","author":"Luo","year":"2024"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1145\/3726302.3730102"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-acl.547"},{"key":"ref122","article-title":"Step-DPO: Step-wise preference optimization for long-chain reasoning of LLMs","author":"Lai","year":"2024"},{"key":"ref123","article-title":"AdaptiveStep: Automatically dividing reasoning step through model confidence","volume-title":"Proc. Int. Conf. Mach. Learn","author":"Liu"},{"key":"ref124","article-title":"Solving math word problems with process-and outcome-based feedback","author":"Uesato","year":"2022"},{"key":"ref125","article-title":"Fine-grained human feedback gives better rewards for language model training","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Wu"},{"key":"ref126","first-page":"53728","article-title":"Direct preference optimization: Your language model is secretly a reward model","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Rafailov"},{"key":"ref127","first-page":"15476","article-title":"STaR: Bootstrapping reasoning with reasoning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Zelikman"},{"key":"ref128","article-title":"Quiet-star: Language models can teach themselves to think before speaking","author":"Zelikman","year":"2024"},{"key":"ref129","article-title":"V-STaR: Training verifiers for self-taught reasoners","volume-title":"Proc. Conf. Lang. 
Model.","author":"Hosseini"},{"key":"ref130","article-title":"B-STaR: Monitoring and balancing exploration and exploitation in self-taught reasoners","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Zeng"},{"key":"ref131","article-title":"rStar-math: Small LLMs can master math reasoning with self-evolved deep thinking","volume-title":"Proc. Int. Conf. Mach. Learn","author":"Guan"},{"key":"ref132","article-title":"Reinforced self-training (ReST) for language modeling","author":"G\u00fcl\u00e7ehre","year":"2023"},{"key":"ref133","article-title":"Beyond human data: Scaling self-training for problem-solving with language models","author":"Singh","year":"2024","journal-title":"Trans. Mach. Learn. Res."},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.635"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.52202\/079017-1754"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.52202\/079017-4175"},{"key":"ref137","article-title":"Language model self-improvement by reinforcement learning contemplation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Pang"},{"key":"ref138","first-page":"30039","article-title":"AlpacaFarm: A simulation framework for methods that learn from human feedback","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Dubois"},{"key":"ref139","first-page":"46534","article-title":"SELF-REFINE: Iterative refinement with self-feedback","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Madaan"},{"key":"ref140","article-title":"SelfCheck: Using LLMs to zero-shot check their own step-by-step reasoning","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Miao"},{"key":"ref141","article-title":"CRITIC: Large language models can self-correct with tool-interactive critiquing","volume-title":"Proc. Int. Conf. Learn. 
Representations","author":"Gou"},{"key":"ref142","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.814"},{"key":"ref143","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.167"},{"key":"ref144","first-page":"41618","article-title":"Self-evaluation guided beam search for reasoning","author":"Xie","year":"2023"},{"key":"ref145","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.5"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-short.42"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.66"},{"key":"ref148","article-title":"Learning from correctness without prompting makes LLM efficient reasoner","author":"Yao","year":"2024"},{"key":"ref149","article-title":"Thinking fast and slow with deep learning and tree search","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Anthony"},{"key":"ref150","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.169"},{"key":"ref151","article-title":"Optimizing language model\u2019s reasoning abilities with weak supervision","author":"Tong","year":"2024"},{"key":"ref152","article-title":"V-STaR: Training verifiers for self-taught reasoners","volume-title":"Proc. Conf. Lang. Model.","author":"Hosseini"},{"key":"ref153","first-page":"10764","article-title":"PAL: Program-aided language models","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Gao"},{"key":"ref154","article-title":"SelfCheck: Using LLMs to zero-shot check their own step-by-step reasoning","author":"Miao","year":"2023"},{"key":"ref155","article-title":"Learning from mistakes makes LLM better reasoner","author":"An","year":"2023"},{"key":"ref156","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.500"},{"key":"ref157","article-title":"SuperCorrect: Supervising and correcting language models with error-driven insights","author":"Yang","year":"2024"},{"key":"ref158","article-title":"ReasonFlux: Hierarchical LLM reasoning via scaling thought templates","author":"Yang","year":"2025"},{"key":"ref159","first-page":"113519","article-title":"Buffer of thoughts: Thought-augmented reasoning with large language models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"37","author":"Yang"},{"key":"ref160","first-page":"2087","article-title":"LLaVA-o1: Let vision language models reason step-by-step","volume-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis","author":"Xu"},{"key":"ref161","article-title":"AtomThink: A slow thinking framework for multimodal mathematical reasoning","author":"Xiang","year":"2024"},{"key":"ref162","article-title":"Automatic chain of thought prompting in large language models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Zhang"},{"key":"ref163","article-title":"Program of thoughts prompting: Disentangling computation from reasoning for numerical reasoning tasks","author":"Chen","year":"2023","journal-title":"Trans. Mach. Learn. Res."},{"key":"ref164","article-title":"Decomposed prompting: A modular approach for solving complex tasks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Khot"},{"key":"ref165","article-title":"Least-to-most prompting enables complex reasoning in large language models","volume-title":"Proc. Int. Conf. Learn. 
Representations","author":"Zhou"},{"key":"ref166","article-title":"CoAct: A global-local hierarchy for autonomous agent collaboration","author":"Hou","year":"2024"},{"key":"ref167","article-title":"Satori: Reinforcement learning with chain-of-action-thought enhances LLM reasoning via autoregressive search","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Shen"},{"key":"ref168","article-title":"Reinforcement fine-tuning","year":"2024"},{"key":"ref169","doi-asserted-by":"publisher","DOI":"10.1145\/3641289"},{"key":"ref170","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.410"},{"key":"ref171","article-title":"Blob reinforcement fin-tuning","year":"2024"},{"key":"ref172","article-title":"Stream of search (SOS): Learning to search in language","author":"Gandhi","year":"2024"},{"key":"ref173","article-title":"Reasoning with reinforced functional token tuning","author":"Zhang","year":"2025"},{"key":"ref174","article-title":"QLASS: Boosting language agent inference via q-guided stepwise search","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Lin"},{"key":"ref175","article-title":"Process reinforcement through implicit rewards","author":"Cui","year":"2025"},{"key":"ref176","article-title":"DeepScaleR: Surpassing o1-preview with a 1.5B model by scaling RL","author":"Luo","year":"2025"},{"key":"ref177","article-title":"Stop gamma decay: Min-Form credit assignment is all process reward model needs for reasoning","author":"Cheng","year":"2025"},{"key":"ref178","article-title":"Open R1: A fully open reproduction of DeepSeek-R1","author":"Team","year":"2025"},{"key":"ref179","article-title":"TinyZero","author":"Pan","year":"2025"},{"key":"ref180","article-title":"There may not be aha moment in R1-zero-like training \u2014 A pilot study","author":"Liu","year":"2025"},{"key":"ref181","article-title":"OAT: A research-friendly framework for LLM online alignment","author":"Liu","year":"2025"},{"key":"ref182","article-title":"LIMR: Less is more for RL scaling","author":"Li","year":"2025"},{"key":"ref183","article-title":"Teaching language models to critique via reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Xie"},{"key":"ref184","article-title":"Logic-RL: Unleashing LLM reasoning with rule-based reinforcement learning","author":"Xie","year":"2025"},{"key":"ref185","article-title":"Online-DPO-R1: Unlocking effective reasoning without the PPO overhead","author":"Zhang","year":"2025"},{"key":"ref186","article-title":"Open-reasoner-zero: An open source approach to scaling reinforcement learning on the base model","author":"Hu","year":"2025"},{"key":"ref187","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01310"},{"key":"ref188","first-page":"26874","article-title":"RLAIF vs. RLHF: Scaling reinforcement learning from human feedback with AI feedback","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Lee"},{"key":"ref189","article-title":"MM-RLHF: The next step forward in multimodal LLM alignment","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Zhang"},{"key":"ref190","article-title":"Align anything: Training all-modality models to follow instructions with language feedback","author":"Ji","year":"2024"},{"key":"ref191","article-title":"VLM-R1: A stable and generalizable R1-style large vision-language model","author":"Shen","year":"2025"},{"key":"ref192","article-title":"LMM-R1: Empowering 3B LMMs with strong reasoning abilities through two-stage rule-based RL","author":"Peng","year":"2025"},{"key":"ref193","article-title":"Open-R1-video","author":"Wang","year":"2025"},{"key":"ref194","article-title":"EasyR1: An efficient, scalable, multi-modality RL training framework","author":"Zheng","year":"2025"},{"key":"ref195","article-title":"Demystifying long chain-of-thought reasoning in LLMs","author":"Yeo","year":"2025"},{"key":"ref196","article-title":"Does RLHF scale? Exploring the impacts from data, model, and method","author":"Hou","year":"2024"},{"key":"ref197","article-title":"Metastable dynamics of chain-of-thought reasoning: provable benefits of search, RL and distillation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kim"},{"key":"ref198","article-title":"There may not be aha moment in R1-zero-like training \u2014 A pilot study","author":"Liu","year":"2025"},{"key":"ref199","article-title":"Qwen2. 5 technical report","author":"Yang","year":"2024"},{"key":"ref200","article-title":"Do NOT think that much for 2 + 3 = ? on the overthinking of o1-like LLMs","author":"Chen","year":"2024"},{"key":"ref201","article-title":"O1-Pruner: Length-harmonizing fine-tuning for o1-like reasoning pruning","author":"Luo","year":"2025"},{"key":"ref202","article-title":"REINFORCE : A simple and efficient approach for aligning large language models","author":"Hu","year":"2025"},{"key":"ref203","article-title":"AIME 2024","year":"2024"},{"key":"ref204","article-title":"Let\u2019s verify step by step","volume-title":"Proc. Int. Conf. Learn. 
Representations","author":"Lightman"},{"key":"ref205","article-title":"AMC 2023","year":"2024"},{"key":"ref206","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.211"},{"key":"ref207","article-title":"SWE-Bench: Can language models resolve real-world github issues?","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Jimenez"},{"key":"ref208","article-title":"LivecodeBench: Holistic and contamination free evaluation of large language models for code","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Jain"},{"key":"ref209","article-title":"GPQA: A graduate-level google-proof Q&A benchmark","volume-title":"Proc. 1st Conf. Lang. Model.","author":"Rein"},{"key":"ref210","doi-asserted-by":"publisher","DOI":"10.52202\/079017-3018"},{"key":"ref211","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.1355"},{"key":"ref212","first-page":"20744","article-title":"Webshop: Towards scalable real-world web interaction with grounded language agents","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Yao"},{"key":"ref213","article-title":"WebArena: A realistic web environment for building autonomous agents","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Zhou"},{"key":"ref214","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.775"},{"key":"ref215","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-naacl.264"},{"key":"ref216","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.naacl-long.182"},{"key":"ref217","doi-asserted-by":"publisher","DOI":"10.3390\/app11146421"},{"key":"ref218","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00913"},{"key":"ref219","article-title":"MathVista: Evaluating mathematical reasoning of foundation models in visual contexts","volume-title":"Proc. Int. Conf. Learn. 
Representations","author":"Lu"},{"key":"ref220","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73242-3_10"},{"key":"ref221","first-page":"2690","article-title":"CMMaTH: A Chinese multi-modal math skill evaluation benchmark for foundation models","volume-title":"Proc. 31st Int. Conf. Comput. Linguistics","author":"Li"},{"key":"ref222","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2023\/376"},{"key":"ref223","first-page":"2507","article-title":"Learn to explain: Multimodal reasoning via thought chains for science question answering","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Lu"},{"key":"ref224","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.30"},{"key":"ref225","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-naacl.51"},{"key":"ref226","article-title":"OpenCompass: A universal evaluation platform for foundation models","author":"Contributors","year":"2023"},{"key":"ref227","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.1230"},{"key":"ref228","article-title":"Claude 3.5 sonnet","year":"2024"},{"key":"ref229","first-page":"46534","article-title":"Self-refine: Iterative refinement with self-feedback","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Madaan"},{"key":"ref230","article-title":"Large language models still face challenges in multi-hop reasoning with external knowledge","author":"Zhang","year":"2024"},{"key":"ref231","article-title":"HyperTree planning: Enhancing LLM reasoning via hierarchical thinking","volume-title":"Proc. Int. Conf. Mach. 
"Gold-medalist performance in solving olympiad geometry with AlphaGeometry2"
Representations","author":"Hu"},{"key":"ref246","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.287"},{"key":"ref247","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.243"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/34\/11372200\/11267249.pdf?arnumber=11267249","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,9]],"date-time":"2026-02-09T21:05:50Z","timestamp":1770671150000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11267249\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3]]},"references-count":247,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2025.3637037","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,3]]}}}