{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T19:01:03Z","timestamp":1771268463219,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,2,22]]},"DOI":"10.1145\/3773966.3777986","type":"proceedings-article","created":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T17:50:01Z","timestamp":1771264201000},"page":"436-446","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["CoDA: A Context-Decoupled Hierarchical Agent with Reinforcement Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-6818-8820","authenticated-orcid":false,"given":"Xuanzhang","family":"Liu","sequence":"first","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9513-5260","authenticated-orcid":false,"given":"Jianglun","family":"Feng","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7028-6067","authenticated-orcid":false,"given":"Zhuoran","family":"Zhuang","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-7248-5018","authenticated-orcid":false,"given":"Junzhe","family":"Zhao","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-1410-3825","authenticated-orcid":false,"given":"Maofei","family":"Que","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3354-2975","authenticated-orcid":false,"given":"Jieting","family":"Li","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7862-4114","authenticated-orcid":false,"given":"Dianlei","family":"Wang","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-9023-2732","authenticated-orcid":false,"given":"Hao","family":"Tong","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6024-6562","authenticated-orcid":false,"given":"Ye","family":"Chen","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4957-3064","authenticated-orcid":false,"given":"Pan","family":"Li","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]}],"member":"320","published-online":{"date-parts":[[2026,2,21]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Sebastian Borgeaud Arthur Mensch Jordan Hoffmann Trevor Cai Eliza Rutherford Katie Millican George van den Driessche Jean-Baptiste Lespiau Bogdan Damoc Aidan Clark Diego de Las Casas Aurelia Guy Jacob Menick Roman Ring Tom Hennigan Saffron Huang Loren Maggiore Chris Jones Albin Cassirer Andy Brock Michela Paganini Geoffrey Irving Oriol Vinyals Simon Osindero Karen Simonyan Jack W. Rae Erich Elsen and Laurent Sifre. 2022. Improving language models by retrieving from trillions of tokens. arXiv:2112.04426 [cs.CL] https:\/\/arxiv.org\/abs\/2112.04426"},{"key":"e_1_3_2_1_2_1","unstructured":"Mingyang Chen Tianpeng Li Haoze Sun Yijie Zhou Chenzheng Zhu Haofen Wang Jeff Z. Pan Wen Zhang Huajun Chen Fan Yang Zenan Zhou and Weipeng Chen. 2025. ReSearch: Learning to Reason with Search for LLMs via Reinforcement Learning. arXiv:2503.19470 [cs.AI] https:\/\/arxiv.org\/abs\/2503.19470"},{"key":"e_1_3_2_1_3_1","volume-title":"Proceedings of the 6th International Conference on Neural Information Processing Systems","author":"Dayan Peter","unstructured":"Peter Dayan and Geoffrey E. Hinton. 1992. Feudal reinforcement learning. In Proceedings of the 6th International Conference on Neural Information Processing Systems (Denver, Colorado) (NIPS'92). Morgan Kaufmann Publishers Inc., San Francisco, CA, USA, 271\u2013278."},{"key":"e_1_3_2_1_4_1","unstructured":"DeepSeek-AI. 2025. DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning. arXiv:2501.12948 [cs.CL] https:\/\/arxiv.org\/abs\/2501.12948"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.639"},{"key":"e_1_3_2_1_6_1","unstructured":"Lutfi Eren Erdogan Nicholas Lee Sehoon Kim Suhong Moon Hiroki Furuta Gopala Anumanchipalli Kurt Keutzer and Amir Gholami. 2025. Plan-and-Act: Improving Planning of Agents for Long-Horizon Tasks. arXiv:2503.09572 [cs.CL] https:\/\/arxiv.org\/abs\/2503.09572"},{"key":"e_1_3_2_1_7_1","unstructured":"Yunfan Gao Yun Xiong Xinyu Gao Kangxiang Jia Jinliu Pan Yuxi Bi Yi Dai Jiawei Sun Meng Wang and Haofen Wang. 2024. Retrieval-Augmented Generation for Large Language Models: A Survey. arXiv:2312.10997 [cs.CL] https:\/\/arxiv.org\/abs\/2312.10997"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.coling-main.580"},{"key":"e_1_3_2_1_9_1","volume-title":"Zijuan Lin, Liyang Zhou, Chenyu Ran, Lingfeng Xiao, Chenglin Wu, and J\u00fcrgen Schmidhuber.","author":"Hong Sirui","year":"2024","unstructured":"Sirui Hong, Mingchen Zhuge, Jiaqi Chen, Xiawu Zheng, Yuheng Cheng, Ceyao Zhang, Jinlin Wang, Zili Wang, Steven Ka Shing Yau, Zijuan Lin, Liyang Zhou, Chenyu Ran, Lingfeng Xiao, Chenglin Wu, and J\u00fcrgen Schmidhuber. 2024. MetaGPT: Meta Programming for A Multi-Agent Collaborative Framework. arXiv:2308.00352 [cs.AI] https:\/\/arxiv.org\/abs\/2308.00352"},{"key":"e_1_3_2_1_10_1","unstructured":"Xinming Hou Mingming Yang Wenxiang Jiao Xing Wang Zhaopeng Tu and Wayne Xin Zhao. 2024. CoAct: A Global-Local Hierarchy for Autonomous Agent Collaboration. arXiv:2406.13381 [cs.CL] https:\/\/arxiv.org\/abs\/2406.13381"},{"key":"e_1_3_2_1_11_1","unstructured":"Bowen Jin Hansi Zeng Zhenrui Yue Jinsung Yoon Sercan Arik Dong Wang Hamed Zamani and Jiawei Han. 2025a. Search-R1: Training LLMs to Reason and Leverage Search Engines with Reinforcement Learning. arXiv:2503.09516 [cs.CL] https:\/\/arxiv.org\/abs\/2503.09516"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3701716.3715313"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1147"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_2_1_15_1","unstructured":"Timo Kaufmann Paul Weng Viktor Bengs and Eyke H\u00fcllermeier. 2024. A Survey of Reinforcement Learning from Human Feedback. arXiv:2312.14925 [cs.LG] https:\/\/arxiv.org\/abs\/2312.14925"},{"key":"e_1_3_2_1_16_1","volume-title":"Proceedings of the 30th International Conference on Neural Information Processing Systems","author":"Kulkarni Tejas D.","unstructured":"Tejas D. Kulkarni, Karthik R. Narasimhan, Ardavan Saeedi, and Joshua B. Tenenbaum. 2016. Hierarchical deep reinforcement learning: integrating temporal abstraction and intrinsic motivation. In Proceedings of the 30th International Conference on Neural Information Processing Systems (Barcelona, Spain) (NIPS'16). Curran Associates Inc., Red Hook, NY, USA, 3682\u20133690."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00276"},{"key":"e_1_3_2_1_18_1","volume-title":"Tim Rockt\u00e4schel, Sebastian Riedel, and Douwe Kiela.","author":"Lewis Patrick","year":"2021","unstructured":"Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fcttler, Mike Lewis, Wen tau Yih, Tim Rockt\u00e4schel, Sebastian Riedel, and Douwe Kiela. 2021. Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. arXiv:2005.11401 [cs.CL] https:\/\/arxiv.org\/abs\/2005.11401"},{"key":"e_1_3_2_1_19_1","unstructured":"Xiaoxi Li Guanting Dong Jiajie Jin Yuyao Zhang Yujia Zhou Yutao Zhu Peitian Zhang and Zhicheng Dou. 2025. Search-o1: Agentic Search-Enhanced Large Reasoning Models. arXiv:2501.05366 [cs.AI] https:\/\/arxiv.org\/abs\/2501.05366"},{"key":"e_1_3_2_1_20_1","unstructured":"Nelson F. Liu Kevin Lin John Hewitt Ashwin Paranjape Michele Bevilacqua Fabio Petroni and Percy Liang. 2023. Lost in the Middle: How Language Models Use Long Contexts. arXiv:2307.03172 [cs.CL] https:\/\/arxiv.org\/abs\/2307.03172"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.546"},{"key":"e_1_3_2_1_22_1","unstructured":"Lingrui Mei Jiayu Yao Yuyao Ge Yiwei Wang Baolong Bi Yujun Cai Jiazhi Liu Mingyu Li Zhong-Zhi Li Duzhen Zhang Chenlin Zhou Jiayi Mao Tianze Xia Jiafeng Guo and Shenghua Liu. 2025. A Survey of Context Engineering for Large Language Models. arXiv:2507.13334 [cs.CL] https:\/\/arxiv.org\/abs\/2507.13334"},{"key":"e_1_3_2_1_23_1","unstructured":"Ofir Nachum Shixiang Gu Honglak Lee and Sergey Levine. 2018. Data-Efficient Hierarchical Reinforcement Learning. arXiv:1805.08296 [cs.LG] https:\/\/arxiv.org\/abs\/1805.08296"},{"key":"e_1_3_2_1_24_1","unstructured":"OpenAI. 2024. GPT-4o System Card. arXiv:2410.21276 [cs.CL] https:\/\/arxiv.org\/abs\/2410.21276"},{"key":"e_1_3_2_1_25_1","volume-title":"Gonzalez","author":"Packer Charles","year":"2024","unstructured":"Charles Packer, Sarah Wooders, Kevin Lin, Vivian Fang, Shishir G. Patil, Ion Stoica, and Joseph E. Gonzalez. 2024. MemGPT: Towards LLMs as Operating Systems. arXiv:2310.08560 [cs.AI] https:\/\/arxiv.org\/abs\/2310.08560"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3453160"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.378"},{"key":"e_1_3_2_1_28_1","unstructured":"Yujia Qin Shihao Liang Yining Ye Kunlun Zhu Lan Yan Yaxi Lu Yankai Lin Xin Cong Xiangru Tang Bill Qian Sihan Zhao Lauren Hong Runchu Tian Ruobing Xie Jie Zhou Mark Gerstein Dahai Li Zhiyuan Liu and Maosong Sun. 2023. ToolLLM: Facilitating Large Language Models to Master 16000 Real-world APIs. arXiv:2307.16789 [cs.AI] https:\/\/arxiv.org\/abs\/2307.16789"},{"key":"e_1_3_2_1_29_1","unstructured":"Qwen: An Yang Baosong Yang Beichen Zhang Binyuan Hui Bo Zheng Bowen Yu Chengyuan Li Dayiheng Liu Fei Huang Haoran Wei Huan Lin Jian Yang Jianhong Tu Jianwei Zhang Jianxin Yang Jiaxi Yang Jingren Zhou Junyang Lin Kai Dang Keming Lu Keqin Bao Kexin Yang Le Yu Mei Li Mingfeng Xue Pei Zhang Qin Zhu Rui Men Runji Lin Tianhao Li Tianyi Tang Tingyu Xia Xingzhang Ren Xuancheng Ren Yang Fan Yang Su Yichang Zhang Yu Wan Yuqiong Liu Zeyu Cui Zhenru Zhang and Zihan Qiu. 2025. Qwen2.5 Technical Report. arXiv:2412.15115 [cs.CL] https:\/\/arxiv.org\/abs\/2412.15115"},{"key":"e_1_3_2_1_30_1","unstructured":"Rafael Rafailov Archit Sharma Eric Mitchell Stefano Ermon Christopher D. Manning and Chelsea Finn. 2024. Direct Preference Optimization: Your Language Model is Secretly a Reward Model. arXiv:2305.18290 [cs.LG] https:\/\/arxiv.org\/abs\/2305.18290"},{"key":"e_1_3_2_1_31_1","volume-title":"Toolformer: Language Models Can Teach Themselves to Use Tools. arXiv:2302.04761 [cs.CL] https:\/\/arxiv.org\/abs\/2302.04761","author":"Schick Timo","year":"2023","unstructured":"Timo Schick, Jane Dwivedi-Yu, Roberto Dess\u00ec, Roberta Raileanu, Maria Lomeli, Luke Zettlemoyer, Nicola Cancedda, and Thomas Scialom. 2023. Toolformer: Language Models Can Teach Themselves to Use Tools. arXiv:2302.04761 [cs.CL] https:\/\/arxiv.org\/abs\/2302.04761"},{"key":"e_1_3_2_1_32_1","unstructured":"John Schulman Filip Wolski Prafulla Dhariwal Alec Radford and Oleg Klimov. 2017. Proximal Policy Optimization Algorithms. arXiv:1707.06347 [cs.LG] https:\/\/arxiv.org\/abs\/1707.06347"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.620"},{"key":"e_1_3_2_1_34_1","unstructured":"Zhihong Shao Peiyi Wang Qihao Zhu Runxin Xu Junxiao Song Xiao Bi Haowei Zhang Mingchuan Zhang Y. K. Li Y. Wu and Daya Guo. 2024. DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models. arXiv:2402.03300 [cs.CL] https:\/\/arxiv.org\/abs\/2402.03300"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3689031.3696075"},{"key":"e_1_3_2_1_36_1","unstructured":"Yaorui Shi Sihang Li Chang Wu Zhiyuan Liu Junfeng Fang Hengxing Cai An Zhang and Xiang Wang. 2025. Search and Refine During Think: Autonomous Retrieval-Augmented Reasoning of LLMs. arXiv:2505.11277 [cs.CL] https:\/\/arxiv.org\/abs\/2505.11277"},{"key":"e_1_3_2_1_37_1","volume-title":"Lei Fang, and Ji-Rong Wen.","author":"Song Huatong","year":"2025","unstructured":"Huatong Song, Jinhao Jiang, Yingqian Min, Jie Chen, Zhipeng Chen, Wayne Xin Zhao, Lei Fang, and Ji-Rong Wen. 2025a. R1-Searcher: Incentivizing the Search Capability in LLMs via Reinforcement Learning. arXiv:2503.05592 [cs.AI] https:\/\/arxiv.org\/abs\/2503.05592"},{"key":"e_1_3_2_1_38_1","volume-title":"Lei Fang, and Ji-Rong Wen.","author":"Song Huatong","year":"2025","unstructured":"Huatong Song, Jinhao Jiang, Wenqing Tian, Zhipeng Chen, Yuhuan Wu, Jiahao Zhao, Yingqian Min, Wayne Xin Zhao, Lei Fang, and Ji-Rong Wen. 2025b. R1-Searcher: Incentivizing the Dynamic Knowledge Acquisition of LLMs via Reinforcement Learning. arXiv:2505.17005 [cs.CL] https:\/\/arxiv.org\/abs\/2505.17005"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00475"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.557"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Harsh Trivedi Niranjan Balasubramanian Tushar Khot and Ashish Sabharwal. 2023b. Interleaving Retrieval with Chain-of-Thought Reasoning for Knowledge-Intensive Multi-Step Questions. arXiv:2212.10509 [cs.CL] https:\/\/arxiv.org\/abs\/2212.10509","DOI":"10.18653\/v1\/2023.acl-long.557"},{"key":"e_1_3_2_1_43_1","volume-title":"FeUdal Networks for Hierarchical Reinforcement Learning. CoRR","author":"Vezhnevets Alexander Sasha","year":"2017","unstructured":"Alexander Sasha Vezhnevets, Simon Osindero, Tom Schaul, Nicolas Heess, Max Jaderberg, David Silver, and Koray Kavukcuoglu. 2017. FeUdal Networks for Hierarchical Reinforcement Learning. CoRR, Vol. abs\/1703.01161 (2017). arXiv:1703.01161 http:\/\/arxiv.org\/abs\/1703.01161"},{"key":"e_1_3_2_1_44_1","unstructured":"Liang Wang Nan Yang Xiaolong Huang Binxing Jiao Linjun Yang Daxin Jiang Rangan Majumder and Furu Wei. 2024. Text Embeddings by Weakly-Supervised Contrastive Pre-training. arXiv:2212.03533 [cs.CL] https:\/\/arxiv.org\/abs\/2212.03533"},{"key":"e_1_3_2_1_45_1","volume-title":"Ryen W White, Doug Burger, and Chi Wang.","author":"Wu Qingyun","year":"2023","unstructured":"Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Beibin Li, Erkang Zhu, Li Jiang, Xiaoyun Zhang, Shaokun Zhang, Jiale Liu, Ahmed Hassan Awadallah, Ryen W White, Doug Burger, and Chi Wang. 2023. AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation. arXiv:2308.08155 [cs.AI] https:\/\/arxiv.org\/abs\/2308.08155"},{"key":"e_1_3_2_1_46_1","volume-title":"RECOMP: Improving Retrieval-Augmented LMs with Compression and Selective Augmentation. arXiv:2310.04408 [cs.CL] https:\/\/arxiv.org\/abs\/2310.04408","author":"Xu Fangyuan","year":"2023","unstructured":"Fangyuan Xu, Weijia Shi, and Eunsol Choi. 2023. RECOMP: Improving Retrieval-Augmented LMs with Compression and Selective Augmentation. arXiv:2310.04408 [cs.CL] https:\/\/arxiv.org\/abs\/2310.04408"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1259"},{"key":"e_1_3_2_1_48_1","unstructured":"Shunyu Yao Jeffrey Zhao Dian Yu Nan Du Izhak Shafran Karthik Narasimhan and Yuan Cao. 2023. ReAct: Synergizing Reasoning and Acting in Language Models. arXiv:2210.03629 [cs.CL] https:\/\/arxiv.org\/abs\/2210.03629"},{"key":"e_1_3_2_1_49_1","unstructured":"Wentao Zhang Ce Cui Yilei Zhao Rui Hu Yang Liu Yahui Zhou and Bo An. 2025a. AgentOrchestra: A Hierarchical Multi-Agent Framework for General-Purpose Task Solving. arXiv:2506.12508 [cs.AI] https:\/\/arxiv.org\/abs\/2506.12508"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"crossref","unstructured":"Yuyao Zhang Zhicheng Dou Xiaoxi Li Jiajie Jin Yongkang Wu Zhonghua Li Qi Ye and Ji-Rong Wen. 2025b. Neuro-Symbolic Query Compiler. arXiv:2505.11932 [cs.CL] https:\/\/arxiv.org\/abs\/2505.11932","DOI":"10.18653\/v1\/2025.findings-acl.628"}],"event":{"name":"WSDM '26:The Nineteenth ACM International Conference on Web Search and Data Mining","location":"Boise ID USA","sponsor":["SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGIR ACM Special Interest Group on Information Retrieval","SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the Nineteenth ACM International Conference on Web Search and Data Mining"],"original-title":[],"deposited":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T18:01:41Z","timestamp":1771264901000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3773966.3777986"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,21]]},"references-count":50,"alternative-id":["10.1145\/3773966.3777986","10.1145\/3773966"],"URL":"https:\/\/doi.org\/10.1145\/3773966.3777986","relation":{},"subject":[],"published":{"date-parts":[[2026,2,21]]},"assertion":[{"value":"2026-02-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}