{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T14:10:09Z","timestamp":1755871809075,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,1,4]],"date-time":"2024-01-04T00:00:00Z","timestamp":1704326400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,1,4]]},"DOI":"10.1145\/3632410.3632430","type":"proceedings-article","created":{"date-parts":[[2024,1,3]],"date-time":"2024-01-03T18:15:16Z","timestamp":1704305716000},"page":"73-81","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["DSDF: Coordinated look-ahead strategy in multi-agent reinforcement learning with noisy agents"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5132-2144","authenticated-orcid":false,"given":"Satheesh Kumar","family":"Perepu","sequence":"first","affiliation":[{"name":"Ericsson Research, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5795-9063","authenticated-orcid":false,"given":"Kaushik","family":"Dey","sequence":"additional","affiliation":[{"name":"Ericsson Research, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2327-1618","authenticated-orcid":false,"given":"Abir","family":"Das","sequence":"additional","affiliation":[{"name":"Indian Institue of Technology Kharagpur, India"}]}],"member":"320","published-online":{"date-parts":[[2024,1,4]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"The complexity of decentralized control of Markov decision processes. 
Mathematics of operations research 27, 4","author":"Bernstein S","year":"2002","unstructured":"Daniel\u00a0S Bernstein, Robert Givan, Neil Immerman, and Shlomo Zilberstein. 2002. The complexity of decentralized control of Markov decision processes. Mathematics of operations research 27, 4 (2002), 819\u2013840."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-47358-7_7"},{"key":"e_1_3_2_1_3_1","volume-title":"Autonomous air traffic controller: A deep multi-agent reinforcement learning approach. arXiv preprint arXiv:1905.01303","author":"Brittain Marc","year":"2019","unstructured":"Marc Brittain and Peng Wei. 2019. Autonomous air traffic controller: A deep multi-agent reinforcement learning approach. arXiv preprint arXiv:1905.01303 (2019)."},{"key":"e_1_3_2_1_4_1","volume-title":"International Conference on Artificial Intelligence and Statistics. PMLR, 3230\u20133269","author":"Chen Yiding","year":"2023","unstructured":"Yiding Chen, Xuezhou Zhang, Kaiqing Zhang, Mengdi Wang, and Xiaojin Zhu. 2023. Byzantine-robust online and offline distributed reinforcement learning. In International Conference on Artificial Intelligence and Statistics. PMLR, 3230\u20133269."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2019.2901791"},{"key":"e_1_3_2_1_6_1","volume-title":"International Conference on Machine Learning. PMLR, 1282\u20131289","author":"Cobbe Karl","year":"2019","unstructured":"Karl Cobbe, Oleg Klimov, Chris Hesse, Taehoon Kim, and John Schulman. 2019. Quantifying generalization in reinforcement learning. In International Conference on Machine Learning. 
PMLR, 1282\u20131289."},{"key":"e_1_3_2_1_7_1","volume-title":"Is independent learning all you need in the starcraft multi-agent challenge? arXiv preprint arXiv:2011.09533","author":"de Witt Christian\u00a0Schroeder","year":"2020","unstructured":"Christian\u00a0Schroeder de Witt, Tarun Gupta, Denys Makoviichuk, Viktor Makoviychuk, Philip\u00a0HS Torr, Mingfei Sun, and Shimon Whiteson. 2020. Is independent learning all you need in the starcraft multi-agent challenge? arXiv preprint arXiv:2011.09533 (2020)."},{"key":"e_1_3_2_1_8_1","volume-title":"Challenges of real-world reinforcement learning. arXiv preprint arXiv:1904.12901","author":"Dulac-Arnold Gabriel","year":"2019","unstructured":"Gabriel Dulac-Arnold, Daniel Mankowitz, and Todd Hester. 2019. Challenges of real-world reinforcement learning. arXiv preprint arXiv:1904.12901 (2019)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"e_1_3_2_1_10_1","volume-title":"Deep multi-agent reinforcement learning with discrete-continuous hybrid action spaces. arXiv preprint arXiv:1903.04959","author":"Fu Haotian","year":"2019","unstructured":"Haotian Fu, Hongyao Tang, Jianye Hao, Zihan Lei, Yingfeng Chen, and Changjie Fan. 2019. Deep multi-agent reinforcement learning with discrete-continuous hybrid action spaces. arXiv preprint arXiv:1903.04959 (2019)."},{"volume-title":"Cooperative Multi-agent Control Using Deep Reinforcement Learning","author":"Gupta K.","key":"e_1_3_2_1_11_1","unstructured":"Jayesh\u00a0K. Gupta, Maxim Egorov, and Mykel Kochenderfer. 2017. Cooperative Multi-agent Control Using Deep Reinforcement Learning. In Autonomous Agents and Multiagent Systems, Gita Sukthankar and Juan\u00a0A. Rodriguez-Aguilar (Eds.). Springer International Publishing, Cham, 66\u201383."},{"key":"e_1_3_2_1_12_1","volume-title":"arXiv preprint arXiv:1609.09106","author":"Ha David","year":"2016","unstructured":"David Ha, Andrew Dai, and Quoc\u00a0V Le. 2016. Hypernetworks. 
arXiv preprint arXiv:1609.09106 (2016)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561103"},{"key":"e_1_3_2_1_14_1","volume-title":"International Conference on Machine Learning. PMLR, 3040\u20133049","author":"Jaques Natasha","year":"2019","unstructured":"Natasha Jaques, Angeliki Lazaridou, Edward Hughes, Caglar Gulcehre, Pedro Ortega, DJ Strouse, Joel\u00a0Z Leibo, and Nando De\u00a0Freitas. 2019. Social influence as intrinsic motivation for multi-agent deep reinforcement learning. In International Conference on Machine Learning. PMLR, 3040\u20133049."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5878"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219993"},{"key":"e_1_3_2_1_17_1","volume-title":"Feudal latent space exploration for coordinated multi-agent reinforcement learning","author":"Liu Xiangyu","year":"2022","unstructured":"Xiangyu Liu and Ying Tan. 2022. Feudal latent space exploration for coordinated multi-agent reinforcement learning. IEEE Transactions on Neural Networks and Learning Systems (2022)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPADS51040.2020.00031"},{"key":"e_1_3_2_1_19_1","volume-title":"Maven: Multi-agent variational exploration. arXiv preprint arXiv:1910.07483","author":"Mahajan Anuj","year":"2019","unstructured":"Anuj Mahajan, Tabish Rashid, Mikayel Samvelyan, and Shimon Whiteson. 2019. Maven: Multi-agent variational exploration. arXiv preprint arXiv:1910.07483 (2019)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCKE.2016.7802135"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2933973"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.3390\/s23073625"},{"key":"e_1_3_2_1_23_1","volume-title":"Comparative Evaluation of Multi-Agent Deep Reinforcement Learning Algorithms. 
arXiv preprint arXiv:2006.07869","author":"Papoudakis Georgios","year":"2020","unstructured":"Georgios Papoudakis, Filippos Christianos, Lukas Sch\u00e4fer, and Stefano\u00a0V Albrecht. 2020. Comparative Evaluation of Multi-Agent Deep Reinforcement Learning Algorithms. arXiv preprint arXiv:2006.07869 (2020)."},{"key":"e_1_3_2_1_24_1","first-page":"12208","article-title":"Facmac: Factored multi-agent centralised policy gradients","volume":"34","author":"Peng Bei","year":"2021","unstructured":"Bei Peng, Tabish Rashid, Christian Schroeder\u00a0de Witt, Pierre-Alexandre Kamienny, Philip Torr, Wendelin B\u00f6hmer, and Shimon Whiteson. 2021. Facmac: Factored multi-agent centralised policy gradients. Advances in Neural Information Processing Systems 34 (2021), 12208\u201312221.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_25_1","volume-title":"Weighted qmix: Expanding monotonic value function factorisation for deep multi-agent reinforcement learning. arXiv preprint arXiv:2006.10800","author":"Rashid Tabish","year":"2020","unstructured":"Tabish Rashid, Gregory Farquhar, Bei Peng, and Shimon Whiteson. 2020. Weighted qmix: Expanding monotonic value function factorisation for deep multi-agent reinforcement learning. arXiv preprint arXiv:2006.10800 (2020)."},{"key":"e_1_3_2_1_26_1","volume-title":"Weighted qmix: Expanding monotonic value function factorisation for deep multi-agent reinforcement learning. Advances in neural information processing systems 33","author":"Rashid Tabish","year":"2020","unstructured":"Tabish Rashid, Gregory Farquhar, Bei Peng, and Shimon Whiteson. 2020. Weighted qmix: Expanding monotonic value function factorisation for deep multi-agent reinforcement learning. Advances in neural information processing systems 33 (2020), 10199\u201310210."},{"key":"e_1_3_2_1_27_1","volume-title":"International Conference on Machine Learning. 
PMLR, 4295\u20134304","author":"Rashid Tabish","year":"2018","unstructured":"Tabish Rashid, Mikayel Samvelyan, Christian Schroeder, Gregory Farquhar, Jakob Foerster, and Shimon Whiteson. 2018. Qmix: Monotonic value function factorisation for deep multi-agent reinforcement learning. In International Conference on Machine Learning. PMLR, 4295\u20134304."},{"key":"e_1_3_2_1_28_1","volume-title":"The StarCraft Multi-Agent Challenge. CoRR abs\/1902.04043","author":"Samvelyan Mikayel","year":"2019","unstructured":"Mikayel Samvelyan, Tabish Rashid, Christian\u00a0Schroeder de Witt, Gregory Farquhar, Nantas Nardelli, Tim G.\u00a0J. Rudner, Chia-Man Hung, Philip H.\u00a0S. Torr, Jakob Foerster, and Shimon Whiteson. 2019. The StarCraft Multi-Agent Challenge. CoRR abs\/1902.04043 (2019)."},{"key":"e_1_3_2_1_29_1","volume-title":"Convergence results for single-step on-policy reinforcement-learning algorithms. Machine learning 38, 3","author":"Singh Satinder","year":"2000","unstructured":"Satinder Singh, Tommi Jaakkola, Michael\u00a0L Littman, and Csaba Szepesv\u00e1ri. 2000. Convergence results for single-step on-policy reinforcement-learning algorithms. Machine learning 38, 3 (2000), 287\u2013308."},{"key":"e_1_3_2_1_30_1","volume-title":"International Conference on Machine Learning. PMLR, 5887\u20135896","author":"Son Kyunghwan","year":"2019","unstructured":"Kyunghwan Son, Daewoo Kim, Wan\u00a0Ju Kang, David\u00a0Earl Hostallero, and Yung Yi. 2019. Qtran: Learning to factorize with transformation for cooperative multi-agent reinforcement learning. In International Conference on Machine Learning. PMLR, 5887\u20135896."},{"key":"e_1_3_2_1_31_1","volume-title":"Value-decomposition networks for cooperative multi-agent learning. 
arXiv preprint arXiv:1706.05296","author":"Sunehag Peter","year":"2017","unstructured":"Peter Sunehag, Guy Lever, Audrunas Gruslys, Wojciech\u00a0Marian Czarnecki, Vinicius Zambaldi, Max Jaderberg, Marc Lanctot, Nicolas Sonnerat, Joel\u00a0Z Leibo, Karl Tuyls, 2017. Value-decomposition networks for cooperative multi-agent learning. arXiv preprint arXiv:1706.05296 (2017)."},{"key":"e_1_3_2_1_32_1","volume-title":"Cooperative Agents. In In Proceedings of the Tenth International Conference on Machine Learning. Morgan Kaufmann, 330\u2013337","author":"Tan Ming","year":"1993","unstructured":"Ming Tan. 1993. Multi-Agent Reinforcement Learning: Independent vs. Cooperative Agents. In In Proceedings of the Tenth International Conference on Machine Learning. Morgan Kaufmann, 330\u2013337."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-022-10299-x"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3056418"},{"key":"e_1_3_2_1_35_1","volume-title":"Meta-gradient reinforcement learning. arXiv preprint arXiv:1805.09801","author":"Xu Zhongwen","year":"2018","unstructured":"Zhongwen Xu, Hado van Hasselt, and David Silver. 2018. Meta-gradient reinforcement learning. arXiv preprint arXiv:1805.09801 (2018)."},{"key":"e_1_3_2_1_36_1","volume-title":"Advances in Neural Information Processing Systems, H.\u00a0Larochelle, M.\u00a0Ranzato, R.\u00a0Hadsell, M.F. Balcan, and H.\u00a0Lin (Eds.). Vol.\u00a033. Curran Associates","author":"Zhang Kaiqing","year":"2020","unstructured":"Kaiqing Zhang, TAO SUN, Yunzhe Tao, Sahika Genc, Sunil Mallya, and Tamer Basar. 2020. Robust Multi-Agent Reinforcement Learning with Model Uncertainty. In Advances in Neural Information Processing Systems, H.\u00a0Larochelle, M.\u00a0Ranzato, R.\u00a0Hadsell, M.F. Balcan, and H.\u00a0Lin (Eds.). Vol.\u00a033. Curran Associates, Inc., 10571\u201310583. 
https:\/\/proceedings.neurips.cc\/paper\/2020\/file\/774412967f19ea61d448977ad9749078-Paper.pdf"},{"key":"e_1_3_2_1_37_1","volume-title":"Multi-agent reinforcement learning: A selective overview of theories and algorithms. Handbook of reinforcement learning and control","author":"Zhang Kaiqing","year":"2021","unstructured":"Kaiqing Zhang, Zhuoran Yang, and Tamer Ba\u015far. 2021. Multi-agent reinforcement learning: A selective overview of theories and algorithms. Handbook of reinforcement learning and control (2021), 321\u2013384."},{"key":"e_1_3_2_1_38_1","volume-title":"International Conference on Machine Learning. PMLR, 5872\u20135881","author":"Zhang Kaiqing","year":"2018","unstructured":"Kaiqing Zhang, Zhuoran Yang, Han Liu, Tong Zhang, and Tamer Basar. 2018. Fully decentralized multi-agent reinforcement learning with networked agents. In International Conference on Machine Learning. PMLR, 5872\u20135881."}],"event":{"name":"CODS-COMAD 2024: 7th Joint International Conference on Data Science & Management of Data (11th ACM IKDD CODS and 29th COMAD)","acronym":"CODS-COMAD 2024","location":"Bangalore India"},"container-title":["Proceedings of the 7th Joint International Conference on Data Science &amp; Management of Data (11th ACM IKDD CODS and 29th 
COMAD)"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3632410.3632430","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3632410.3632430","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T13:35:25Z","timestamp":1755869725000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3632410.3632430"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1,4]]},"references-count":38,"alternative-id":["10.1145\/3632410.3632430","10.1145\/3632410"],"URL":"https:\/\/doi.org\/10.1145\/3632410.3632430","relation":{},"subject":[],"published":{"date-parts":[[2024,1,4]]},"assertion":[{"value":"2024-01-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}