{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,29]],"date-time":"2025-12-29T04:46:21Z","timestamp":1766983581954,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":28,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,8]],"date-time":"2024-04-08T00:00:00Z","timestamp":1712534400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,8]]},"DOI":"10.1145\/3605098.3636028","type":"proceedings-article","created":{"date-parts":[[2024,5,21]],"date-time":"2024-05-21T17:59:16Z","timestamp":1716314356000},"page":"1007-1013","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Reward Specifications in Collaborative Multi-agent Learning: A Comparative Study"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9040-5842","authenticated-orcid":false,"given":"Maram","family":"Hasan","sequence":"first","affiliation":[{"name":"Computer Science and Engineering, Indian Institute of Technology Roorkee, Roorkee, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1664-4882","authenticated-orcid":false,"given":"Rajdeep","family":"Niyogi","sequence":"additional","affiliation":[{"name":"Computer Science and Engineering, Indian Institute of Technology Roorkee, Roorkee, India"}]}],"member":"320","published-online":{"date-parts":[[2024,5,21]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Feudal multi-agent hierarchies for cooperative reinforcement learning. arXiv preprint arXiv:1901.08492","author":"Ahilan Sanjeevan","year":"2019","unstructured":"Sanjeevan Ahilan and Peter Dayan. 2019. Feudal multi-agent hierarchies for cooperative reinforcement learning. arXiv preprint arXiv:1901.08492 (2019)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-14463-9_13"},{"key":"e_1_3_2_1_3_1","volume-title":"Rudder: Return decomposition for delayed rewards. Advances in Neural Information Processing Systems 32","author":"Arjona-Medina Jose A","year":"2019","unstructured":"Jose A Arjona-Medina, Michael Gillhofer, Michael Widrich, Thomas Unterthiner, Johannes Brandstetter, and Sepp Hochreiter. 2019. Rudder: Return decomposition for delayed rewards. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_2_1_4_1","volume-title":"A survey on intrinsic motivation in reinforcement learning. arXiv preprint arXiv:1908.06976","author":"Aubret Arthur","year":"2019","unstructured":"Arthur Aubret, Laetitia Matignon, and Salima Hassas. 2019. A survey on intrinsic motivation in reinforcement learning. arXiv preprint arXiv:1908.06976 (2019)."},{"key":"e_1_3_2_1_5_1","volume-title":"Unifying count-based exploration and intrinsic motivation. Advances in neural information processing systems 29","author":"Bellemare Marc","year":"2016","unstructured":"Marc Bellemare, Sriram Srinivasan, Georg Ostrovski, Tom Schaul, David Saxton, and Remi Munos. 2016. Unifying count-based exploration and intrinsic motivation. Advances in neural information processing systems 29 (2016)."},{"key":"e_1_3_2_1_6_1","volume-title":"Large-scale study of curiosity-driven learning. arXiv preprint arXiv:1808.04355","author":"Burda Yuri","year":"2018","unstructured":"Yuri Burda, Harri Edwards, Deepak Pathak, Amos Storkey, Trevor Darrell, and Alexei A Efros. 2018. Large-scale study of curiosity-driven learning. arXiv preprint arXiv:1808.04355 (2018)."},{"key":"e_1_3_2_1_7_1","volume-title":"Shared experience actor-critic for multi-agent reinforcement learning. arXiv preprint arXiv:2006.07169","author":"Christianos Filippos","year":"2020","unstructured":"Filippos Christianos, Lukas Sch\u00e4fer, and Stefano V Albrecht. 2020. Shared experience actor-critic for multi-agent reinforcement learning. arXiv preprint arXiv:2006.07169 (2020)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2019.2901791"},{"key":"e_1_3_2_1_9_1","volume-title":"The 10th international conference on autonomous agents and multiagent systems. ACM, 225--232","author":"Devlin Sam","year":"2011","unstructured":"Sam Devlin and Daniel Kudenko. 2011. Theoretical considerations of potential-based reward shaping for multi-agent systems. In The 10th international conference on autonomous agents and multiagent systems. ACM, 225--232."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.5555\/2615731.2615761"},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of the 11th international conference on autonomous agents and multiagent systems. IFAAMAS, 433--440","author":"Devlin Sam Michael","year":"2012","unstructured":"Sam Michael Devlin and Daniel Kudenko. 2012. Dynamic potential-based reward shaping. In Proceedings of the 11th international conference on autonomous agents and multiagent systems. IFAAMAS, 433--440."},{"key":"e_1_3_2_1_12_1","volume-title":"2014 AAAI Spring Symposium Series.","author":"Dewey Daniel","year":"2014","unstructured":"Daniel Dewey. 2014. Reinforcement learning and the reward engineering principle. In 2014 AAAI Spring Symposium Series."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3054912"},{"key":"e_1_3_2_1_14_1","volume-title":"Hierarchical deep reinforcement learning: Integrating temporal abstraction and intrinsic motivation. Advances in neural information processing systems 29","author":"Kulkarni Tejas D","year":"2016","unstructured":"Tejas D Kulkarni, Karthik Narasimhan, Ardavan Saeedi, and Josh Tenenbaum. 2016. Hierarchical deep reinforcement learning: Integrating temporal abstraction and intrinsic motivation. Advances in neural information processing systems 29 (2016), 3675--3683."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.5555\/2051237.2051249"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Volodymyr Mnih Koray Kavukcuoglu David Silver Andrei A Rusu Joel Veness Marc G Bellemare Alex Graves Martin Riedmiller Andreas K Fidjeland Georg Ostrovski et al. 2015. Human-level control through deep reinforcement learning. nature 518 7540 (2015) 529--533.","DOI":"10.1038\/nature14236"},{"key":"e_1_3_2_1_17_1","volume-title":"Data-efficient hierarchical reinforcement learning. arXiv preprint arXiv:1805.08296","author":"Nachum Ofir","year":"2018","unstructured":"Ofir Nachum, Shixiang Gu, Honglak Lee, and Sergey Levine. 2018. Data-efficient hierarchical reinforcement learning. arXiv preprint arXiv:1805.08296 (2018)."},{"key":"e_1_3_2_1_18_1","volume-title":"Icml","volume":"99","author":"Ng Andrew Y","year":"1999","unstructured":"Andrew Y Ng, Daishi Harada, and Stuart Russell. 1999. Policy invariance under reward transformations: Theory and application to reward shaping. In Icml, Vol. 99. Citeseer, 278--287."},{"key":"e_1_3_2_1_19_1","first-page":"2","article-title":"Algorithms for inverse reinforcement learning","volume":"1","author":"Ng Andrew Y","year":"2000","unstructured":"Andrew Y Ng, Stuart Russell, et al. 2000. Algorithms for inverse reinforcement learning.. In Icml, Vol. 1. 2.","journal-title":"Icml"},{"key":"e_1_3_2_1_20_1","volume-title":"Benchmarking multi-agent deep reinforcement learning algorithms in cooperative tasks. arXiv preprint arXiv:2006.07869","author":"Papoudakis Georgios","year":"2020","unstructured":"Georgios Papoudakis, Filippos Christianos, Lukas Sch\u00e4fer, and Stefano V Albrecht. 2020. Benchmarking multi-agent deep reinforcement learning algorithms in cooperative tasks. arXiv preprint arXiv:2006.07869 (2020)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455894"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-61616-8_33"},{"volume-title":"Cooperative multi-agent deep reinforcement learning with counterfactual reward. In 2020 International joint conference on neural networks (IJCNN)","author":"Shao Kun","key":"e_1_3_2_1_24_1","unstructured":"Kun Shao, Yuanheng Zhu, Zhentao Tang, and Dongbin Zhao. 2020. Cooperative multi-agent deep reinforcement learning with counterfactual reward. In 2020 International joint conference on neural networks (IJCNN). IEEE, 1--8."},{"key":"e_1_3_2_1_25_1","volume-title":"Agent-time attention for sparse rewards multi-agent reinforcement learning. arXiv preprint arXiv:2210.17540","author":"She Jennifer","year":"2022","unstructured":"Jennifer She, Jayesh K Gupta, and Mykel J Kochenderfer. 2022. Agent-time attention for sparse rewards multi-agent reinforcement learning. arXiv preprint arXiv:2210.17540 (2022)."},{"key":"e_1_3_2_1_26_1","unstructured":"Richard S. Sutton. 2004. The reward hypothesis. (2004). http:\/\/incompleteideas.net\/rlai.cs.ualberta.ca\/RLAI\/rewardhypothesis.html"},{"key":"e_1_3_2_1_27_1","volume-title":"On bonus-based exploration methods in the arcade learning environment. arXiv preprint arXiv:2109.11052","author":"Taiga Adrien Ali","year":"2021","unstructured":"Adrien Ali Taiga, William Fedus, Marlos C Machado, Aaron Courville, and Marc G Bellemare. 2021. On bonus-based exploration methods in the arcade learning environment. arXiv preprint arXiv:2109.11052 (2021)."},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the 20th international conference on machine learning (ICML-03)","author":"Wiewiora Eric","year":"2003","unstructured":"Eric Wiewiora, Garrison W Cottrell, and Charles Elkan. 2003. Principled methods for advising reinforcement learning agents. In Proceedings of the 20th international conference on machine learning (ICML-03). 792--799."}],"event":{"name":"SAC '24: 39th ACM\/SIGAPP Symposium on Applied Computing","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing"],"location":"Avila Spain","acronym":"SAC '24"},"container-title":["Proceedings of the 39th ACM\/SIGAPP Symposium on Applied Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3605098.3636028","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3605098.3636028","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:59Z","timestamp":1750291439000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3605098.3636028"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,8]]},"references-count":28,"alternative-id":["10.1145\/3605098.3636028","10.1145\/3605098"],"URL":"https:\/\/doi.org\/10.1145\/3605098.3636028","relation":{},"subject":[],"published":{"date-parts":[[2024,4,8]]},"assertion":[{"value":"2024-05-21","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}