{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T12:26:14Z","timestamp":1730204774706,"version":"3.28.0"},"reference-count":43,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,12,6]],"date-time":"2022-12-06T00:00:00Z","timestamp":1670284800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,12,6]],"date-time":"2022-12-06T00:00:00Z","timestamp":1670284800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,12,6]]},"DOI":"10.1109\/cdc51059.2022.9992660","type":"proceedings-article","created":{"date-parts":[[2023,1,10]],"date-time":"2023-01-10T14:26:56Z","timestamp":1673360816000},"page":"2379-2386","source":"Crossref","is-referenced-by-count":1,"title":["Event-Based Communication in Distributed Q-Learning"],"prefix":"10.1109","author":[{"given":"Daniel Jarne","family":"Ornia","sequence":"first","affiliation":[{"name":"Delft University of Technology,Delft Centre for Systems and Control,Delft,The Netherlands"}]},{"given":"Manuel","family":"Mazo","sequence":"additional","affiliation":[{"name":"Delft University of Technology,Delft Centre for Systems and Control,Delft,The Netherlands"}]}],"member":"263","reference":[{"article-title":"Distributed prioritized experience replay","year":"2018","author":"horgan","key":"ref13"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2011.2174666"},{"article-title":"Massively parallel methods for deep reinforcement learning","year":"2015","author":"nair","key":"ref12"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacsc.2021.100144"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"ref37","first-page":"302","article-title":"Decentralized markov decision processes with event-driven interactions","author":"becker","year":"2004","journal-title":"Proceedings of the Third International Joint Conference on Autonomous Agents and Multiagent Systems-Volume 1"},{"key":"ref14","article-title":"Recurrent experience replay in distributed reinforcement learning","author":"kapturowski","year":"2018","journal-title":"International Conference on Learning Representations"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CDC45484.2021.9683215"},{"key":"ref31","doi-asserted-by":"crossref","first-page":"109009","DOI":"10.1016\/j.automatica.2020.109009","article-title":"Event-triggered learning","volume":"117","author":"solowjow","year":"2020","journal-title":"Automatica"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6206"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/0921-8890(95)00018-B"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2014.6889787"},{"key":"ref10","first-page":"242","article-title":"Multiagent reinforcement learning: theoretical framework and an algorithm","volume":"98","author":"hu","year":"1998","journal-title":"ICML"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2017.03.013"},{"journal-title":"Reinforcement Learning An Introduction","year":"2018","author":"sutton","key":"ref2"},{"key":"ref1","volume":"2050","author":"bellman","year":"2015","journal-title":"Applied Dynamic Programming"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3_14"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2020.2975749"},{"key":"ref16","article-title":"Multiagent reinforcement learning for multi-robot systems: A survey","author":"yang","year":"2004","journal-title":"Tech Rep Tech Rep"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CDC40024.2019.9029257"},{"article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","year":"2017","author":"lowe","key":"ref19"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2007.913919"},{"article-title":"Communication-efficient distributed reinforcement learning","year":"2018","author":"chen","key":"ref24"},{"article-title":"Federated learning: Strategies for improving communication efficiency","year":"2016","author":"konečný","key":"ref23"},{"article-title":"Graph neural networks for decentralized multi-robot path planning","year":"2019","author":"li","key":"ref26"},{"article-title":"Learning to communicate with deep multi-agent reinforcement learning","year":"2016","author":"foerster","key":"ref25"},{"key":"ref20","article-title":"An algorithm for distributed reinforcement learning in cooperative multi-agent systems","author":"lauer","year":"2000","journal-title":"Proc Seventh Int Conf Machine Learning"},{"article-title":"Openai gym","year":"2016","author":"brockman","key":"ref42"},{"article-title":"Prioritized experience replay","year":"2015","author":"schaul","key":"ref41"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-005-2631-2"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015410"},{"key":"ref43","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2011.2164036"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2007.904277"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2008.4739414"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"article-title":"Continuous control with deep reinforcement learning","year":"2015","author":"lillicrap","key":"ref7"},{"key":"ref9","first-page":"195","article-title":"Planning, learning and coordination in multiagent decision processes","volume":"96","author":"boutilier","year":"1996","journal-title":"TARK"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1994.6.6.1185"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/BF00992698","article-title":"Q-learning","volume":"8","author":"watkins","year":"1992","journal-title":"Machine Learning"},{"article-title":"Playing atari with deep reinforcement learning","year":"2013","author":"mnih","key":"ref6"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref40","first-page":"1","article-title":"Convergence of q-learning: A simple proof","author":"melo","year":"2001","journal-title":"Institute of Systems and Robotics Tech Rep"}],"event":{"name":"2022 IEEE 61st Conference on Decision and Control (CDC)","start":{"date-parts":[[2022,12,6]]},"location":"Cancun, Mexico","end":{"date-parts":[[2022,12,9]]}},"container-title":["2022 IEEE 61st Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9992315\/9992317\/09992660.pdf?arnumber=9992660","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,7]],"date-time":"2023-02-07T14:18:30Z","timestamp":1675779510000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9992660\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,6]]},"references-count":43,"URL":"https:\/\/doi.org\/10.1109\/cdc51059.2022.9992660","relation":{},"subject":[],"published":{"date-parts":[[2022,12,6]]}}}