{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T20:26:46Z","timestamp":1730233606535,"version":"3.28.0"},"reference-count":19,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,10,3]],"date-time":"2022-10-03T00:00:00Z","timestamp":1664755200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,10,3]],"date-time":"2022-10-03T00:00:00Z","timestamp":1664755200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,10,3]]},"DOI":"10.1109\/icccnt54827.2022.9984457","type":"proceedings-article","created":{"date-parts":[[2022,12,26]],"date-time":"2022-12-26T19:44:09Z","timestamp":1672083849000},"page":"1-5","source":"Crossref","is-referenced-by-count":1,"title":["Deep Reinforcement Learning Algorithms for Machine-to-Machine Communications: A Review"],"prefix":"10.1109","author":[{"given":"Devarani Devi","family":"Ningombam","sequence":"first","affiliation":[{"name":"University of Petroleum and Energy Studies (UPES),School of Computer Science,Department of Informatics,Dehradun,Uttarakhand,India"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2020.2976000"},{"article-title":"Exploration-Exploitation Trade-off in Reinforcement Learning on Online Markov Decision Processes with Global Concave Rewards","year":"2019","author":"Cheung","key":"ref2"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3113350"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3171053"},{"journal-title":"Methods of Hierarchical Clustering. Computing Research Repository - CORR","year":"2011","author":"Murtagh","key":"ref5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-31293-4_19"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3012204"},{"article-title":"Exploration-Exploitation Trade-off in Reinforcement Learning on Online Markov Decision Processes with Global Concave Rewards","year":"2019","author":"Cheung","key":"ref8"},{"key":"ref9","article-title":"A New Framework for Multi-Agent Reinforcement Learning -- Centralized Training and Exploration with Decentralized Execution via Policy Distillation","volume-title":"Oct.","author":"Chen","year":"2019"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3198981"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1155\/2018\/7172614"},{"key":"ref12","first-page":"8","article-title":"LIIR: Learning individual intrinsic reward in multi-agent reinforcement learning","volume-title":"Annual Conference on Neural Information Processing Systems","author":"Du"},{"article-title":"Social influence as intrinsic motivation for multi-agent deep reinforcement learning","volume-title":"the International Conference on Machine Learning","author":"Jaques","key":"ref13"},{"key":"ref14","first-page":"1146","article-title":"Stabilising experience replay for deep multi-agent reinforcement learning","volume-title":"Proceedings of the 34th International Conference on Machine Learning","volume":"70","author":"Foerster"},{"key":"ref16","article-title":"Multi-robot Task Allocation using Agglomerative Clustering","volume-title":"Thesis. Rochester Institute of Technology","author":"Shoaib","year":"2016"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"ref18","first-page":"2186","article-title":"The starcraft multi-agent challenge","volume-title":"Proc. 18th Int. Conf. Auton. Agents Multiagent Syst","author":"Samvelyan"},{"article-title":"High-dimensional continuous control using generalized advantage estimation","year":"2015","author":"Schulman","key":"ref19"},{"journal-title":"Proximal policy optimization algorithms","year":"2017","author":"Schulman","key":"ref20"}],"event":{"name":"2022 13th International Conference on Computing Communication and Networking Technologies (ICCCNT)","start":{"date-parts":[[2022,10,3]]},"location":"Kharagpur, India","end":{"date-parts":[[2022,10,5]]}},"container-title":["2022 13th International Conference on Computing Communication and Networking Technologies (ICCCNT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9984157\/9984211\/09984457.pdf?arnumber=9984457","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T03:57:27Z","timestamp":1706759847000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9984457\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,3]]},"references-count":19,"URL":"https:\/\/doi.org\/10.1109\/icccnt54827.2022.9984457","relation":{},"subject":[],"published":{"date-parts":[[2022,10,3]]}}}