{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T17:13:01Z","timestamp":1772644381469,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,6,30]],"date-time":"2020-06-30T00:00:00Z","timestamp":1593475200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,6,30]]},"DOI":"10.1145\/3405962.3405996","type":"proceedings-article","created":{"date-parts":[[2020,8,25]],"date-time":"2020-08-25T04:24:02Z","timestamp":1598329442000},"page":"238-247","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Cooperative Multi-Agent Reinforcement Learning for Spectrum Management in IoT Cognitive Networks"],"prefix":"10.1145","author":[{"given":"Dejan","family":"Da\u0161i\u0107","sequence":"first","affiliation":[{"name":"Vlatacom Institute, Belgrade, Serbia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Miljan","family":"Vu\u010deti\u0107","sequence":"additional","affiliation":[{"name":"Vlatacom Institute, Belgrade, Serbia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Miroslav","family":"Peri\u0107","sequence":"additional","affiliation":[{"name":"Vlatacom Institute, Belgrade, Serbia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marko","family":"Beko","sequence":"additional","affiliation":[{"name":"COPELABS, ULHT, Lisbon, Portugal"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Milo\u0161","family":"Stankovi\u0107","sequence":"additional","affiliation":[{"name":"Singidunum Uni. and Vlatacom Inst., Belgrade, Serbia"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2020,8,24]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2006.05.001"},{"key":"e_1_3_2_1_2_1","first-page":"1849","volume-title":"Singapore: Springer Singapore","author":"Felice M. Di","year":"2019","unstructured":"M. Di Felice , L. Bedogni , and L. Bononi , 2019 . Reinforcement Learning-Based Spectrum Management for Cognitive Radio Networks: A Literature Review and Case Study, in Handbook of Cognitive Radio , vol. 3-- 3 , Singapore: Springer Singapore , pp. 1849 -- 1886 . M. Di Felice, L. Bedogni, and L. Bononi, 2019. Reinforcement Learning-Based Spectrum Management for Cognitive Radio Networks: A Literature Review and Case Study, in Handbook of Cognitive Radio, vol. 3--3, Singapore: Springer Singapore, pp. 1849--1886."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2012.092712.120201"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.2012.6364285"},{"key":"e_1_3_2_1_5_1","first-page":"1","volume-title":"2012 IEEE Vehicular Technology Conference (VTC Fall)","author":"Beko M.","unstructured":"M. Beko , S. Tomic , R. Dinis , and V. Lipovac , 2012. Convex optimization-based beamforming in cognitive radio multicast transmission , in 2012 IEEE Vehicular Technology Conference (VTC Fall) , pp. 1 - 5 , doi: 10.1109\/VTCFall.2012.6399378. 10.1109\/VTCFall.2012.6399378 M. Beko, S. Tomic, R. Dinis, and V. Lipovac, 2012. Convex optimization-based beamforming in cognitive radio multicast transmission, in 2012 IEEE Vehicular Technology Conference (VTC Fall), pp. 1 -5, doi: 10.1109\/VTCFall.2012.6399378."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/SURV.2009.090109"},{"key":"e_1_3_2_1_7_1","first-page":"2244","volume-title":"21st Annual IEEE International Symposium on Personal, Indoor and Mobile Radio Communications, PIMRC, no. September","author":"Lo B. F.","unstructured":"B. F. Lo and I. F. Akyildiz , 2010. Reinforcement learning-based cooperative sensing in cognitive radio ad hoc networks , in 21st Annual IEEE International Symposium on Personal, Indoor and Mobile Radio Communications, PIMRC, no. September , pp. 2244 -- 2249 , doi: 10.1109\/PIMRC.2010.5671686. 10.1109\/PIMRC.2010.5671686 B. F. Lo and I. F. Akyildiz, 2010. Reinforcement learning-based cooperative sensing in cognitive radio ad hoc networks, in 21st Annual IEEE International Symposium on Personal, Indoor and Mobile Radio Communications, PIMRC, no. September, pp. 2244--2249, doi: 10.1109\/PIMRC.2010.5671686."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/MWC.2017.1600404"},{"key":"e_1_3_2_1_9_1","first-page":"469","volume-title":"2016 Int. Wirel. Commun. Mob. Comput. Conf. IWCMC 2016","author":"Khan A. A.","unstructured":"A. A. Khan , M. H. Rehmani , and A. Rachedi , 2016. When Cognitive Radio meets the Internet of Things? , 2016 Int. Wirel. Commun. Mob. Comput. Conf. IWCMC 2016 , pp. 469 -- 474 , doi: 10.1109\/IWCMC.2016.7577103. 10.1109\/IWCMC.2016.7577103 A. A. Khan, M. H. Rehmani, and A. Rachedi, 2016. When Cognitive Radio meets the Internet of Things?, 2016 Int. Wirel. Commun. Mob. Comput. Conf. IWCMC 2016, pp. 469--474, doi: 10.1109\/IWCMC.2016.7577103."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2016.2539923"},{"key":"e_1_3_2_1_11_1","unstructured":"R. S. Sutton and A. G. Barto 2018. Reinforcement learning: An introduction (2nd Edition). The MIT Press Cambridge Massachusetts.  R. S. Sutton and A. G. Barto 2018. Reinforcement learning: An introduction (2nd Edition). The MIT Press Cambridge Massachusetts."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553501"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2007.913919"},{"key":"e_1_3_2_1_14_1","unstructured":"K. Zhang Z. Yang and T. Ba\u015far 2019. Multi-Agent Reinforcement Learning: A Selective Overview of Theories and Algorithms [Online]. Available: http:\/\/arxiv.org\/abs\/1911.10635.  K. Zhang Z. Yang and T. Ba\u015far 2019. Multi-Agent Reinforcement Learning: A Selective Overview of Theories and Algorithms [Online]. Available: http:\/\/arxiv.org\/abs\/1911.10635."},{"key":"e_1_3_2_1_15_1","volume-title":"Proc. Am. Control Conf.","volume":"172","author":"Stankovi\u0107 M. S.","unstructured":"M. S. Stankovi\u0107 and S. S. Stankovi\u0107 , 2016. Multi-agent temporal-difference learning with linear function approximation: Weak convergence under time-varying network topologies , Proc. Am. Control Conf. , vol. 2016-July, pp. 167-- 172 , doi: 10.1109\/ACC.2016.7524910. 10.1109\/ACC.2016.7524910 M. S. Stankovi\u0107 and S. S. Stankovi\u0107, 2016. Multi-agent temporal-difference learning with linear function approximation: Weak convergence under time-varying network topologies, Proc. Am. Control Conf., vol. 2016-July, pp. 167--172, doi: 10.1109\/ACC.2016.7524910."},{"key":"e_1_3_2_1_16_1","volume-title":"Networked Agents: Recent Advances, [Online]. Available: http:\/\/arxiv.org\/abs\/1912.03821.","author":"Zhang K.","year":"2019","unstructured":"K. Zhang , Z. Yang , and T. Ba\u015far , 2019 . Decentralized Multi-Agent Reinforcement Learning with Networked Agents: Recent Advances, [Online]. Available: http:\/\/arxiv.org\/abs\/1912.03821. K. Zhang, Z. Yang, and T. Ba\u015far, 2019. Decentralized Multi-Agent Reinforcement Learning with Networked Agents: Recent Advances, [Online]. Available: http:\/\/arxiv.org\/abs\/1912.03821."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"M. S. Stankovic M. Beko and S. S. Stankovic 2020. Distributed gradient temporal difference off-policy learning with eligibility traces: Weak convergence Accepted at IFAC World Congress.  M. S. Stankovic M. Beko and S. S. Stankovic 2020. Distributed gradient temporal difference off-policy learning with eligibility traces: Weak convergence Accepted at IFAC World Congress.","DOI":"10.1016\/j.ifacol.2020.12.2184"},{"key":"e_1_3_2_1_18_1","first-page":"9340","volume-title":"35th Int. Conf. Mach. Learn. ICML 2018","volume":"13","author":"Zhang K.","unstructured":"K. Zhang , Z. Yang , H. Liu , T. Zhang , and T. Ba\u015far , 2018. Fully decentralized multi-agent reinforcement learning with networked agents , 35th Int. Conf. Mach. Learn. ICML 2018 , vol. 13 , pp. 9340 -- 9371 . K. Zhang, Z. Yang, H. Liu, T. Zhang, and T. Ba\u015far, 2018. Fully decentralized multi-agent reinforcement learning with networked agents, 35th Int. Conf. Mach. Learn. ICML 2018, vol. 13, pp. 9340--9371."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"S. Kar J. M. F. Moura and H. V. Poor 2013. QD-learning: A collaborative distributed strategy for multi-agent reinforcement learning through consensus + innovations vol. 61 no. 7.  S. Kar J. M. F. Moura and H. V. Poor 2013. QD-learning: A collaborative distributed strategy for multi-agent reinforcement learning through consensus + innovations vol. 61 no. 7.","DOI":"10.1109\/TSP.2013.2241057"},{"key":"e_1_3_2_1_20_1","first-page":"66","volume-title":"Eds. Springer International Publishing","author":"Gupta J. K.","unstructured":"J. K. Gupta , M. Egorov , and M. Kochenderfer , 2017. Cooperative Multi-agent Control Using Deep Reinforcement Learning, in Autonomous Agents and Multiagent Systems, G. Sukthankar and J. A. Rodriguez-Aguilar , Eds. Springer International Publishing , pp. 66 -- 83 . J. K. Gupta, M. Egorov, and M. Kochenderfer, 2017. Cooperative Multi-agent Control Using Deep Reinforcement Learning, in Autonomous Agents and Multiagent Systems, G. Sukthankar and J. A. Rodriguez-Aguilar, Eds. Springer International Publishing, pp. 66--83."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2014.2368731"},{"key":"e_1_3_2_1_22_1","first-page":"185","volume-title":"Handbook of Cognitive Radio","author":"Caso G.","unstructured":"G. Caso , M. T. P. Le , L. De Nardis , and M.-G. Di Benedetto , 2019. Non-cooperative and Cooperative Spectrum Sensing in 5G Cognitive Networks , in Handbook of Cognitive Radio , Singapore : Springer Singapore , pp. 185 -- 205 . G. Caso, M. T. P. Le, L. De Nardis, and M.-G. Di Benedetto, 2019. Non-cooperative and Cooperative Spectrum Sensing in 5G Cognitive Networks, in Handbook of Cognitive Radio, Singapore: Springer Singapore, pp. 185--205."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.comcom.2016.07.012"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.5555\/1622737.1622748"},{"key":"e_1_3_2_1_25_1","first-page":"553","volume-title":"Proceedings of the International Scientific Conference - Sinteza","author":"Da\u0161i\u0107 D.","year":"2019","unstructured":"D. Da\u0161i\u0107 , M. Vu\u010deti\u0107 , G. Hew A Kee , and M. Stankovi\u0107 , 2019. Deep Learning Applications in Mobile Networks , in Proceedings of the International Scientific Conference - Sinteza 2019 , pp. 553 -- 560 , doi: 10.15308\/Sinteza-2019-553-560. 10.15308\/Sinteza-2019-553-560 D. Da\u0161i\u0107, M. Vu\u010deti\u0107, G. Hew A Kee, and M. Stankovi\u0107, 2019. Deep Learning Applications in Mobile Networks, in Proceedings of the International Scientific Conference - Sinteza 2019, pp. 553--560, doi: 10.15308\/Sinteza-2019-553-560."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-13315-2_11"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jnca.2011.08.007"},{"key":"e_1_3_2_1_28_1","volume-title":"2010 IEEE Int. Conf. Commun. Work. ICC 2010, doi: 10","author":"Yau K. L. A.","year":"2010","unstructured":"K. L. A. Yau , P. Komisarczuk , and P. D. Teal , 2010. Applications of reinforcement learning to cognitive radio networks , 2010 IEEE Int. Conf. Commun. Work. ICC 2010, doi: 10 .1109\/ICCW. 2010 .5503970. 10.1109\/ICCW.2010.5503970 K. L. A. Yau, P. Komisarczuk, and P. D. Teal, 2010. Applications of reinforcement learning to cognitive radio networks, 2010 IEEE Int. Conf. Commun. Work. ICC 2010, doi: 10.1109\/ICCW.2010.5503970."},{"key":"#cr-split#-e_1_3_2_1_29_1.1","doi-asserted-by":"crossref","unstructured":"M. A. Al-Garadi A. Mohamed A. Al-Ali X. Du I. Ali and M. Guizani 2020. A Survey of Machine and Deep Learning Methods for Internet of Things (IoT) Security IEEE Commun. Surv. Tutorials doi: 10.1109\/comst.2020.2988293. 10.1109\/comst.2020.2988293","DOI":"10.1109\/COMST.2020.2988293"},{"key":"#cr-split#-e_1_3_2_1_29_1.2","doi-asserted-by":"crossref","unstructured":"M. A. Al-Garadi A. Mohamed A. Al-Ali X. Du I. Ali and M. Guizani 2020. A Survey of Machine and Deep Learning Methods for Internet of Things (IoT) Security IEEE Commun. Surv. Tutorials doi: 10.1109\/comst.2020.2988293.","DOI":"10.1109\/COMST.2020.2988293"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1007678930559"},{"key":"e_1_3_2_1_32_1","unstructured":"G. A. Rummery and M. Niranjan 1994. On-Line Q-Learning Using Connectionist Systems.  G. A. Rummery and M. Niranjan 1994. On-Line Q-Learning Using Connectionist Systems."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.peva.2010.11.005"},{"key":"e_1_3_2_1_34_1","first-page":"725","volume-title":"Jondral, 2008. Detection of spectral resources in cognitive radios using reinforcement learning, 2008 IEEE Symp. New Front. Dyn. Spectr. Access Networks, DySPAN 2008","author":"Berthold U.","year":"1867","unstructured":"U. Berthold , F. Fu , M. Van Der Schaar, and F. K . Jondral, 2008. Detection of spectral resources in cognitive radios using reinforcement learning, 2008 IEEE Symp. New Front. Dyn. Spectr. Access Networks, DySPAN 2008 , no. 054 1867 , pp. 725 -- 729 , doi: 10.1109\/DYSPAN.2008.82. 10.1109\/DYSPAN.2008.82 U. Berthold, F. Fu, M. Van Der Schaar, and F. K. Jondral, 2008. Detection of spectral resources in cognitive radios using reinforcement learning, 2008 IEEE Symp. New Front. Dyn. Spectr. Access Networks, DySPAN 2008, no. 0541867, pp. 725--729, doi: 10.1109\/DYSPAN.2008.82."},{"key":"e_1_3_2_1_35_1","volume-title":"Proceedings - 20th International Conference on Computer Communications and Networks, ICCCN, doi: 10","author":"Felice M. Di","year":"2011","unstructured":"M. Di Felice , K. R. Chowdhury , A. Kassler , and L. Bononi , 2011. Adaptive sensing scheduling and spectrum selection in cognitive wireless mesh networks , in Proceedings - 20th International Conference on Computer Communications and Networks, ICCCN, doi: 10 .1109\/ICCCN. 2011 .6006042. 10.1109\/ICCCN.2011.6006042 M. Di Felice, K. R. Chowdhury, A. Kassler, and L. Bononi, 2011. Adaptive sensing scheduling and spectrum selection in cognitive wireless mesh networks, in Proceedings - 20th International Conference on Computer Communications and Networks, ICCCN, doi: 10.1109\/ICCCN.2011.6006042."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2010.2048766"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/T-WC.2008.070391"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11277-015-2840-1"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553501"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1049\/iet-rsn.2018.5127"}],"event":{"name":"WIMS 2020: The 10th International Conference on Web Intelligence, Mining and Semantics","location":"Biarritz France","acronym":"WIMS 2020"},"container-title":["Proceedings of the 10th International Conference on Web Intelligence, Mining and Semantics"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3405962.3405996","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3405962.3405996","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:31:52Z","timestamp":1750195912000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3405962.3405996"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,6,30]]},"references-count":41,"alternative-id":["10.1145\/3405962.3405996","10.1145\/3405962"],"URL":"https:\/\/doi.org\/10.1145\/3405962.3405996","relation":{},"subject":[],"published":{"date-parts":[[2020,6,30]]},"assertion":[{"value":"2020-08-24","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}