{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T23:21:12Z","timestamp":1768346472142,"version":"3.49.0"},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,2,21]],"date-time":"2022-02-21T00:00:00Z","timestamp":1645401600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,2,21]],"date-time":"2022-02-21T00:00:00Z","timestamp":1645401600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,2,21]]},"DOI":"10.1109\/icaiic54071.2022.9722618","type":"proceedings-article","created":{"date-parts":[[2022,3,1]],"date-time":"2022-03-01T20:41:38Z","timestamp":1646167298000},"page":"284-287","source":"Crossref","is-referenced-by-count":7,"title":["A Survey of Markov Model in Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Tianhan","family":"Gao","sequence":"first","affiliation":[{"name":"Northeastern University,Software College,Shenyang,China"}]},{"given":"Baicheng","family":"Chen","sequence":"additional","affiliation":[{"name":"Northeastern University,Software College,Shenyang,China"}]},{"given":"Qingwei","family":"Mi","sequence":"additional","affiliation":[{"name":"Northeastern University,Software College,Shenyang,China"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/BF01919083"},{"key":"ref11","article-title":"Playing Atari with deep reinforcement learning","author":"mnih","year":"2013"},{"key":"ref12","first-page":"387","article-title":"Deterministic policy gradient algorithms","volume":"32","author":"silver","year":"2014","journal-title":"Proceedings of the 31st International Conference on Machine Learning"},{"key":"ref13","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015"},{"key":"ref14","first-page":"1008","article-title":"Actor-critic algorithms[C]\/\/Advances in neural information processing systems","author":"r","year":"2000"},{"key":"ref15","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016"},{"key":"ref16","first-page":"2829","article-title":"Continuous deep q-learning with model-based acceleration","author":"s","year":"2016"},{"key":"ref17","first-page":"1889","article-title":"Trust region policy optimization","author":"j","year":"2015"},{"key":"ref18","article-title":"Proximal Policy Optimization Algorithms","author":"schulman","year":"2017"},{"key":"ref19","article-title":"Addressing function approximation error in actor-critic methods","author":"fujimoto","year":"2018"},{"key":"ref4","author":"sutton","year":"2012","journal-title":"An Introduction to Reinforcement Learning"},{"key":"ref3","author":"ross","year":"1996","journal-title":"Stochastic Processes"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1214\/lnms\/1196285381"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1002\/9781119387596"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-12-388403-9.00013-8"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-12-812343-0.00003-5"},{"key":"ref2","author":"markov","year":"2010","journal-title":"Theory of Algorithms"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/MASSP.1986.1165342"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1002\/9781119387596"},{"key":"ref20","first-page":"172395","article-title":"Multiagent cooperation and competition with deep reinforcement learning","volume":"12","author":"a","year":"2017","journal-title":"PLoS ONE"},{"key":"ref22","article-title":"Counterfactual multi-agent policy gradients","volume":"32","author":"j","year":"2018","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"ref21","article-title":"Value-decomposition networks for cooperative multi-agent learning","author":"p","year":"2017"},{"key":"ref24","first-page":"5887","article-title":"Qtran: Learning to factorize with transformation for cooperative multi-agent reinforcement learning","author":"k","year":"2019"},{"key":"ref23","article-title":"QMIX: Monotonic value function factorisation for deep multi-agent reinforcement learning","author":"rashid","year":"2018"},{"key":"ref26","author":"z","year":"2015","journal-title":"Hidden Semi-Markov Models Theory Algorithms and Applications"},{"key":"ref25","author":"t","year":"2012","journal-title":"Partially Observable Markov Decision Processes"}],"event":{"name":"2022 International Conference on Artificial Intelligence in Information and Communication (ICAIIC)","location":"Jeju Island, Korea, Republic of","start":{"date-parts":[[2022,2,21]]},"end":{"date-parts":[[2022,2,24]]}},"container-title":["2022 International Conference on Artificial Intelligence in Information and Communication (ICAIIC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9722611\/9722613\/09722618.pdf?arnumber=9722618","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,20]],"date-time":"2022-06-20T21:21:05Z","timestamp":1655760065000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9722618\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,2,21]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/icaiic54071.2022.9722618","relation":{},"subject":[],"published":{"date-parts":[[2022,2,21]]}}}