{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,23]],"date-time":"2024-10-23T02:27:13Z","timestamp":1729650433563,"version":"3.28.0"},"reference-count":31,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012,4]]},"DOI":"10.1109\/icnsc.2012.6204909","type":"proceedings-article","created":{"date-parts":[[2012,6,1]],"date-time":"2012-06-01T18:52:12Z","timestamp":1338576732000},"page":"157-162","source":"Crossref","is-referenced-by-count":3,"title":["Reinforcement learning algorithms for semi-Markov decision processes with average reward"],"prefix":"10.1109","author":[{"given":"Yanjie","family":"Li","sequence":"first","affiliation":[]}],"member":"263","reference":[{"key":"19","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"17","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"18","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114727"},{"journal-title":"Neuro-Dynamic Programming","year":"1996","author":"bertsekas","key":"15"},{"key":"16","doi-asserted-by":"publisher","DOI":"10.1007\/BF00115009"},{"key":"13","article-title":"Infinite-horizon gradient estimation for semi-Markov decision processes","author":"li","year":"0","journal-title":"8th Asian Control Conference Kaohsiung 2011"},{"journal-title":"Reinforcement Learning An Introduction","year":"1998","author":"sutton","key":"14"},{"key":"11","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-69082-7"},{"key":"12","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2003.811252"},{"key":"21","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1613\/jair.806","article-title":"Infinite-horizon policy-gradient estimation","volume":"15","author":"baxter","year":"2001","journal-title":"Journal of Artificial Intelligence Research"},{"key":"20","first-page":"1057","article-title":"Policy gradient methods for reinforcement 
learning with function approximation","volume":"12","author":"sutton","year":"2000","journal-title":"Proceedings of the Conference on Advances in Neural Information Processing Systems"},{"key":"22","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2005.847037"},{"key":"23","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012999361974"},{"key":"24","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2008.925743"},{"key":"25","doi-asserted-by":"publisher","DOI":"10.1023\/B:MACH.0000019802.64038.6c"},{"key":"26","doi-asserted-by":"publisher","DOI":"10.1109\/9.905687"},{"key":"27","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2006.02.023"},{"key":"28","first-page":"152","article-title":"Reinforcement learning in POMDPs with function approximation","author":"kimura","year":"1997","journal-title":"Proceedings of the International Conference on Machine Learning"},{"key":"29","first-page":"968","article-title":"Gradient descent for general reinforcement learning","author":"baird","year":"1998","journal-title":"Proceedings of the Conference on Advances in Neural Information Processing Systems"},{"key":"3","article-title":"Semi-Markov and Decision Processes","volume":"2","author":"howard","year":"1971","journal-title":"Dynamic Probabilistic Systems"},{"journal-title":"Applied Semi-Markov Processes","year":"2005","author":"janssen","key":"2"},{"key":"10","doi-asserted-by":"publisher","DOI":"10.1016\/S0377-2217(02)00874-3"},{"key":"1","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4613-3288-6"},{"key":"30","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"1999","journal-title":"Proceedings of the Conference on Advances in Neural Information Processing Systems"},{"key":"7","first-page":"393","article-title":"Reinforcement learning methods for continuous-time Markov decision problems","volume":"7","author":"bradtke","year":"1995","journal-title":"Proceedings of the 
Conference on Advances in Neural Information Processing Systems"},{"journal-title":"Dynamic Programming and Optimal Control","year":"1995","author":"bertsekas","key":"6"},{"journal-title":"Stochastic Models An Algorithmic Approach","year":"1994","author":"tijms","key":"5"},{"key":"31","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012901385691"},{"key":"4","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","author":"puterman","year":"1994","journal-title":"Markov Decision Processes Discrete Stochastic Dynamic Programming"},{"key":"9","doi-asserted-by":"publisher","DOI":"10.1287\/mnsc.45.4.560"},{"key":"8","first-page":"202","article-title":"Self-improving factory simulation using continuous-time average-reward reinforcement learning","author":"mahadevan","year":"1997","journal-title":"Proceedings of the International Conference on Machine Learning"}],"event":{"name":"2012 9th IEEE International Conference on Networking, Sensing and Control (ICNSC)","start":{"date-parts":[[2012,4,11]]},"location":"Beijing, China","end":{"date-parts":[[2012,4,14]]}},"container-title":["Proceedings of 2012 9th IEEE International Conference on Networking, Sensing and Control"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/6201003\/6204880\/06204909.pdf?arnumber=6204909","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,16]],"date-time":"2022-01-16T15:56:32Z","timestamp":1642348592000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6204909\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,4]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/icnsc.2012.6204909","relation":{},"subject":[],"published":{"date-parts":[[2012,4]]}}}