{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,11]],"date-time":"2026-04-11T18:42:23Z","timestamp":1775932943758,"version":"3.50.1"},"reference-count":15,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,9]]},"DOI":"10.1109\/icacci.2017.8125811","type":"proceedings-article","created":{"date-parts":[[2017,12,4]],"date-time":"2017-12-04T17:26:11Z","timestamp":1512408371000},"page":"26-32","source":"Crossref","is-referenced-by-count":29,"title":["Comparison of reinforcement learning algorithms applied to the cart-pole problem"],"prefix":"10.1109","author":[{"given":"Savinay","family":"Nagendra","sequence":"first","affiliation":[]},{"given":"Nikhil","family":"Podila","sequence":"additional","affiliation":[]},{"given":"Rashmi","family":"Ugarakhod","sequence":"additional","affiliation":[]},{"given":"Koshy","family":"George","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","first-page":"273","article-title":"On the significance of Markov decision process","volume":"1327","author":"sutton","year":"1997","journal-title":"Artificial Neural Networks International Conference on Artificial Neural Networks (ICANN)"},{"key":"ref11","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/BF00933260"},{"key":"ref13","first-page":"1471","article-title":"Variance reduction techniques for gradient estimates in reinforcement learning","volume":"5","author":"greensmith","year":"2004","journal-title":"Journal of Machine Learning Research"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/IECON.1991.239008"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/s12555-009-0419-x"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/AMS.2012.21"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TIE.2006.870667"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1983.6313077"},{"key":"ref5","first-page":"137","article-title":"Boxes: An experiment in adaptive control","author":"michie","year":"1968","journal-title":"Machine Intelligence"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/37.24809"},{"key":"ref7","author":"sutton","year":"1984","journal-title":"Temporal credit assignment in reinforcement learning"},{"key":"ref2","first-page":"3829","article-title":"Reinforcement learning with fuzzy evaluative feedback for a biped robot","author":"zhou","year":"2000","journal-title":"Proceedings of the IEEE International Conference on Robotics and Automation (ICRA)"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/0004-3702(89)90049-0"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/11564096_32"}],"event":{"name":"2017 International Conference on Advances in Computing, Communications and Informatics (ICACCI)","location":"Udupi","start":{"date-parts":[[2017,9,13]]},"end":{"date-parts":[[2017,9,16]]}},"container-title":["2017 International Conference on Advances in Computing, Communications and Informatics (ICACCI)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8119306\/8125802\/08125811.pdf?arnumber=8125811","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2018,1,15]],"date-time":"2018-01-15T17:51:17Z","timestamp":1516038677000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/8125811\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,9]]},"references-count":15,"URL":"https:\/\/doi.org\/10.1109\/icacci.2017.8125811","relation":{},"subject":[],"published":{"date-parts":[[2017,9]]}}}