{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T17:09:52Z","timestamp":1760202592918,"version":"3.28.0"},"reference-count":32,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2008,6]]},"DOI":"10.1109\/ijcnn.2008.4633942","type":"proceedings-article","created":{"date-parts":[[2008,9,29]],"date-time":"2008-09-29T20:31:26Z","timestamp":1222720286000},"page":"1135-1144","source":"Crossref","is-referenced-by-count":1,"title":["A novel heuristic Q-learning algorithm for solving stochastic games"],"prefix":"10.1109","author":[{"family":"Jianwei Li","sequence":"first","affiliation":[]},{"family":"Weiyi Liu","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"article-title":"algorithms for sequetial decision making","year":"1996","author":"littinan","key":"19"},{"key":"17","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-377-6.50052-9"},{"key":"18","first-page":"322","article-title":"friend-or-foe q-learning in general-sum games","author":"littman","year":"2001","journal-title":"Proceedings of the Eighteenth International Conference on Machine Learning"},{"key":"15","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"16","article-title":"on complexity of solving markov decision problems","author":"littman","year":"1995","journal-title":"Proceedings of the Eleventh Annual Conference on Uncertainty in Artificial Intelligence(UAI-95)"},{"year":"0","key":"13"},{"year":"0","key":"14"},{"key":"11","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","article-title":"reinforcement learning: a survey","volume":"4","author":"kaelbling","year":"1996","journal-title":"Journal of Artificial Intelligence Research"},{"key":"12","first-page":"253","article-title":"graphical models for game theory","author":"kearns","year":"2001","journal-title":"Seventeenth Conference on Uncertainty in Artificial Intelligence"},{"key":"21","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.36.1.48"},{"journal-title":"Machine Learning","year":"1997","author":"mitchell","key":"20"},{"key":"22","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","author":"puterman","year":"1994","journal-title":"Markov Decision Processes-Discrete Stochastic Dynamic Programming"},{"journal-title":"Artificial Intelligence - A Modern Approach","year":"2002","author":"russel","key":"23"},{"key":"24","article-title":"a general method for multi-agent learning and incremental self-improvement in unrestricted environments","author":"schmidhuber","year":"1996","journal-title":"Evolutionary Computation Theory and Applications"},{"key":"25","first-page":"655","article-title":"robust reinforcement learning in motion planning","volume":"6","author":"singh","year":"1994","journal-title":"Advances in neural information processing systems"},{"key":"26","doi-asserted-by":"publisher","DOI":"10.1023\/A:1007678930559"},{"key":"27","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-200-7.50073-8"},{"key":"28","doi-asserted-by":"publisher","DOI":"10.1145\/203330.203343"},{"key":"29","doi-asserted-by":"publisher","DOI":"10.1007\/BF00993306"},{"year":"0","key":"3"},{"key":"2","first-page":"20","article-title":"residual q-learning applied to visual attention","author":"bandera","year":"1996","journal-title":"Thirteenth International Conference on Machine Learning"},{"key":"10","doi-asserted-by":"publisher","DOI":"10.1007\/BF00993348"},{"journal-title":"Dynamic Programming","year":"1957","author":"bellman","key":"1"},{"journal-title":"Learning from delayed rewards","year":"1989","author":"watkins","key":"30"},{"key":"7","doi-asserted-by":"publisher","DOI":"10.1162\/jmlr.2003.4.6.1039"},{"journal-title":"Dynamic Programming and Markov Processes","year":"1960","author":"howard","key":"6"},{"journal-title":"Linear Programming Methods and Applications","year":"0","author":"gass","key":"32"},{"journal-title":"Continuous State Space Q-Learning for Control of Nonlinear Systems","year":"2001","author":"hagen","key":"5"},{"key":"31","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"journal-title":"Game Theory","year":"1991","author":"fudenberg","key":"4"},{"year":"0","key":"9"},{"year":"0","key":"8"}],"event":{"name":"2008 IEEE International Joint Conference on Neural Networks (IJCNN 2008 - Hong Kong)","start":{"date-parts":[[2008,6,1]]},"location":"Hong Kong, China","end":{"date-parts":[[2008,6,8]]}},"container-title":["2008 IEEE International Joint Conference on Neural Networks (IEEE World Congress on Computational Intelligence)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/4625775\/4633757\/04633942.pdf?arnumber=4633942","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,9,17]],"date-time":"2021-09-17T13:51:51Z","timestamp":1631886711000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/4633942\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008,6]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/ijcnn.2008.4633942","relation":{},"subject":[],"published":{"date-parts":[[2008,6]]}}}