{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,4,20]],"date-time":"2023-04-20T07:08:46Z","timestamp":1681974526963},"reference-count":54,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2012,12,1]],"date-time":"2012-12-01T00:00:00Z","timestamp":1354320000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Comput. Intell. AI Games"],"published-print":{"date-parts":[[2012,12]]},"DOI":"10.1109\/tciaig.2012.2212279","type":"journal-article","created":{"date-parts":[[2012,8,8]],"date-time":"2012-08-08T18:02:44Z","timestamp":1344448964000},"page":"309-328","source":"Crossref","is-referenced-by-count":15,"title":["Bayesian-Game-Based Fuzzy Reinforcement Learning Control for Decentralized POMDPs"],"prefix":"10.1109","volume":"4","author":[{"given":"Rajneesh","family":"Sharma","sequence":"first","affiliation":[]},{"given":"Matthijs T. J.","family":"Spaan","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","first-page":"389","volume":"3720","author":"szer","year":"2005","journal-title":"ECML'05 Proceedings of the 16th European conference on Machine Learning"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-009-9103-z"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/FUZZY.2011.6007675"},{"key":"ref32","first-page":"136","article-title":"Approximate solutions for partially observable stochastic games with common payoffs","author":"emery-montemerlo","year":"2004","journal-title":"Proc 3rd Int l Joint Conf Autonomous Agents and Multi Agent Systems"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/9780470544785"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022676722315"},{"key":"ref37","author":"bertsekas","year":"1996","journal-title":"Neuro-Dynamic Programming"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TFUZZ.2007.903323"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/5326.704563"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/72.159070"},{"key":"ref28","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-007-9026-5"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2007.913919"},{"key":"ref2","first-page":"587","article-title":"Designing a successful adaptive agent for TAC ad auction","author":"chang","year":"2010","journal-title":"Proc Euro Conf Artif Intell"},{"key":"ref1","first-page":"146","article-title":"AstonCAT-Plus: An efficient specialist for the TAC market design tournament","author":"chang","year":"2011","journal-title":"Proc Int Joint Conf Artif Intell"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1287\/moor.27.4.819.297"},{"key":"ref22","doi-asserted-by":"crossref","first-page":"389","DOI":"10.1613\/jair.1024","article-title":"The communicative multi agent team decision problem: Analyzing teamwork theories and models","volume":"16","author":"pynadath","year":"2002","journal-title":"J Artif Intell Res"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/1-4020-3389-3_8"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1287\/moor.12.3.441"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/4-431-35881-1_18"},{"key":"ref26","doi-asserted-by":"crossref","first-page":"289","DOI":"10.1613\/jair.2447","article-title":"Optimal and approximate Q-value functions for decentralized POMDPs","volume":"32","author":"oliehoek","year":"2008","journal-title":"J Artif Intell Res"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/FUZZY.2010.5584614"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1007\/11559221_31"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.2316\/Journal.206.2006.2.206-2795"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1016\/S0165-0114(02)00299-3"},{"key":"ref53","author":"owen","year":"1982","journal-title":"Game Theory"},{"key":"ref52","first-page":"251","article-title":"FDMS with Q learning: A neuro fuzzy approach to POMDPs","volume":"1","author":"karadoniz","year":"2004","journal-title":"Int J Adv Robot Syst"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2005.1570273"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.1996.574318"},{"key":"ref40","first-page":"705","article-title":"Taming Decentralized POMDPs: Towards efficient policy computation for multi agent settings","author":"nair","year":"2003","journal-title":"Proc Int Joint Conf Artif Intell"},{"key":"ref12","first-page":"1825","article-title":"Reinforcement learning for adaptive routing","author":"peshkin","year":"2002","journal-title":"Proc Int Joint Conf Neural Netw"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-11688-9_18"},{"key":"ref14","first-page":"201","article-title":"Event-detecting multi-agent MDPs: Complexity and constant-factor approximation","author":"kumar","year":"2009","journal-title":"Proc Int Joint Conf Artif Intell"},{"key":"ref15","first-page":"561","article-title":"Constraint-based dynamic programming for decentralized POMDPs with structured interactions","author":"kumar","year":"2009","journal-title":"Proc Int Conf Auton Agents and Multi Agent Syst"},{"key":"ref16","first-page":"133","article-title":"Networked distributed POMDPs: A synthesis of distributed constraint optimization and POMDPs","author":"nair","year":"2005","journal-title":"Proc Nat Conf Artif Intell"},{"key":"ref17","author":"jackson","year":"2010","journal-title":"An applications of a Dec-POMDP in a real-time strategy game"},{"key":"ref18","first-page":"196","article-title":"IMPLANT: An integrated MDP and POMDP learning agent for adaptive games","author":"tan","year":"2009","journal-title":"Proc Artif Intell Interactive Digit Entertain Conf"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(98)00023-X"},{"key":"ref4","first-page":"2027","article-title":"Scaling up optimal heuristic search in Dec-POMDPs via incremental expansion","author":"spaan","year":"2011","journal-title":"Proc Int Joint Conf Artif Intell"},{"key":"ref3","author":"oliehoek","year":"2012","journal-title":"Reinforcement Learning State of the Art"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1978.1101704"},{"key":"ref5","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","author":"puterman","year":"1994","journal-title":"Markov Decision ProcessesDiscrete Stochastic Dynamic Programming"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2012.6224917"},{"key":"ref7","author":"thrun","year":"2005","journal-title":"Probabilistic Robotics"},{"key":"ref49","first-page":"1040","article-title":"An approximate dynamic programming approach to decentralized control of stochastic systems","author":"cogill","year":"2004","journal-title":"Proc Allerton Conf Commun Control Comput"},{"key":"ref9","first-page":"1747","article-title":"Agent-based coordination of human-multirobot teams in complex environments","author":"carlin","year":"2010","journal-title":"Proc Int Conf Auton Agents and Multi Agent Syst"},{"key":"ref46","first-page":"32","article-title":"The complexity of decentralized control of Markov decision processes","author":"bernstein","year":"2000","journal-title":"Proc 16th Conf Uncertainty Artif Intell"},{"key":"ref45","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1613\/jair.1579","article-title":"A framework for sequential planning in multi-agent settings","volume":"24","author":"gmytrasiewicz","year":"2005","journal-title":"J Artif Intell Res"},{"key":"ref48","first-page":"1","article-title":"Optimizing memory-bounded controllers for decentralized POMDP","author":"amato","year":"2007","journal-title":"Proc 23rd Conf Uncertainty Artif Intell"},{"key":"ref47","author":"poupart","year":"2004","journal-title":"Advances in Neural Information Processing Systems 16"},{"key":"ref42","first-page":"1287","article-title":"Bounded policy iteration for decentralized POMDPs","author":"bernstein","year":"2005","journal-title":"Proc 19th Int Joint Conf Artif Intell"},{"key":"ref41","first-page":"593","article-title":"Achieving goals in decentralized POMDPs","author":"amato","year":"2009","journal-title":"Proc 8th Int Conf Autonom Syst Multi Agent Syst"},{"key":"ref44","first-page":"489","article-title":"Learning to cooperate via policy search","author":"peshkin","year":"2000","journal-title":"Uncertainty Artif Intell Proc 10th Conf"},{"key":"ref43","author":"ross","year":"2004","journal-title":"Fuzzy Logic with Engineering Applications"}],"container-title":["IEEE Transactions on Computational Intelligence and AI in Games"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/4804728\/6378413\/06263288.pdf?arnumber=6263288","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,26]],"date-time":"2022-01-26T00:17:22Z","timestamp":1643156242000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6263288\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,12]]},"references-count":54,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tciaig.2012.2212279","relation":{},"ISSN":["1943-068X","1943-0698"],"issn-type":[{"value":"1943-068X","type":"print"},{"value":"1943-0698","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012,12]]}}}