{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T05:37:59Z","timestamp":1725514679468},"reference-count":35,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016,3]]},"DOI":"10.1109\/icassp.2016.7472542","type":"proceedings-article","created":{"date-parts":[[2016,6,24]],"date-time":"2016-06-24T01:58:30Z","timestamp":1466733510000},"page":"4568-4572","source":"Crossref","is-referenced-by-count":0,"title":["Learning in constrained stochastic dynamic potential games"],"prefix":"10.1109","author":[{"given":"Sergio Valcarcel","family":"Macua","sequence":"first","affiliation":[]},{"given":"Santiago","family":"Zazo","sequence":"additional","affiliation":[]},{"given":"Javier","family":"Zazo","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2010.07.027"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1002\/0471787779"},{"key":"ref31","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"preprint arXiv 1509 02971v1"},{"key":"ref30","first-page":"995","article-title":"Learning of nonparametric control policies with high-dimensional state features","author":"van hoof","year":"2015","journal-title":"Proc Int Conf Artificial Intelligence and Statistics (AISTATS)"},{"article-title":"CVX: Matlab software for disciplined convex programming, version 2.1","year":"2014","author":"grant","key":"ref35"},{"key":"ref34","article-title":"Cooperative distributed multiagent optimization","author":"nedic","year":"2010","journal-title":"Convex Optimization in Signal Processing and Communications"},{"key":"ref10","article-title":"Dynamic potential games in communications: Fundamentals and applications","author":"zazo","year":"2015","journal-title":"preprint arXiv 1509 01313"},{"journal-title":"Nonlinear Programming","year":"1999","author":"bertsekas","key":"ref11"},{"journal-title":"Calculus Multi-Variable Calculus and Linear Algebra With Applications to Differential Equations and Probability","year":"1969","author":"apostol","key":"ref12"},{"journal-title":"Reinforcement Learning An Introduction","year":"1998","author":"sutton","key":"ref13"},{"journal-title":"Neuro-Dynamic Programming","year":"1996","author":"bertsekas","key":"ref14"},{"journal-title":"Dynamic Programming and Optimal Control","year":"2012","author":"bertsekas","key":"ref15"},{"journal-title":"Algorithms for Reinforcement Learning","year":"2009","author":"szepesvari","key":"ref16"},{"key":"ref17","doi-asserted-by":"crossref","DOI":"10.1201\/9781439821091","author":"busoniu","year":"2010","journal-title":"Reinforcement Learning and Dynamic Programming Using Function Approximators"},{"key":"ref18","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-642-27645-3","author":"wiering","year":"2012","journal-title":"Reinforcement Learning State-of-the-Art"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1561\/2200000042"},{"key":"ref28","first-page":"1177","article-title":"Fitted q-iteration by advantage weighted regression","author":"neumann","year":"2009","journal-title":"Proc Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1287\/moor.1110.0500"},{"key":"ref27","first-page":"2063","article-title":"Ex?a?: An effective algorithm for continuous actions reinforcement learning problems","author":"martin h","year":"2009","journal-title":"35th Annual IEEE Industrial Electronics Conf (IECON)"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2006.1660950"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/s13235-014-0105-3"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2011.5967381"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/0022-0531(78)90055-8"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.3182\/20090506-3-SF-4003.00002"},{"key":"ref7","article-title":"Non cooperative dynamic games: a control theoretic approach","author":"dechert","year":"1997","journal-title":"Tech Rep"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1006\/game.1996.0044"},{"key":"ref9","article-title":"A new framework for solving dynamic schedulling games","author":"zazo","year":"2015","journal-title":"Proc IEEE Int Conf on Acoustics Speech and Signal Processing (ICASSP)"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.2307\/2950588"},{"key":"ref20","article-title":"Value function approximation in reinforcement learning using the fourier basis","author":"konidaris","year":"2008","journal-title":"Proc AAAI Conf on Artificial Intelligence"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2007.368199"},{"key":"ref21","article-title":"Fitted Q-iteration in continuous action-space MDPs","author":"antos","year":"2007","journal-title":"Neural Information Processing Systems (NIPS) Vancouver Canada"},{"key":"ref24","article-title":"Online stochastic optimization under correlated bandit feedback","author":"azar","year":"0","journal-title":"Proc Int Conf on Machine Learning (ICML) Beijing 2014 JMLR Workshop and Conference Proceedings"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-87481-2_5"},{"key":"ref26","first-page":"833","article-title":"Reinforcement learning in continuous action spaces through sequential monte carlo methods","author":"lazaric","year":"2007","journal-title":"Neural Information Processing Systems (NIPS) Vancouver Canada"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017988514716"}],"event":{"name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2016,3,20]]},"location":"Shanghai","end":{"date-parts":[[2016,3,25]]}},"container-title":["2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7465907\/7471614\/07472542.pdf?arnumber=7472542","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,24]],"date-time":"2017-06-24T17:00:23Z","timestamp":1498323623000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7472542\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,3]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/icassp.2016.7472542","relation":{},"subject":[],"published":{"date-parts":[[2016,3]]}}}