{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,23]],"date-time":"2025-12-23T10:33:49Z","timestamp":1766486029467,"version":"3.28.0"},"reference-count":23,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,8,23]],"date-time":"2021-08-23T00:00:00Z","timestamp":1629676800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,8,23]],"date-time":"2021-08-23T00:00:00Z","timestamp":1629676800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,8,23]]},"DOI":"10.23919\/eusipco54536.2021.9616008","type":"proceedings-article","created":{"date-parts":[[2021,12,8]],"date-time":"2021-12-08T21:55:53Z","timestamp":1639000553000},"page":"1471-1475","source":"Crossref","is-referenced-by-count":6,"title":["Low-rank State-action Value-function Approximation"],"prefix":"10.23919","author":[{"given":"Sergio","family":"Rozada","sequence":"first","affiliation":[]},{"given":"Victor","family":"Tenorio","sequence":"additional","affiliation":[]},{"given":"Antonio G.","family":"Marques","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","first-page":"1704","article-title":"Contextual decision processes with low bellman rank are pac-learnable","volume":"70","author":"jiang","year":"2017","journal-title":"Proc 7th Int Conf Machine Learning"},{"key":"ref11","article-title":"Value function approximation via low-rank models","author":"ong","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2016.2605141"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2017.2716382"},{"key":"ref14","article-title":"Harnessing structures for value-based planning and reinforcement learning","author":"yang","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref15","article-title":"Sample efficient reinforcement learning via low-rank matrix estimation","author":"shah","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref16","article-title":"Factorized Q-learning for large-scale multi-agent systems","author":"chen","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-72927-3_23"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"601","DOI":"10.1609\/icaps.v29i1.3527","article-title":"Fast feature selection for linear value function approximation","volume":"29","author":"behzadian","year":"2019","journal-title":"Proc Intl Conf Automated Planning and Scheduling"},{"key":"ref19","article-title":"Low-rank feature selection for reinforcement learning","author":"behzadian","year":"2018","journal-title":"ISAIM"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/BF02288367"},{"key":"ref3","article-title":"Playing Atari with deep reinforcement learning","author":"mnih","year":"2013","journal-title":"ArXiv Preprint"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/BF00992698","article-title":"Q-learning","volume":"8","author":"watkins","year":"1992","journal-title":"Machine Learning"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-89620-5"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2013.2279080"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1561\/2200000055"},{"journal-title":"REINFORCEMENT LEARNING AND OPTIMAL CONTROL","year":"2019","author":"bertsekas","key":"ref2"},{"journal-title":"Reinforcement Learning An Introduction","year":"2011","author":"sutton","key":"ref1"},{"key":"ref9","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v30i1.10311","article-title":"Incremental stochastic factorization for online reinforcement learning","author":"barreto","year":"2016","journal-title":"Proc AAAI Conf Artificial Intelligence"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2019.2914998"},{"key":"ref22","article-title":"OpenAI Gym","author":"brockman","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-004-0564-1"},{"journal-title":"Online code repository Low-rank state-action value-function approximation","year":"0","author":"rozada","key":"ref23"}],"event":{"name":"2021 29th European Signal Processing Conference (EUSIPCO)","start":{"date-parts":[[2021,8,23]]},"location":"Dublin, Ireland","end":{"date-parts":[[2021,8,27]]}},"container-title":["2021 29th European Signal Processing Conference (EUSIPCO)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9615915\/9615917\/09616008.pdf?arnumber=9616008","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,17]],"date-time":"2023-01-17T18:13:05Z","timestamp":1673979185000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9616008\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,8,23]]},"references-count":23,"URL":"https:\/\/doi.org\/10.23919\/eusipco54536.2021.9616008","relation":{},"subject":[],"published":{"date-parts":[[2021,8,23]]}}}