{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T15:02:15Z","timestamp":1773414135631,"version":"3.50.1"},"reference-count":55,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Signal Process."],"published-print":{"date-parts":[[2024]]},"DOI":"10.1109\/tsp.2024.3379089","type":"journal-article","created":{"date-parts":[[2024,3,20]],"date-time":"2024-03-20T18:37:32Z","timestamp":1710959852000},"page":"1634-1649","source":"Crossref","is-referenced-by-count":10,"title":["Tensor and Matrix Low-Rank Value-Function Approximation in Reinforcement Learning"],"prefix":"10.1109","volume":"72","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1042-7502","authenticated-orcid":false,"given":"Sergio","family":"Rozada","sequence":"first","affiliation":[{"name":"Department of Signal Theory and Communications, King Juan Carlos University, Madrid, Spain"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6310-6345","authenticated-orcid":false,"given":"Santiago","family":"Paternain","sequence":"additional","affiliation":[{"name":"Department of Electrical, Computer and Systems Engineering, Rensselaer Polytechnic Institute, Troy, MI, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4642-7718","authenticated-orcid":false,"given":"Antonio G.","family":"Marques","sequence":"additional","affiliation":[{"name":"Department of Signal Theory and Communications, King Juan Carlos University, Madrid, Spain"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/tnn.1998.712192"},{"key":"ref2","volume-title":"Reinforcement Learning and Optimal Control","author":"Bertsekas","year":"2019"},{"key":"ref3","volume-title":"Dynamic Programming and Optimal Control","volume":"1","author":"Bertsekas,","year":"2000"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref6","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Brown","year":"2020"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-15-4095-0"},{"key":"ref9","volume-title":"Neuro-Dynamic Programming","author":"Bertsekas","year":"1996"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-72927-3_23"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1609\/icaps.v29i1.3527"},{"key":"ref12","article-title":"Low-rank feature selection for reinforcement learning","volume-title":"Proc. Int. Symp. Artif. Intell. Math. (ISAIM)","author":"Behzadian","year":"2018"},{"key":"ref13","article-title":"Playing Atari with deep reinforcement learning","author":"Mnih","year":"2013"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/BF02288367"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-2227-2"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1561\/2200000055"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2013.2279080"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10311"},{"key":"ref20","first-page":"1704","article-title":"Contextual decision processes with low Bellman rank are PAC-learnable","volume-title":"Proc. 34th Int. Conf. Mach. Learn. (PMLR)","volume":"70","author":"Jiang","year":"2017"},{"key":"ref21","first-page":"20095","article-title":"FLAMBE: Structural complexity and representation learning of low rank MDPs","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Agarwal","year":"2020"},{"key":"ref22","article-title":"Representation learning for online and offline RL in low-rank MDPs","author":"Uehara","year":"2021"},{"key":"ref23","article-title":"Value function approximation via low-rank models","author":"Ong","year":"2015"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2016.2605141"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2017.2716382"},{"key":"ref26","article-title":"Harnessing structures for value-based planning and reinforcement learning","author":"Yang","year":"2019"},{"key":"ref27","first-page":"12092","article-title":"Sample efficient reinforcement learning via low-rank matrix estimation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Shah","year":"2020"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3589973"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1137\/07070111X"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2017.2690524"},{"key":"ref31","article-title":"Tensor comprehensions: Framework-agnostic high-performance machine learning abstractions","author":"Vasilache","year":"2018"},{"key":"ref32","first-page":"7301","article-title":"TESSERACT: Tensorised actors for multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn. (PMLR)","author":"Mahajan","year":"2021"},{"key":"ref33","article-title":"Model based multi-agent reinforcement learning with tensor decompositions","author":"Vaart","year":"2021"},{"key":"ref34","first-page":"193","article-title":"Reinforcement learning of POMDPs using spectral methods","volume-title":"Proc. Conf. Learn. Theory (PMLR)","author":"Azizzadenesheli","year":"2016"},{"key":"ref35","article-title":"Reinforcement learning in rich-observation MDPs using spectral methods","volume-title":"Proc. 29th Annu. Conf. Learn. Theory","author":"Azizzadenesheli","year":"2016"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013739"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2021.3055957"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1137\/080738970"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2010.2044061"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1002\/sapm192761164"},{"key":"ref41","volume-title":"Dynamic Programming","author":"Bellman","year":"1957"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012902399824"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.23919\/EUSIPCO54536.2021.9616008"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2019.2914998"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-004-0564-1"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/11503415_37"},{"key":"ref47","volume-title":"Deep Learning","author":"Goodfellow","year":"2016"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1016\/S0169-7439(97)00032-4"},{"key":"ref49","article-title":"OpenAI Gym","author":"Brockman","year":"2016"},{"key":"ref50","article-title":"Goddards rocket problem","year":"2024"},{"key":"ref51","article-title":"Gym classics","author":"Daley","year":"2024"},{"key":"ref52","article-title":"An environment for autonomous driving decision-making","author":"Leurent","year":"2024"},{"key":"ref53","article-title":"Online code repository: Tensor and matrix low-rank value-function approximation in reinforcement learning","author":"Rozada","year":"2024"},{"key":"ref54","article-title":"Tensorly: Tensor learning in Python","author":"Kossaifi","year":"2016"},{"key":"ref55","article-title":"Prioritized experience replay","author":"Schaul","year":"2015"}],"container-title":["IEEE Transactions on Signal Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/78\/10347386\/10476639.pdf?arnumber=10476639","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,9]],"date-time":"2024-04-09T05:19:29Z","timestamp":1712639969000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10476639\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":55,"URL":"https:\/\/doi.org\/10.1109\/tsp.2024.3379089","relation":{},"ISSN":["1053-587X","1941-0476"],"issn-type":[{"value":"1053-587X","type":"print"},{"value":"1941-0476","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]}}}