{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T04:52:47Z","timestamp":1773377567911,"version":"3.50.1"},"reference-count":50,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,7,12]],"date-time":"2021-07-12T00:00:00Z","timestamp":1626048000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,7,12]],"date-time":"2021-07-12T00:00:00Z","timestamp":1626048000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,7,12]]},"DOI":"10.1109\/isit45174.2021.9518158","type":"proceedings-article","created":{"date-parts":[[2021,9,1]],"date-time":"2021-09-01T16:52:42Z","timestamp":1630515162000},"page":"1682-1687","source":"Crossref","is-referenced-by-count":1,"title":["Learning Good State and Action Representations via Tensor Decomposition"],"prefix":"10.1109","author":[{"given":"Chengzhuo","family":"Ni","sequence":"first","affiliation":[{"name":"Princeton University,NJ,USA,08544"}]},{"given":"Anru R.","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Wisconsin-Madison,Madison,WI,USA,53706"}]},{"given":"Yaqi","family":"Duan","sequence":"additional","affiliation":[{"name":"Princeton University,NJ,USA,08544"}]},{"given":"Mengdi","family":"Wang","sequence":"additional","affiliation":[{"name":"Princeton University,NJ,USA,08544"}]}],"member":"263","reference":[{"key":"ref39","first-page":"1704","article-title":"Contextual decision processes with low bellman rank are pac-learnable","author":"jiang","year":"0","journal-title":"Proceedings of the 34th International Conference on Machine Learning-Volume 70"},{"key":"ref38","article-title":"Reinforcement learning in rich-observation mdps using spectral methods","author":"azizzadenesheli","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1137\/07070111X"},{"key":"ref32","article-title":"Guaranteed non-orthogonal tensor decomposition via alternating rank-1 updates","author":"anandkumar","year":"2014","journal-title":"ArXiv Preprint"},{"key":"ref31","first-page":"793","article-title":"Sublinear time orthogonal tensor decomposition","author":"song","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2018.2841377"},{"key":"ref37","volume":"1","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref36","author":"bertsekas","year":"2007","journal-title":"Dynamic Programming and Optimal Control"},{"key":"ref35","author":"levin","year":"2009","journal-title":"Markov Chains and Mixing Times"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2013.2297439"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1137\/S0895479898346995"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1137\/S0895479896305696"},{"key":"ref29","first-page":"2897","article-title":"A statistical model for tensor pca","author":"richard","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref2","first-page":"5616","article-title":"Limiting extrapolation in linear approximate value iteration","author":"zanette","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref1","first-page":"1107","article-title":"Least-squares policy iteration","volume":"4","author":"lagoudakis","year":"2003","journal-title":"Journal of Machine Learning Research"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114724"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273545"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"3819","DOI":"10.1109\/CDC.2002.1184960","article-title":"State aggregation in markov decision processes","volume":"4","author":"ren","year":"2002","journal-title":"Decision and Control 2002 Proceedings of the 41st IEEE Conference on"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273589"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/1102351.1102421"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1561\/2200000003"},{"key":"ref25","first-page":"2574","article-title":"An analysis of laplacian methods for value function approximation in mdps","author":"petrik","year":"2007","journal-title":"IJCAI"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1214\/17-AOS1541"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1137\/070696325"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1063\/1.3590108"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2002.803530"},{"key":"ref12","first-page":"51","article-title":"On the numerical approximation of the perron&#x2013;frobenius and koopman operator","volume":"3","author":"klus","year":"2016","journal-title":"Journal of Computational Dynamics"},{"key":"ref13","article-title":"Eigendecompositions of transfer operators in reproducing kernel hilbert spaces","author":"klus","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2019.2956737"},{"key":"ref15","article-title":"Spectral thresholding for the estimation of markov chain transition operators","author":"l\u00f6ffler","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref16","first-page":"4563","article-title":"Learning low-dimensional state embeddings and metastable clusters from time series data","author":"sun","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-200-7.50069-6"},{"key":"ref18","author":"bertsekas","year":"1996","journal-title":"Neuro-Dynamic Programming"},{"key":"ref19","first-page":"361","article-title":"Reinforcement learning with soft state aggregation","author":"singh","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref4","article-title":"Provably efficient reinforcement learning with linear function approximation","author":"jin","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref3","first-page":"6995","article-title":"Sample-optimal parametric q-learning using linearly additive features","author":"yang","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref6","article-title":"Learning with good feature representations in bandits and in rl with a generative model","author":"lattimore","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref5","article-title":"Is a good representation sufficient for sample efficient reinforcement learning?","author":"du","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref8","article-title":"Kinematic state abstraction and provably efficient rich-observation reinforcement learning","author":"misra","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref7","article-title":"Provably efficient rl with rich observations via latent state decoding","author":"du","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/BF01932678"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2006.184"},{"key":"ref46","article-title":"An optimal statistical and computational framework for generalized tensor estimation","author":"han","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1137\/110836067"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1214\/ECP.v16-1624"},{"key":"ref47","author":"vershynin","year":"2017","journal-title":"High Dimensional Probability"},{"key":"ref42","first-page":"1177","article-title":"Random features for large-scale kernel machines","author":"rahimi","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref41","article-title":"Online learning in kernelized markov decision processes","author":"chowdhury","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1137\/06066518X"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-1904-8_8"}],"event":{"name":"2021 IEEE International Symposium on Information Theory (ISIT)","location":"Melbourne, Australia","start":{"date-parts":[[2021,7,12]]},"end":{"date-parts":[[2021,7,20]]}},"container-title":["2021 IEEE International Symposium on Information Theory (ISIT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9517708\/9517709\/09518158.pdf?arnumber=9518158","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T20:34:24Z","timestamp":1773347664000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9518158\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,12]]},"references-count":50,"URL":"https:\/\/doi.org\/10.1109\/isit45174.2021.9518158","relation":{},"subject":[],"published":{"date-parts":[[2021,7,12]]}}}