{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T16:07:06Z","timestamp":1750694826547,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":54,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,10]],"date-time":"2024-06-10T00:00:00Z","timestamp":1717977600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Fannie and John Hertz Foundation Fellowship","award":[""],"award-info":[{"award-number":[""]}]},{"name":"NSF GRFP Fellowship","award":[""],"award-info":[{"award-number":[""]}]},{"name":"Microsoft Trustworthy AI Grant","award":[""],"award-info":[{"award-number":[""]}]},{"name":"ONR Young Investigator Award","award":[""],"award-info":[{"award-number":[""]}]},{"name":"David and Lucile Packard Foundation","award":[""],"award-info":[{"award-number":[""]}]},{"name":"DOD NDSEG Fellowship","award":[""],"award-info":[{"award-number":[""]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,10]]},"DOI":"10.1145\/3618260.3649710","type":"proceedings-article","created":{"date-parts":[[2024,6,11]],"date-time":"2024-06-11T19:25:02Z","timestamp":1718133902000},"page":"183-193","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Exploring and Learning in Sparse Linear MDPs without Computationally Intractable Oracles"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2274-7861","authenticated-orcid":false,"given":"Noah","family":"Golowich","sequence":"first","affiliation":[{"name":"Massachusetts Institute of Technology, Cambridge, USA"}]},
{"ORCID":"https:\/\/orcid.org\/0000-0001-7047-0495","authenticated-orcid":false,"given":"Ankur","family":"Moitra","sequence":"additional","affiliation":[{"name":"Massachusetts Institute of Technology, Cambridge, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0569-6698","authenticated-orcid":false,"given":"Dhruv","family":"Rohatgi","sequence":"additional","affiliation":[{"name":"Massachusetts Institute of Technology, Cambridge, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,6,11]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proceedings of the Fifteenth International Conference on Artificial Intelligence and Statistics, Neil D. Lawrence and Mark Girolami (Eds.) (Proceedings of Machine Learning Research","volume":"9","author":"Abbasi-Yadkori Yasin","year":"2012","unstructured":"Yasin Abbasi-Yadkori, David Pal, and Csaba Szepesvari. 2012. Online-to-Confidence-Set Conversions and Application to Sparse Stochastic Bandits. In Proceedings of the Fifteenth International Conference on Artificial Intelligence and Statistics, Neil D. Lawrence and Mark Girolami (Eds.) (Proceedings of Machine Learning Research, Vol. 22). PMLR, La Palma, Canary Islands. 1\u20139."},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the 34th International Conference on Machine Learning -","volume":"70","author":"Azar Mohammad Gheshlaghi","year":"2017","unstructured":"Mohammad Gheshlaghi Azar, Ian Osband, and R\u00e9mi Munos. 2017. Minimax Regret Bounds for Reinforcement Learning. In Proceedings of the 34th International Conference on Machine Learning - Volume 70 (ICML\u201917). JMLR.org, 263\u2013272."},{"key":"e_1_3_2_1_3_1","volume-title":"Policy search by dynamic programming. Advances in neural information processing systems, 16","author":"Bagnell James","year":"2003","unstructured":"James Bagnell, Sham M Kakade, Jeff Schneider, and Andrew Ng. 2003. Policy search by dynamic programming. Advances in neural information processing systems, 16 (2003)."},
{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/asr043"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1214\/08-AOS620"},{"key":"e_1_3_2_1_6_1","first-page":"213","article-title":"R-max-a general polynomial time algorithm for near-optimal reinforcement learning","author":"Brafman Ronen I","year":"2002","unstructured":"Ronen I Brafman and Moshe Tennenholtz. 2002. R-max-a general polynomial time algorithm for near-optimal reinforcement learning. Journal of Machine Learning Research, 3, Oct (2002), 213\u2013231.","journal-title":"Journal of Machine Learning Research, 3"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2011.2146090"},{"volume-title":"The Dantzig selector: Statistical estimation when","author":"Candes Emmanuel","key":"e_1_3_2_1_8_1","unstructured":"Emmanuel Candes and Terence Tao. 2007. The Dantzig selector: Statistical estimation when p is much larger than n."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3406325.3451125"},{"key":"e_1_3_2_1_10_1","volume-title":"International Conference on Machine Learning. 2826\u20132836","author":"Du Simon","year":"2021","unstructured":"Simon Du, Sham Kakade, Jason Lee, Shachar Lovett, Gaurav Mahajan, Wen Sun, and Ruosong Wang. 2021. Bilinear classes: A structural framework for provable generalization in rl. In International Conference on Machine Learning. 2826\u20132836."},{"key":"e_1_3_2_1_11_1","volume-title":"International Conference on Machine Learning. 1665\u20131674","author":"Du Simon","year":"2019","unstructured":"Simon Du, Akshay Krishnamurthy, Nan Jiang, Alekh Agarwal, Miroslav Dudik, and John Langford. 2019. Provably efficient rl with rich observations via latent state decoding. In International Conference on Machine Learning. 1665\u20131674."},
{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/0890-5401(89)90001-1"},{"key":"e_1_3_2_1_13_1","volume-title":"Implementation Matters in Deep RL: A Case Study on PPO and TRPO. In International Conference on Learning Representations.","author":"Engstrom Logan","year":"2020","unstructured":"Logan Engstrom, Andrew Ilyas, Shibani Santurkar, Dimitris Tsipras, Firdaus Janoos, Larry Rudolph, and Aleksander Madry. 2020. Implementation Matters in Deep RL: A Case Study on PPO and TRPO. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_14_1","unstructured":"Dylan J Foster Sham M Kakade Jian Qian and Alexander Rakhlin. 2021. The statistical complexity of interactive decision making. arXiv preprint arXiv:2112.13487."},{"key":"e_1_3_2_1_15_1","first-page":"1458","article-title":"Learning in observable pomdps, without computationally intractable oracles","volume":"35","author":"Golowich Noah","year":"2022","unstructured":"Noah Golowich, Ankur Moitra, and Dhruv Rohatgi. 2022. Learning in observable pomdps, without computationally intractable oracles. Advances in Neural Information Processing Systems, 35 (2022), 1458\u20131473.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Noah Golowich Ankur Moitra and Dhruv Rohatgi. 2023. Exploring and Learning in Sparse Linear MDPs without Computationally Intractable Oracles. arxiv:2309.09457.","DOI":"10.1145\/3618260.3649710"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"e_1_3_2_1_18_1","unstructured":"Aparna Ajit Gupte and Kerri Lu. 2020. Fine-Grained Complexity of Sparse Linear Regression."},{"key":"e_1_3_2_1_19_1","volume-title":"International Conference on Artificial Intelligence and Statistics. 316\u2013324","author":"Hao Botao","year":"2021","unstructured":"Botao Hao, Tor Lattimore, Csaba Szepesv\u00e1ri, and Mengdi Wang. 2021. Online sparse reinforcement learning. In International Conference on Artificial Intelligence and Statistics. 316\u2013324."},
{"key":"e_1_3_2_1_20_1","volume-title":"International Conference on Learning Representations.","author":"Ilyas Andrew","year":"2020","unstructured":"Andrew Ilyas, Logan Engstrom, Shibani Santurkar, Dimitris Tsipras, Firdaus Janoos, Larry Rudolph, and Aleksander Madry. 2020. A Closer Look at Deep Policy Gradients. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_21_1","volume-title":"International Conference on Machine Learning. 9525\u20139587","author":"Ilyas Andrew","year":"2022","unstructured":"Andrew Ilyas, Sung Min Park, Logan Engstrom, Guillaume Leclerc, and Aleksander Madry. 2022. Datamodels: Understanding predictions with data and data with predictions. In International Conference on Machine Learning. 9525\u20139587."},{"key":"e_1_3_2_1_22_1","volume-title":"Near-Optimal Regret Bounds for Reinforcement Learning. J. Mach. Learn. Res., 11","author":"Jaksch Thomas","year":"2010","unstructured":"Thomas Jaksch, Ronald Ortner, and Peter Auer. 2010. Near-Optimal Regret Bounds for Reinforcement Learning. J. Mach. Learn. Res., 11 (2010), aug, 1563\u20131600. issn:1532-4435"},{"key":"e_1_3_2_1_23_1","volume-title":"International Conference on Machine Learning. 1704\u20131713","author":"Jiang Nan","year":"2017","unstructured":"Nan Jiang, Akshay Krishnamurthy, Alekh Agarwal, John Langford, and Robert E Schapire. 2017. Contextual decision processes with low bellman rank are pac-learnable. In International Conference on Machine Learning. 1704\u20131713."},{"key":"e_1_3_2_1_24_1","volume-title":"Is Q-learning provably efficient? Advances in neural information processing systems, 31","author":"Jin Chi","year":"2018","unstructured":"Chi Jin, Zeyuan Allen-Zhu, Sebastien Bubeck, and Michael I Jordan. 2018. Is Q-learning provably efficient? Advances in neural information processing systems, 31 (2018)."},
{"key":"e_1_3_2_1_25_1","volume-title":"Bellman eluder dimension: New rich classes of rl problems, and sample-efficient algorithms. Advances in neural information processing systems, 34","author":"Jin Chi","year":"2021","unstructured":"Chi Jin, Qinghua Liu, and Sobhan Miryoosefi. 2021. Bellman eluder dimension: New rich classes of rl problems, and sample-efficient algorithms. Advances in neural information processing systems, 34 (2021), 13406\u201313418."},{"key":"e_1_3_2_1_26_1","volume-title":"Conference on Learning Theory. 2137\u20132143","author":"Jin Chi","year":"2020","unstructured":"Chi Jin, Zhuoran Yang, Zhaoran Wang, and Michael I Jordan. 2020. Provably efficient reinforcement learning with linear function approximation. In Conference on Learning Theory. 2137\u20132143."},{"key":"e_1_3_2_1_27_1","volume-title":"Near-optimal reinforcement learning in polynomial time. Machine learning, 49","author":"Kearns Michael","year":"2002","unstructured":"Michael Kearns and Satinder Singh. 2002. Near-optimal reinforcement learning in polynomial time. Machine learning, 49 (2002), 209\u2013232."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/FOCS52979.2021.00061"},{"key":"e_1_3_2_1_29_1","unstructured":"Wonyoung Kim Garud Iyengar and Assaf Zeevi. 2023. A Doubly Robust Approach to Sparse Reinforcement Learning. arxiv:2310.15286."},{"key":"e_1_3_2_1_30_1","article-title":"Toward Attribute Efficient Learning of Decision Lists and Parities","volume":"7","author":"Klivans Adam R","year":"2006","unstructured":"Adam R Klivans, Rocco A Servedio, and Dana Ron. 2006. Toward Attribute Efficient Learning of Decision Lists and Parities.. Journal of Machine Learning Research, 7, 4 (2006).","journal-title":"Journal of Machine Learning Research"},
{"key":"e_1_3_2_1_31_1","volume-title":"Dylan J Foster, and Alexander Rakhlin.","author":"Mhammedi Zakaria","year":"2023","unstructured":"Zakaria Mhammedi, Adam Block, Dylan J Foster, and Alexander Rakhlin. 2023. Efficient Model-Free Exploration in Low-Rank MDPs. arXiv preprint arXiv:2307.03997."},{"key":"e_1_3_2_1_32_1","unstructured":"Zakaria Mhammedi Dylan J Foster and Alexander Rakhlin. 2023. Representation Learning with Multi-Step Inverse Kinematics: An Efficient and Optimal Approach to Rich-Observation RL. arXiv preprint arXiv:2304.05889."},{"key":"e_1_3_2_1_33_1","volume-title":"International conference on machine learning. 6961\u20136971","author":"Misra Dipendra","year":"2020","unstructured":"Dipendra Misra, Mikael Henaff, Akshay Krishnamurthy, and John Langford. 2020. Kinematic state abstraction and provably efficient rich-observation reinforcement learning. In International conference on machine learning. 6961\u20136971."},{"key":"e_1_3_2_1_34_1","unstructured":"Aditya Modi Jinglin Chen Akshay Krishnamurthy Nan Jiang and Alekh Agarwal. 2021. Model-free representation learning and exploration in low-rank mdps. arXiv preprint arXiv:2102.07035."},{"key":"e_1_3_2_1_35_1","volume-title":"CoSaMP: Iterative signal recovery from incomplete and inaccurate samples. Applied and computational harmonic analysis, 26, 3","author":"Needell Deanna","year":"2009","unstructured":"Deanna Needell and Joel A Tropp. 2009. CoSaMP: Iterative signal recovery from incomplete and inaccurate samples. Applied and computational harmonic analysis, 26, 3 (2009), 301\u2013321."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.5555\/1756006.1859929"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2020.3003851"},{"key":"e_1_3_2_1_38_1","volume-title":"Conference on Learning Theory. 2652\u20132663","author":"Reeves Galen","year":"2019","unstructured":"Galen Reeves, Jiaming Xu, and Ilias Zadik. 2019. The all-or-nothing phenomenon in sparse linear regression. In Conference on Learning Theory. 2652\u20132663."},
{"key":"e_1_3_2_1_39_1","volume-title":"Learning decision lists. Machine learning, 2","author":"Rivest Ronald L","year":"1987","unstructured":"Ronald L Rivest. 1987. Learning decision lists. Machine learning, 2 (1987), 229\u2013246."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022679002094"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"crossref","unstructured":"Ahmad EL Sallab Mohammed Abdou Etienne Perot and Senthil Yogamani. 2017. Deep reinforcement learning framework for autonomous driving. arXiv preprint arXiv:1704.02532.","DOI":"10.2352\/ISSN.2470-1173.2017.19.AVM-023"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2016.2606605"},{"key":"e_1_3_2_1_43_1","volume-title":"A general reinforcement learning algorithm that masters chess, shogi, and Go through self-play. Science, 362, 6419","author":"Silver David","year":"2018","unstructured":"David Silver, Thomas Hubert, Julian Schrittwieser, Ioannis Antonoglou, Matthew Lai, Arthur Guez, Marc Lanctot, Laurent Sifre, Dharshan Kumaran, and Thore Graepel. 2018. A general reinforcement learning algorithm that masters chess, shogi, and Go through self-play. Science, 362, 6419 (2018), 1140\u20131144."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.2517-6161.1996.tb02080.x"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2007.909108"},{"key":"e_1_3_2_1_46_1","volume-title":"LASSO with cross-validation for genomic selection. Genetics research, 91, 6","author":"Usai M Graziano","year":"2009","unstructured":"M Graziano Usai, Mike E Goddard, and Ben J Hayes. 2009. LASSO with cross-validation for genomic selection. Genetics research, 91, 6 (2009), 427\u2013436."},
{"key":"e_1_3_2_1_47_1","volume-title":"Sharp thresholds for High-Dimensional and noisy sparsity recovery using l1-Constrained Quadratic Programming (Lasso)","author":"Wainwright Martin J","year":"2009","unstructured":"Martin J Wainwright. 2009. Sharp thresholds for High-Dimensional and noisy sparsity recovery using l1-Constrained Quadratic Programming (Lasso). IEEE transactions on information theory, 55, 5 (2009), 2183\u20132202."},{"key":"e_1_3_2_1_48_1","first-page":"6123","article-title":"Reinforcement learning with general value function approximation: Provably efficient approach via bounded eluder dimension","volume":"33","author":"Wang Ruosong","year":"2020","unstructured":"Ruosong Wang, Russ R Salakhutdinov, and Lin Yang. 2020. Reinforcement learning with general value function approximation: Provably efficient approach via bounded eluder dimension. Advances in Neural Information Processing Systems, 33 (2020), 6123\u20136135.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_49_1","volume-title":"Proceedings of the 26th International Conference on Neural Information Processing Systems -","volume":"2","author":"Wen Zheng","year":"2013","unstructured":"Zheng Wen and Benjamin Van Roy. 2013. Efficient Exploration and Value Function Generalization in Deterministic Systems. In Proceedings of the 26th International Conference on Neural Information Processing Systems - Volume 2 (NIPS\u201913). Curran Associates Inc., Red Hook, NY, USA. 3021\u20133029."},{"key":"e_1_3_2_1_50_1","unstructured":"Tengyang Xie Dylan J Foster Yu Bai Nan Jiang and Sham M Kakade. 2022. The role of coverage in online reinforcement learning. arXiv preprint arXiv:2210.04157."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477600"},{"key":"e_1_3_2_1_52_1","volume-title":"International Conference on Machine Learning. 26517\u201326547","author":"Zhang Xuezhou","year":"2022","unstructured":"Xuezhou Zhang, Yuda Song, Masatoshi Uehara, Mengdi Wang, Alekh Agarwal, and Wen Sun. 2022. Efficient reinforcement learning in block mdps: A model-free representation learning approach. In International Conference on Machine Learning. 26517\u201326547."},
{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jempfin.2019.08.007"},{"key":"e_1_3_2_1_54_1","volume-title":"International Conference on Artificial Intelligence and Statistics. 4006\u20134032","author":"Zhu Hanlin","year":"2023","unstructured":"Hanlin Zhu, Ruosong Wang, and Jason Lee. 2023. Provably Efficient Reinforcement Learning via Surprise Bound. In International Conference on Artificial Intelligence and Statistics. 4006\u20134032."}],"event":{"name":"STOC '24: 56th Annual ACM Symposium on Theory of Computing","sponsor":["SIGACT ACM Special Interest Group on Algorithms and Computation Theory"],"location":"Vancouver BC Canada","acronym":"STOC '24"},"container-title":["Proceedings of the 56th Annual ACM Symposium on Theory of Computing"],
"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3618260.3649710","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3618260.3649710","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:52Z","timestamp":1750291432000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3618260.3649710"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,10]]},"references-count":54,"alternative-id":["10.1145\/3618260.3649710","10.1145\/3618260"],"URL":"https:\/\/doi.org\/10.1145\/3618260.3649710","relation":{},"subject":[],"published":{"date-parts":[[2024,6,10]]},"assertion":[{"value":"2024-06-11","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}