{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T05:18:46Z","timestamp":1740719926764,"version":"3.38.0"},"reference-count":24,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,12,16]],"date-time":"2024-12-16T00:00:00Z","timestamp":1734307200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,16]],"date-time":"2024-12-16T00:00:00Z","timestamp":1734307200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,16]]},"DOI":"10.1109\/cdc56724.2024.10885945","type":"proceedings-article","created":{"date-parts":[[2025,2,26]],"date-time":"2025-02-26T18:43:32Z","timestamp":1740595412000},"page":"1181-1186","source":"Crossref","is-referenced-by-count":0,"title":["Robust Q-Learning under Corrupted Rewards"],"prefix":"10.1109","author":[{"given":"Sreejeet","family":"Maity","sequence":"first","affiliation":[{"name":"North Carolina State University,Department of Electrical and Computer Engineering"}]},{"given":"Aritra","family":"Mitra","sequence":"additional","affiliation":[{"name":"North Carolina State University,Department of Electrical and Computer Engineering"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-99-8277-6"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022689125041"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21236\/ADA276517"},{"key":"ref5","article-title":"The asymptotic convergence-rate of Q-learning","volume-title":"Advances in neural information processing systems","volume":"10","author":"Szepesv\u00e1ri"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1287\/opre.2020.2024"},{"key":"ref7","article-title":"Q-learning with nearest neighbors","volume":"31","author":"Shah","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref8","first-page":"2803","article-title":"Finite-time error bounds for linear stochastic approximation and TD learning","volume-title":"Conference on Learning Theory","author":"Srikant"},{"key":"ref9","article-title":"Stochastic approximation with cone-contractive operators: Sharp ell_infty-bounds for Q-learning","author":"Wainwright","year":"2019","journal-title":"arXiv preprint arXiv:1905.06265"},{"key":"ref10","first-page":"1","article-title":"Finite-time analysis of asynchronous stochastic approximation and Q-learning","volume-title":"Proceedings of Machine Learning Research","volume":"125","author":"Guannan Qu"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1287\/opre.2023.2450"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-4380-9_35"},{"volume":"523","volume-title":"Robust statistics","year":"2004","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1214\/20-AOS1961"},{"key":"ref15","article-title":"Finite-sample convergence rates for Q-learning and indirect algorithms","volume":"11","author":"Kearns","year":"1998","journal-title":"Advances in neural information processing systems"},{"issue":"1","key":"ref16","article-title":"Learning rates for Q-learning","volume":"5","author":"Even-Dar","year":"2003","journal-title":"Journal of machine learning Research"},{"key":"ref17","article-title":"Near-optimal time and sample complexities for solving markov decision processes with a generative model","volume":"31","author":"Sidford","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.sysconle.2012.08.014"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3188745.3188918"},{"key":"ref20","first-page":"1562","article-title":"Better algorithms for stochastic bandits with adversarial corruptions","volume-title":"Conference on Learning Theory","author":"Gupta"},{"key":"ref21","article-title":"Robust Q-learning under corrupted rewards","author":"Maity","year":"2024","journal-title":"arXiv preprint arXiv:2409.03237"},{"volume-title":"Reinforcement learning: An introduction","year":"2018","author":"Sutton","key":"ref22"},{"key":"ref23","first-page":"325","article-title":"Minimax pac bounds on the sample complexity of reinforcement learning with a generative model","author":"Azar","year":"2013"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1017\/9781108943161"}],"event":{"name":"2024 IEEE 63rd Conference on Decision and Control (CDC)","start":{"date-parts":[[2024,12,16]]},"location":"Milan, Italy","end":{"date-parts":[[2024,12,19]]}},"container-title":["2024 IEEE 63rd Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10885784\/10885785\/10885945.pdf?arnumber=10885945","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,27]],"date-time":"2025-02-27T08:05:44Z","timestamp":1740643544000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10885945\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,16]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/cdc56724.2024.10885945","relation":{},"subject":[],"published":{"date-parts":[[2024,12,16]]}}}