{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:23:01Z","timestamp":1750220581110,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":6,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,1,17]],"date-time":"2020-01-17T00:00:00Z","timestamp":1579219200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,1,17]]},"DOI":"10.1145\/3380688.3380706","type":"proceedings-article","created":{"date-parts":[[2020,3,7]],"date-time":"2020-03-07T12:20:13Z","timestamp":1583583613000},"page":"51-55","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["A Sublinear-Regret Reinforcement Learning Algorithm on Constrained Markov Decision Processes with reset action"],"prefix":"10.1145","author":[{"given":"Takashi","family":"Watanabe","sequence":"first","affiliation":[{"name":"Kyoto University, Graduate school of Human and Environmental Studies, Kyoto city, Japan"}]},{"given":"Takashi","family":"Sakuragawa","sequence":"additional","affiliation":[{"name":"Kyoto University, Graduate school of Human and Environmental Studies, Kyoto city, Japan"}]}],"member":"320","published-online":{"date-parts":[[2020,3,7]]},"reference":[{"key":"e_1_3_2_1_1_1","series-title":"Vol. 7","volume-title":"Constrained Markov decision processes","author":"Altman Eitan","unstructured":"Altman , Eitan . 1999. Constrained Markov decision processes ( Vol. 7 ) . CRC Press . Altman, Eitan. 1999. Constrained Markov decision processes (Vol. 7). CRC Press."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.2748\/tmj\/1178243286"},{"volume-title":"The Collected Works of Wassily Hoeffding, 409--426.","author":"Hoeffding Wassily","key":"e_1_3_2_1_3_1","unstructured":"Hoeffding , Wassily . 1994. Probability inequalities for sums of bounded random variables . In The Collected Works of Wassily Hoeffding, 409--426. , Springer , New York, NY . Hoeffding, Wassily. 1994. Probability inequalities for sums of bounded random variables. In The Collected Works of Wassily Hoeffding, 409--426., Springer, New York, NY."},{"key":"e_1_3_2_1_4_1","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"Puterman","unstructured":"Martin L. Puterman . 1994. Markov Decision Processes: Discrete Stochastic Dynamic Programming ( 1 st ed.). John Wiley & Sons, Inc. , New York, NY, USA . Martin L. Puterman. 1994. Markov Decision Processes: Discrete Stochastic Dynamic Programming (1st ed.). John Wiley & Sons, Inc., New York, NY, USA.","edition":"1"},{"key":"e_1_3_2_1_5_1","volume-title":"Near-optimal Regret Bounds for Reinforcement Learning. J. Mach. Learn. Res. 11 (August","author":"Thomas Jaksch Ronald Ortner","year":"2010","unstructured":"Thomas Jaksch , Ronald Ortner , and Peter Auer . 2010. Near-optimal Regret Bounds for Reinforcement Learning. J. Mach. Learn. Res. 11 (August 2010 ), 1563--1600. Thomas Jaksch, Ronald Ortner, and Peter Auer. 2010. Near-optimal Regret Bounds for Reinforcement Learning. J. Mach. Learn. Res. 11 (August 2010), 1563--1600."},{"key":"e_1_3_2_1_6_1","unstructured":"Tossou A. Basu D. & Dimitrakakis C. 2019. Near-optimal Optimistic Reinforcement Learning using Empirical Bernstein Inequalities. arXiv preprint arXiv:1905.12425  Tossou A. Basu D. & Dimitrakakis C. 2019. Near-optimal Optimistic Reinforcement Learning using Empirical Bernstein Inequalities. arXiv preprint arXiv:1905.12425"}],"event":{"name":"ICMLSC 2020: The 4th International Conference on Machine Learning and Soft Computing","sponsor":["NICT National Institute of Information and Communications Technology"],"location":"Haiphong City Viet Nam","acronym":"ICMLSC 2020"},"container-title":["Proceedings of the 4th International Conference on Machine Learning and Soft Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3380688.3380706","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3380688.3380706","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:31:32Z","timestamp":1750195892000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3380688.3380706"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,1,17]]},"references-count":6,"alternative-id":["10.1145\/3380688.3380706","10.1145\/3380688"],"URL":"https:\/\/doi.org\/10.1145\/3380688.3380706","relation":{},"subject":[],"published":{"date-parts":[[2020,1,17]]},"assertion":[{"value":"2020-03-07","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}