{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,1,11]],"date-time":"2025-01-11T05:32:58Z","timestamp":1736573578047,"version":"3.32.0"},"reference-count":23,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,11,9]],"date-time":"2024-11-09T00:00:00Z","timestamp":1731110400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,11,9]],"date-time":"2024-11-09T00:00:00Z","timestamp":1731110400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,11,9]]},"DOI":"10.1109\/scisisis61014.2024.10760041","type":"proceedings-article","created":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T18:35:23Z","timestamp":1733164523000},"page":"1-4","source":"Crossref","is-referenced-by-count":0,"title":["The Effect of Different Experimental Settings on Off- Policy Evaluations"],"prefix":"10.1109","author":[{"given":"Mariko","family":"Sugimura","sequence":"first","affiliation":[{"name":"Graduate School of Humanities and Sciences, Ochanomizu University,Tokyo,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ichiro","family":"Kobayashi","sequence":"additional","affiliation":[{"name":"Graduate School of Humanities and Sciences, Ochanomizu University,Tokyo,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Empirical study of off-policy policy evaluation for reinforcement learning","author":"Voloshin","year":"2019","journal-title":"ArXiv, abs11911.06854"},{"journal-title":"Doubly robust off-policy value evaluation for reinforcement learning","year":"2016","author":"Jiang","key":"ref2"},{"key":"ref3","article-title":"More robust doubly robust off-policy evaluation","author":"Farajtabar","year":"2018","journal-title":"CoRR, absI1802.03493"},{"key":"ref4","article-title":"Batch policy learning under constraints","author":"Minh Le","year":"2019","journal-title":"CoRR, absI1903.08738"},{"key":"ref5","article-title":"Doubly robust policy evaluation and learning","author":"Dudik","year":"2011","journal-title":"CoRR, abs\/1103.4601"},{"key":"ref6","article-title":"Data-efficient off-policy policy evaluation for reinforcement learning","author":"Thomas","year":"2016","journal-title":"CoRR, absI1604.00923"},{"key":"ref7","article-title":"Counterfactual off-policy evaluation with gumbel-max structural causal models","author":"Oberst","year":"2019","journal-title":"CoRR, absI1905.05824"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CDC49753.2023.10383469"},{"key":"ref9","article-title":"Q(A) with off-policy corrections","author":"Harutyunyan","year":"2016","journal-title":"CoRR, absI1602.04951"},{"key":"ref10","article-title":"Safe and efficient off-policy reinforcement learning","author":"Munos","year":"2016","journal-title":"CoRR, absI1606.02647"},{"article-title":"Eligibility traces for off-policy policy evaluation","volume-title":"International Conference on Machine Learning","author":"Precup","key":"ref11"},{"key":"ref12","article-title":"Breaking the curse of horizon: Infinite-horizon off-policy estimation","volume-title":"Advances in Neural Information Processing Systems","volume":"31","author":"Liu","year":"2018"},{"key":"ref13","first-page":"9659","article-title":"Minimax weight and q-function learning for off-policy evaluation","volume-title":"Proceedings of the 37th International Conference on Machine Learning, volume 119 of Proceedings of Machine Learning Research","author":"Uehara"},{"key":"ref14","first-page":"2747","article-title":"Minimax value interval for off-policy evaluation and policy optimization","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Jiang","year":"2020"},{"key":"ref15","article-title":"Dualdice: Behavior-agnostic estimation of discounted stationary distribution corrections","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Nachum","year":"2019"},{"key":"ref16","first-page":"11194","article-title":"GradientDICE: Rethinking generalized offline estimation of stationary values","volume-title":"Proceedings of the 37th Inter-national Conference on Machine Learning, volume 119 of Proceedings of Machine Learning Research","author":"Zhang","year":"2020"},{"key":"ref17","first-page":"6551","article-title":"Off-policy evaluation via the regularized lagrangian","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Yang","year":"2020"},{"key":"ref18","first-page":"9398","article-title":"Coindice: Off-policy confidence interval estimation","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Dai","year":"2020"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1093\/imamat\/2.3.228"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1090\/psapm\/011\/0119381"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1952.10483446"},{"journal-title":"Learning from logged implicit exploration data","year":"2010","author":"Strehl","key":"ref22"},{"key":"ref23","article-title":"The self-normalized esti-mator for counterfactuallearning","volume-title":"Advances in Neural Information Processing Systems","volume":"28","author":"Swaminathan","year":"2015"}],"event":{"name":"2024 Joint 13th International Conference on Soft Computing and Intelligent Systems and 25th International Symposium on Advanced Intelligent Systems (SCIS&amp;ISIS)","start":{"date-parts":[[2024,11,9]]},"location":"Himeji, Japan","end":{"date-parts":[[2024,11,12]]}},"container-title":["2024 Joint 13th International Conference on Soft Computing and Intelligent Systems and 25th International Symposium on Advanced Intelligent Systems (SCIS&amp;amp;ISIS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10759795\/10759863\/10760041.pdf?arnumber=10760041","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,10]],"date-time":"2025-01-10T19:46:00Z","timestamp":1736538360000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10760041\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,9]]},"references-count":23,"URL":"https:\/\/doi.org\/10.1109\/scisisis61014.2024.10760041","relation":{},"subject":[],"published":{"date-parts":[[2024,11,9]]}}}