{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T00:34:46Z","timestamp":1773362086733,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,2,11]],"date-time":"2022-02-11T00:00:00Z","timestamp":1644537600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,2,11]]},"DOI":"10.1145\/3488560.3498380","type":"proceedings-article","created":{"date-parts":[[2022,2,15]],"date-time":"2022-02-15T21:42:57Z","timestamp":1644961377000},"page":"487-497","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":28,"title":["Doubly Robust Off-Policy Evaluation for Ranking Policies under the Cascade Behavior Model"],"prefix":"10.1145","author":[{"given":"Haruka","family":"Kiyohara","sequence":"first","affiliation":[{"name":"Tokyo Institute of Technology, Tokyo, Japan"}]},{"given":"Yuta","family":"Saito","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}]},{"given":"Tatsuya","family":"Matsuhiro","sequence":"additional","affiliation":[{"name":"Yahoo Japan Corporation, Tokyo, Japan"}]},{"given":"Yusuke","family":"Narita","sequence":"additional","affiliation":[{"name":"Yale University, New Haven, CT, USA"}]},{"given":"Nobuyuki","family":"Shimizu","sequence":"additional","affiliation":[{"name":"Yahoo Japan Corporation, Tokyo, Japan"}]},{"given":"Yasuo","family":"Yamamoto","sequence":"additional","affiliation":[{"name":"Yahoo Japan Corporation, Tokyo, Japan"}]}],"member":"320","published-online":{"date-parts":[[2022,2,15]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/1557019.1557040"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1214\/14-STS500"},{"key":"e_1_3_2_2_3_1","volume-title":"Proceedings of the 35th International Conference on Machine Learning","volume":"80","author":"Farajtabar Mehrdad","year":"2018","unstructured":"Mehrdad Farajtabar, Yinlam Chow, and Mohammad Ghavamzadeh. 2018. More Robust Doubly Robust Off-Policy Evaluation. In Proceedings of the 35th International Conference on Machine Learning, Vol. 80. PMLR, 1447--1456."},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159687"},{"key":"e_1_3_2_2_5_1","volume-title":"Proceedings of the 12th ACM International Conference on Web Search and Data Mining . 420--428","author":"Gruson Alois","year":"2019","unstructured":"Alois Gruson, Praveen Chandar, Christophe Charbuillet, James McInerney, Samantha Hansen, Damien Tardieu, and Ben Carterette. 2019. Offline Evaluation to Make Decisions About Playlist Recommendation Algorithms. In Proceedings of the 12th ACM International Conference on Web Search and Data Mining . 420--428."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/1498759.1498818"},{"key":"e_1_3_2_2_7_1","volume-title":"ACM Transactions on Information Systems (TOIS)","volume":"20","author":"Jaana Kalervo","year":"2002","unstructured":"Kalervo J\"arvelin and Jaana Kek\"al\"ainen. 2002. Cumulated Gain-Based Evaluation of IR Techniques. ACM Transactions on Information Systems (TOIS) , Vol. 20, 4 (2002), 422--446."},{"key":"e_1_3_2_2_8_1","volume-title":"Proceedings of the 33rd International Conference on Machine Learning","volume":"48","author":"Jiang Nan","year":"2016","unstructured":"Nan Jiang and Lihong Li. 2016. Doubly Robust Off-Policy Value Evaluation for Reinforcement Learning. In Proceedings of the 33rd International Conference on Machine Learning, Vol. 48. PMLR, 652--661."},{"key":"e_1_3_2_2_9_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning","volume":"139","author":"Kallus Nathan","year":"2021","unstructured":"Nathan Kallus, Yuta Saito, and Masatoshi Uehara. 2021. Optimal Off-Policy Evaluation from Multiple Logging Policies. In Proceedings of the 38th International Conference on Machine Learning, Vol. 139. PMLR, 5247--5256."},{"key":"e_1_3_2_2_10_1","volume-title":"Accelerating Offline Reinforcement Learning Application in Real-Time Bidding and Recommendation: Potential Use of Simulation. arXiv preprint arXiv:2109.08331","author":"Kiyohara Haruka","year":"2021","unstructured":"Haruka Kiyohara, Kosuke Kawakami, and Yuta Saito. 2021. Accelerating Offline Reinforcement Learning Application in Real-Time Bidding and Recommendation: Potential Use of Simulation. arXiv preprint arXiv:2109.08331 (2021)."},{"key":"e_1_3_2_2_11_1","volume-title":"Offline Reinforcement Learning: Tutorial, Review, and Perspectives on Open Problems. arXiv preprint arXiv:2005.01643","author":"Levine Sergey","year":"2020","unstructured":"Sergey Levine, Aviral Kumar, George Tucker, and Justin Fu. 2020. Offline Reinforcement Learning: Tutorial, Review, and Perspectives on Open Problems. arXiv preprint arXiv:2005.01643 (2020)."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220028"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403229"},{"key":"e_1_3_2_2_14_1","volume-title":"Proceedings of the 17th International Conference on Machine Learning. 759--766","author":"Precup Doina","unstructured":"Doina Precup, Richard S. Sutton, and Satinder P. Singh. 2000. Eligibility Traces for Off-Policy Policy Evaluation. In Proceedings of the 17th International Conference on Machine Learning. 759--766."},{"key":"e_1_3_2_2_15_1","volume-title":"Doubly Robust Estimator for Ranking Metrics with Post-Click Conversions. In 14th ACM Conference on Recommender Systems. 92--100","author":"Saito Yuta","year":"2020","unstructured":"Yuta Saito. 2020. Doubly Robust Estimator for Ranking Metrics with Post-Click Conversions. In 14th ACM Conference on Recommender Systems. 92--100."},{"key":"e_1_3_2_2_16_1","volume-title":"2020 a. Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation. arXiv preprint arXiv:2008.07146","author":"Saito Yuta","year":"2020","unstructured":"Yuta Saito, Shunsuke Aihara, Megumi Matsutani, and Yusuke Narita. 2020 a. Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation. arXiv preprint arXiv:2008.07146 (2020)."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3473320"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3474245"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3336191.3371783"},{"key":"e_1_3_2_2_20_1","first-page":"2217","article-title":"Learning from Logged Implicit Exploration Data","volume":"23","author":"Strehl Alex","year":"2010","unstructured":"Alex Strehl, John Langford, Lihong Li, and Sham M Kakade. 2010. Learning from Logged Implicit Exploration Data. In Advances in Neural Information Processing Systems, Vol. 23. 2217--2225.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_21_1","volume-title":"Proceedings of the 37th International Conference on Machine Learning","volume":"119","author":"Su Yi","year":"2020","unstructured":"Yi Su, Maria Dimakopoulou, Akshay Krishnamurthy, and Miroslav Dud'ik. 2020. Doubly Robust Off-Policy Evaluation with Shrinkage. In Proceedings of the 37th International Conference on Machine Learning, Vol. 119. PMLR, 9167--9176."},{"key":"e_1_3_2_2_22_1","volume-title":"Reinforcement learning: An introduction","author":"Sutton Richard S","unstructured":"Richard S Sutton and Andrew G Barto. 2018. Reinforcement learning: An introduction .MIT press."},{"key":"e_1_3_2_2_23_1","first-page":"3632","article-title":"Off-Policy Evaluation for Slate Recommendation","volume":"30","author":"Swaminathan Adith","year":"2017","unstructured":"Adith Swaminathan, Akshay Krishnamurthy, Alekh Agarwal, Miro Dudik, John Langford, Damien Jose, and Imed Zitouni. 2017. Off-Policy Evaluation for Slate Recommendation. In Advances in Neural Information Processing Systems, Vol. 30. 3632--3642.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_24_1","volume-title":"Proceedings of the 33rd International Conference on Machine Learning","volume":"48","author":"Thomas Philip","year":"2016","unstructured":"Philip Thomas and Emma Brunskill. 2016. Data-Efficient Off-Policy Policy Evaluation for Reinforcement Learning. In Proceedings of the 33rd International Conference on Machine Learning, Vol. 48. PMLR, 2139--2148."},{"key":"e_1_3_2_2_25_1","volume-title":"Fernando Amat Gil, and Ashok Chandrashekar","author":"Vlassis Nikos","year":"2021","unstructured":"Nikos Vlassis, Fernando Amat Gil, and Ashok Chandrashekar. 2021. Off-Policy Evaluation of Slate Policies under Bayes Risk. arXiv preprint arXiv:2101.02553 (2021)."}],"event":{"name":"WSDM '22: The Fifteenth ACM International Conference on Web Search and Data Mining","location":"Virtual Event AZ USA","acronym":"WSDM '22","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the Fifteenth ACM International Conference on Web Search and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3488560.3498380","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3488560.3498380","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:18:50Z","timestamp":1750191530000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3488560.3498380"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,2,11]]},"references-count":25,"alternative-id":["10.1145\/3488560.3498380","10.1145\/3488560"],"URL":"https:\/\/doi.org\/10.1145\/3488560.3498380","relation":{},"subject":[],"published":{"date-parts":[[2022,2,11]]},"assertion":[{"value":"2022-02-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}