{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T16:48:28Z","timestamp":1755794908151,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,7,20]],"date-time":"2025-07-20T00:00:00Z","timestamp":1752969600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Department of Defense"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,20]]},"DOI":"10.1145\/3690624.3709176","type":"proceedings-article","created":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T18:42:22Z","timestamp":1743792142000},"page":"165-176","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["CSPI-MT: Calibrated Safe Policy Improvement with Multiple Testing for Threshold Policies"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3558-0415","authenticated-orcid":false,"given":"Brian","family":"Cho","sequence":"first","affiliation":[{"name":"Cornell University, New York, NY, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3482-2734","authenticated-orcid":false,"given":"Ana-Roxana","family":"Pop","sequence":"additional","affiliation":[{"name":"Meta, New York, NY, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5147-8747","authenticated-orcid":false,"given":"Kyra","family":"Gan","sequence":"additional","affiliation":[{"name":"Cornell University, New York, NY, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3849-2317","authenticated-orcid":false,"given":"Sam","family":"Corbett-Davies","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8698-4730","authenticated-orcid":false,"given":"Israel","family":"Nir","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-5562-1962","authenticated-orcid":false,"given":"Ariel","family":"Evnine","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1672-0507","authenticated-orcid":false,"given":"Nathan","family":"Kallus","sequence":"additional","affiliation":[{"name":"Cornell University &amp; Netflix, New York, NY, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,7,20]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Policy learning with observational data","author":"Athey Susan","year":"2020","unstructured":"Susan Athey and Stefan Wager. Policy learning with observational data, 2020."},{"key":"e_1_3_2_2_2_1","volume-title":"Efficient concentration with gaussian approximation","author":"Austern Morgane","year":"2024","unstructured":"Morgane Austern and Lester Mackey. Efficient concentration with gaussian approximation, 2024."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1257\/app.6.4.142"},{"key":"e_1_3_2_2_4_1","volume-title":"Safe policy learning through extrapolation: Application to pre-trial risk assessment","author":"Ben-Michael Eli","year":"2022","unstructured":"Eli Ben-Michael, D. James Greiner, Kosuke Imai, and Zhichao Jiang. Safe policy learning through extrapolation: Application to pre-trial risk assessment, 2022."},{"key":"e_1_3_2_2_5_1","volume-title":"March","author":"Ronen","year":"2003","unstructured":"Ronen I. Brafman and Moshe Tennenholtz. R-max - a general polynomial time algorithm for near-optimal reinforcement learning. J. Mach. Learn. Res., 3(null):213--231, March 2003."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"crossref","unstructured":"Victor Chernozhukov Denis Chetverikov Mert Demirer Esther Duflo Christian Hansen Whitney Newey and James Robins. Double\/debiased machine learning for treatment and causal parameters 2017.","DOI":"10.3386\/w23564"},{"key":"e_1_3_2_2_7_1","volume-title":"Peeking with peak: Sequential, nonparametric composite hypothesis tests for means of multiple data streams","author":"Cho Brian","year":"2024","unstructured":"Brian Cho, Kyra Gan, and Nathan Kallus. Peeking with peak: Sequential, nonparametric composite hypothesis tests for means of multiple data streams, 2024."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/62.2.441"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10654-008-9230-x"},{"key":"e_1_3_2_2_10_1","volume-title":"A comparison of methods for treatment assignment with an application to playlist generation","author":"Fern\u00e1ndez-Lor\u00eda Carlos","year":"2022","unstructured":"Carlos Fern\u00e1ndez-Lor\u00eda, Foster Provost, Jesse Anderton, Benjamin Carterette, and Praveen Chandar. A comparison of methods for treatment assignment with an application to playlist generation, 2022."},{"key":"e_1_3_2_2_11_1","volume-title":"Advances in Neural Information Processing Systems","author":"Ghavamzadeh Mohammad","year":"2016","unstructured":"Mohammad Ghavamzadeh, Marek Petrik, and Yinlam Chow. Safe policy improvement by minimizing robust baseline regret. In D. Lee, M. Sugiyama, U. Luxburg, I. Guyon, and R. Garnett, editors, Advances in Neural Information Processing Systems, volume 29. Curran Associates, Inc., 2016."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-2649-2"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.3982\/ECTA6630"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1038\/nmeth.3885"},{"key":"e_1_3_2_2_15_1","volume-title":"Treatment effect risk: Bounds and inference","author":"Kallus Nathan","year":"2022","unstructured":"Nathan Kallus. Treatment effect risk: Bounds and inference, 2022."},{"issue":"167","key":"e_1_3_2_2_16_1","first-page":"1","article-title":"Double reinforcement learning for efficient off-policy evaluation in markov decision processes","volume":"21","author":"Kallus Nathan","year":"2020","unstructured":"Nathan Kallus and Masatoshi Uehara. Double reinforcement learning for efficient off-policy evaluation in markov decision processes. Journal of Machine Learning Research, 21(167):1--63, 2020.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_2_17_1","volume-title":"Hepatology","author":"Kim WR","year":"2007","unstructured":"Kim WR Kamath PS. The model for end-stage liver disease (meld). Hepatology, 2007."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.3982\/ECTA13288"},{"key":"e_1_3_2_2_19_1","volume-title":"Safe policy improvement with baseline bootstrapping","author":"Laroche Romain","year":"2019","unstructured":"Romain Laroche, Paul Trichelair, and R\u00e9mi Tachet des Combes. Safe policy improvement with baseline bootstrapping, 2019."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1002\/jae.2656"},{"key":"e_1_3_2_2_21_1","volume-title":"Safe policy improvement with soft baseline bootstrapping","author":"Nadjahi Kimia","year":"2019","unstructured":"Kimia Nadjahi, Romain Laroche, and R\u00e9mi Tachet des Combes. Safe policy improvement with soft baseline bootstrapping, 2019."},{"key":"e_1_3_2_2_22_1","volume-title":"Advances in Neural Information Processing Systems","author":"Nilim Arnab","year":"2003","unstructured":"Arnab Nilim and Laurent Ghaoui. Robustness in markov decision problems with uncertain transition matrices. In S. Thrun, L. Saul, and B. Sch\u00f6lkopf, editors, Advances in Neural Information Processing Systems, volume 16. MIT Press, 2003."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3326937.3341258"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.5555\/1953048.2078195"},{"key":"e_1_3_2_2_25_1","volume-title":"A method to increase the power of multiple testing procedures through sample splitting. Statistical Applications in Genetics and Molecular Biology, 5(1)","author":"Rubin Daniel","year":"2006","unstructured":"Daniel Rubin, Sandrine Dudoit, and Mark Van der Laan. A method to increase the power of multiple testing procedures through sample splitting. Statistical Applications in Genetics and Molecular Biology, 5(1), 2006."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1037\/h0037350"},{"key":"e_1_3_2_2_27_1","volume-title":"Uncertainty in Reinforcement Learning - Awareness, Quantisation, and Control. 08","author":"Schneegass Daniel","year":"2010","unstructured":"Daniel Schneegass, Alexander Hans, and Steffen Udluft. Uncertainty in Reinforcement Learning - Awareness, Quantisation, and Control. 08 2010."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-22953-4_4"},{"key":"e_1_3_2_2_29_1","series-title":"Proceedings of Machine Learning Research","first-page":"2380","volume-title":"Proceedings of the 32nd International Conference on Machine Learning","author":"Thomas Philip","year":"2015","unstructured":"Philip Thomas, Georgios Theocharous, and Mohammad Ghavamzadeh. High confidence policy improvement. In Francis Bach and David Blei, editors, Proceedings of the 32nd International Conference on Machine Learning, volume 37 of Proceedings of Machine Learning Research, pages 2380--2388, Lille, France, 07--09 Jul 2015. PMLR."},{"key":"e_1_3_2_2_30_1","volume-title":"Asymptotic Statistics. Number 9780521784504 in Cambridge Books","author":"van der Vaart A. W.","year":"2000","unstructured":"A. W. van der Vaart. Asymptotic Statistics. Number 9780521784504 in Cambridge Books. Cambridge University Press, November 2000."},{"key":"e_1_3_2_2_31_1","volume-title":"Optimal and adaptive off-policy evaluation in contextual bandits","author":"Wang Yu-Xiang","year":"2017","unstructured":"Yu-Xiang Wang, Alekh Agarwal, and Miroslav Dudik. Optimal and adaptive off-policy evaluation in contextual bandits, 2017."}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"],"location":"Toronto ON Canada","acronym":"KDD '25"},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709176","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3690624.3709176","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,16]],"date-time":"2025-08-16T15:39:53Z","timestamp":1755358793000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709176"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,20]]},"references-count":31,"alternative-id":["10.1145\/3690624.3709176","10.1145\/3690624"],"URL":"https:\/\/doi.org\/10.1145\/3690624.3709176","relation":{},"subject":[],"published":{"date-parts":[[2025,7,20]]},"assertion":[{"value":"2025-07-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}