{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T06:14:33Z","timestamp":1757312073317,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,4,30]],"date-time":"2023-04-30T00:00:00Z","timestamp":1682812800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U1936219,62141607"],"award-info":[{"award-number":["U1936219,62141607"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Beijing Academy of Artificial Intelligence (BAAI)"},{"name":"National Key R&D Program of China","award":["2018AAA0102004, 2020AAA0106300"],"award-info":[{"award-number":["2018AAA0102004, 2020AAA0106300"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,4,30]]},"DOI":"10.1145\/3543507.3583448","type":"proceedings-article","created":{"date-parts":[[2023,4,26]],"date-time":"2023-04-26T23:30:25Z","timestamp":1682551825000},"page":"1220-1230","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Offline Policy Evaluation in Large Action Spaces via Outcome-Oriented Action Grouping"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4170-7339","authenticated-orcid":false,"given":"Jie","family":"Peng","sequence":"first","affiliation":[{"name":"Tsinghua University, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6000-6936","authenticated-orcid":false,"given":"Hao","family":"Zou","sequence":"additional","affiliation":[{"name":"Tsinghua University, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9159-1752","authenticated-orcid":false,"given":"Jiashuo","family":"Liu","sequence":"additional","affiliation":[{"name":"Tsinghua University, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4915-9958","authenticated-orcid":false,"given":"Shaoming","family":"Li","sequence":"additional","affiliation":[{"name":"Meituan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8985-9812","authenticated-orcid":false,"given":"Yibao","family":"Jiang","sequence":"additional","affiliation":[{"name":"Meituan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2200-8711","authenticated-orcid":false,"given":"Jian","family":"Pei","sequence":"additional","affiliation":[{"name":"Duke University, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2957-8511","authenticated-orcid":false,"given":"Peng","family":"Cui","sequence":"additional","affiliation":[{"name":"Tsinghua University, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,4,30]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3097983.3098155"},{"key":"e_1_3_2_1_2_1","volume-title":"Estimating the Effects of Continuous-valued Interventions using Generative Adversarial Networks. CoRR abs\/2002.12326","author":"Bica Ioana","year":"2020","unstructured":"Ioana Bica, James Jordon, and Mihaela van\u00a0der Schaar. 2020. Estimating the Effects of Continuous-valued Interventions using Generative Adversarial Networks. CoRR abs\/2002.12326 (2020). arXiv:2002.12326https:\/\/arxiv.org\/abs\/2002.12326"},{"key":"e_1_3_2_1_3_1","article-title":"Counterfactual Reasoning and Learning Systems: The Example of Computational Advertising.","volume":"14","author":"Bottou L\u00e9on","year":"2013","unstructured":"L\u00e9on Bottou, Jonas Peters, Joaquin Qui\u00f1onero-Candela, Denis\u00a0X Charles, D\u00a0Max Chickering, Elon Portugaly, Dipankar Ray, Patrice Simard, and Ed Snelson. 2013. Counterfactual Reasoning and Learning Systems: The Example of Computational Advertising.Journal of Machine Learning Research 14, 11 (2013).","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_4_1","volume-title":"International Conference on Machine Learning. PMLR, 854\u2013863","author":"Cisse Moustapha","year":"2017","unstructured":"Moustapha Cisse, Piotr Bojanowski, Edouard Grave, Yann Dauphin, and Nicolas Usunier. 2017. Parseval networks: Improving robustness to adversarial examples. In International Conference on Machine Learning. PMLR, 854\u2013863."},{"key":"e_1_3_2_1_5_1","unstructured":"Miroslav Dud\u00edk John Langford and Lihong Li. 2011. Doubly Robust Policy Evaluation and Learning. In ICML."},{"key":"e_1_3_2_1_6_1","volume-title":"International Conference on Machine Learning. PMLR, 1447\u20131456","author":"Farajtabar Mehrdad","year":"2018","unstructured":"Mehrdad Farajtabar, Yinlam Chow, and Mohammad Ghavamzadeh. 2018. More robust doubly robust off-policy evaluation. In International Conference on Machine Learning. PMLR, 1447\u20131456."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159687"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1952.10483446"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/775047.775067"},{"key":"e_1_3_2_1_10_1","volume-title":"Balanced policy evaluation and learning. Advances in neural information processing systems 31","author":"Kallus Nathan","year":"2018","unstructured":"Nathan Kallus. 2018. Balanced policy evaluation and learning. Advances in neural information processing systems 31 (2018)."},{"key":"e_1_3_2_1_11_1","volume-title":"International conference on artificial intelligence and statistics. PMLR, 1243\u20131251","author":"Kallus Nathan","year":"2018","unstructured":"Nathan Kallus and Angela Zhou. 2018. Policy evaluation and optimization with continuous treatments. In International conference on artificial intelligence and statistics. PMLR, 1243\u20131251."},{"key":"e_1_3_2_1_12_1","volume-title":"Retail advertising works! measuring the effects of advertising on sales via a controlled experiment on yahoo!","author":"Lewis Randall","year":"2009","unstructured":"Randall Lewis and David Reiley. 2009. Retail advertising works! measuring the effects of advertising on sales via a controlled experiment on yahoo! (2009)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2740908.2742562"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/1935826.1935878"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3528233.3530713"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/70.1.41"},{"key":"e_1_3_2_1_17_1","unstructured":"Yuta Saito Shunsuke Aihara Megumi Matsutani and Yusuke Narita. 2021. Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation. In Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks J.\u00a0Vanschoren and S.\u00a0Yeung (Eds.). Vol.\u00a01. https:\/\/datasets-benchmarks-proceedings.neurips.cc\/paper\/2021\/file\/33e75ff09dd601bbe69f351039152189-Paper-round2.pdf"},{"key":"e_1_3_2_1_18_1","volume-title":"Proceedings of the 39th International Conference on Machine Learning. PMLR","author":"Saito Yuta","year":"2022","unstructured":"Yuta Saito and Thorsten Joachims. 2022. Off-Policy Evaluation for Large Action Spaces via Embeddings. In Proceedings of the 39th International Conference on Machine Learning. PMLR, 19089\u201319122."},{"key":"e_1_3_2_1_19_1","volume-title":"Recommendations as Treatments: Debiasing Learning and Evaluation. CoRR abs\/1602.05352","author":"Schnabel Tobias","year":"2016","unstructured":"Tobias Schnabel, Adith Swaminathan, Ashudeep Singh, Navin Chandak, and Thorsten Joachims. 2016. Recommendations as Treatments: Debiasing Learning and Evaluation. CoRR abs\/1602.05352 (2016). arXiv:1602.05352http:\/\/arxiv.org\/abs\/1602.05352"},{"key":"e_1_3_2_1_20_1","volume-title":"International Conference on Artificial Intelligence and Statistics. PMLR, 2413\u20132423","author":"Sondhi Arjun","year":"2020","unstructured":"Arjun Sondhi, David Arbour, and Drew Dimmery. 2020. Balanced off-policy evaluation in general action spaces. In International Conference on Artificial Intelligence and Statistics. PMLR, 2413\u20132423."},{"key":"e_1_3_2_1_21_1","volume-title":"Learning from logged implicit exploration data. Advances in neural information processing systems 23","author":"Strehl Alex","year":"2010","unstructured":"Alex Strehl, John Langford, Lihong Li, and Sham\u00a0M Kakade. 2010. Learning from logged implicit exploration data. Advances in neural information processing systems 23 (2010)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.5555\/3524938.3525791"},{"key":"e_1_3_2_1_23_1","volume-title":"International Conference on Machine Learning. PMLR, 6005\u20136014","author":"Su Yi","year":"2019","unstructured":"Yi Su, Lequn Wang, Michele Santacatterina, and Thorsten Joachims. 2019. Cab: Continuous adaptive blending for policy evaluation and learning. In International Conference on Machine Learning. PMLR, 6005\u20136014."},{"key":"e_1_3_2_1_24_1","volume-title":"International Conference on Machine Learning. PMLR, 814\u2013823","author":"Swaminathan Adith","year":"2015","unstructured":"Adith Swaminathan and Thorsten Joachims. 2015. Counterfactual risk minimization: Learning from logged bandit feedback. In International Conference on Machine Learning. PMLR, 814\u2013823."},{"key":"e_1_3_2_1_25_1","volume-title":"The self-normalized estimator for counterfactual learning. advances in neural information processing systems 28","author":"Swaminathan Adith","year":"2015","unstructured":"Adith Swaminathan and Thorsten Joachims. 2015. The self-normalized estimator for counterfactual learning. advances in neural information processing systems 28 (2015)."},{"key":"e_1_3_2_1_26_1","volume-title":"International Conference on Machine Learning. PMLR, 2139\u20132148","author":"Thomas Philip","year":"2016","unstructured":"Philip Thomas and Emma Brunskill. 2016. Data-efficient off-policy policy evaluation for reinforcement learning. In International Conference on Machine Learning. PMLR, 2139\u20132148."},{"key":"e_1_3_2_1_27_1","volume-title":"Improved Estimator Selection for Off-Policy Evaluation. In Workshop on Reinforcement Learning Theory at the 38th International Conference on Machine Learning.","author":"Tucker George","year":"2021","unstructured":"George Tucker and Jonathan Lee. 2021. Improved Estimator Selection for Off-Policy Evaluation. In Workshop on Reinforcement Learning Theory at the 38th International Conference on Machine Learning."},{"key":"e_1_3_2_1_28_1","volume-title":"International Conference on Machine Learning. PMLR, 3589\u20133597","author":"Wang Yu-Xiang","year":"2017","unstructured":"Yu-Xiang Wang, Alekh Agarwal, and Miroslav Dud\u0131k. 2017. Optimal and adaptive off-policy evaluation in contextual bandits. In International Conference on Machine Learning. PMLR, 3589\u20133597."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jclinepi.2009.11.020"},{"key":"e_1_3_2_1_30_1","first-page":"19705","article-title":"Counterfactual prediction for bundle treatment","volume":"33","author":"Zou Hao","year":"2020","unstructured":"Hao Zou, Peng Cui, Bo Li, Zheyan Shen, Jianxin Ma, Hongxia Yang, and Yue He. 2020. Counterfactual prediction for bundle treatment. Advances in Neural Information Processing Systems 33 (2020), 19705\u201319715.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_31_1","volume-title":"Proceedings of the 39th International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a0162)","author":"Zou Hao","year":"2022","unstructured":"Hao Zou, Bo Li, Jiangang Han, Shuiping Chen, Xuetao Ding, and Peng Cui. 2022. Counterfactual Prediction for Outcome-Oriented Treatments. In Proceedings of the 39th International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a0162), Kamalika Chaudhuri, Stefanie Jegelka, Le\u00a0Song, Csaba Szepesvari, Gang Niu, and Sivan Sabato (Eds.). PMLR, 27693\u201327706. https:\/\/proceedings.mlr.press\/v162\/zou22a.html"}],"event":{"name":"WWW '23: The ACM Web Conference 2023","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Austin TX USA","acronym":"WWW '23"},"container-title":["Proceedings of the ACM Web Conference 2023"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583448","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3543507.3583448","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:47:53Z","timestamp":1750178873000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583448"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,30]]},"references-count":31,"alternative-id":["10.1145\/3543507.3583448","10.1145\/3543507"],"URL":"https:\/\/doi.org\/10.1145\/3543507.3583448","relation":{},"subject":[],"published":{"date-parts":[[2023,4,30]]},"assertion":[{"value":"2023-04-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}