{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T09:59:35Z","timestamp":1775815175882,"version":"3.50.1"},"publisher-location":"Cham","reference-count":21,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031560262","type":"print"},{"value":"9783031560279","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-56027-9_7","type":"book-chapter","created":{"date-parts":[[2024,3,19]],"date-time":"2024-03-19T07:02:49Z","timestamp":1710831769000},"page":"108-122","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Learning Action Embeddings for\u00a0Off-Policy Evaluation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9225-5155","authenticated-orcid":false,"given":"Matej","family":"Cief","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8053-8318","authenticated-orcid":false,"given":"Jacek","family":"Golebiowski","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Philipp","family":"Schmidt","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2846-1373","authenticated-orcid":false,"given":"Ziawasch","family":"Abedjan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Artur","family":"Bekasov","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,3,20]]},"reference":[{"key":"7_CR1","doi-asserted-by":"crossref","unstructured":"Chuklin, A.: Click Models for Web Search, vol. 7, no. 3, pp. 1\u2013115 (2015)","DOI":"10.2200\/S00654ED1V01Y201507ICR043"},{"key":"7_CR2","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-4548-3","volume-title":"Topics in Advanced Econometrics: Probability Foundations","author":"PJ Dhrymes","year":"1989","unstructured":"Dhrymes, P.J.: Topics in Advanced Econometrics: Probability Foundations, vol. 1. Springer, Heidelberg (1989). https:\/\/doi.org\/10.1007\/978-1-4612-4548-3"},{"key":"7_CR3","doi-asserted-by":"publisher","unstructured":"Dud\u00edk, M., Erhan, D., Langford, J., Li, L.: Doubly robust policy evaluation and optimization. Stat. Sci. 29(4), 485\u2013511 (2014). ISSN 0883\u20134237, 2168\u20138745. https:\/\/doi.org\/10.1214\/14-sts500. https:\/\/projecteuclid.org\/journals\/statistical-science\/volume-29\/issue-4\/Doubly-Robust-Policy-Evaluation-and-Optimization\/10.1214\/14-STS500.full","DOI":"10.1214\/14-sts500"},{"issue":"352","key":"7_CR4","doi-asserted-by":"publisher","first-page":"892","DOI":"10.1080\/01621459.1975.10480319","volume":"70","author":"B Efron","year":"1975","unstructured":"Efron, B.: The efficiency of logistic regression compared to normal discriminant analysis. J. Am. Stat. Assoc. 70(352), 892\u2013898 (1975)","journal-title":"J. Am. Stat. Assoc."},{"key":"7_CR5","unstructured":"Farajtabar, M., Chow, Y., Ghavamzadeh, M.: More robust doubly robust off-policy evaluation. In: Proceedings of the 35th International Conference on Machine Learning, pp. 1447\u20131456. PMLR (2018). https:\/\/proceedings.mlr.press\/v80\/farajtabar18a.html. iSSN: 2640\u20133498"},{"key":"7_CR6","doi-asserted-by":"publisher","unstructured":"Hastie, T., Tibshirani, R., Friedman, J.: The Elements of Statistical Learning. Springer Series in Statistics. Springer, New York (2009). https:\/\/doi.org\/10.1007\/978-0-387-84858-7","DOI":"10.1007\/978-0-387-84858-7"},{"key":"7_CR7","unstructured":"Kallus, N., Zhou, A.: Policy evaluation and optimization with continuous treatments. In: Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics, pp. 1243\u20131251. PMLR (2018). https:\/\/proceedings.mlr.press\/v84\/kallus18a.html. iSSN: 2640\u20133498"},{"key":"7_CR8","unstructured":"Metelli, A.M., Russo, A., Restelli, M.: Subgaussian and differentiable importance sampling for off-policy evaluation and learning. In: Advances in Neural Information Processing Systems, vol. 34, pp. 8119\u20138132. Curran Associates, Inc. (2021). https:\/\/proceedings.neurips.cc\/paper\/2021\/hash\/4476b929e30dd0c4e8bdbcc82c6ba23a-Abstract.html"},{"key":"7_CR9","doi-asserted-by":"publisher","unstructured":"Peng, J., et al.: Offline policy evaluation in large action spaces via outcome-oriented action grouping. In: Proceedings of the ACM Web Conference 2023, WWW 2023, pp. 1220\u20131230. Association for Computing Machinery, New York (2023). ISBN 978-1-4503-9416-1. https:\/\/doi.org\/10.1145\/3543507.3583448. https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583448","DOI":"10.1145\/3543507.3583448"},{"key":"7_CR10","doi-asserted-by":"publisher","unstructured":"Robins, J.M., Rotnitzky, A., Zhao, L.P.: Estimation of regression coefficients when some regressors are not always observed. J. Am. Stat. Assoc. 89(427), 846\u2013866 (1994). ISSN 0162\u20131459. https:\/\/doi.org\/10.1080\/01621459.1994.10476818","DOI":"10.1080\/01621459.1994.10476818"},{"key":"7_CR11","doi-asserted-by":"publisher","unstructured":"Sachdeva, N., Su, Y., Joachims, T.: Off-policy bandits with deficient support. In: Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, KDD 2020, pp. 965\u2013975. Association for Computing Machinery, New York (2020). ISBN 978-1-4503-7998-4. https:\/\/doi.org\/10.1145\/3394486.3403139. https:\/\/dl.acm.org\/doi\/10.1145\/3394486.3403139","DOI":"10.1145\/3394486.3403139"},{"key":"7_CR12","doi-asserted-by":"publisher","unstructured":"Saito, Y., Aihara, S., Matsutani, M., Narita, Y.: Open bandit dataset and pipeline: towards realistic and reproducible off-policy evaluation (2021). https:\/\/doi.org\/10.48550\/arXiv.2008.07146. arXiv:2008.07146 [cs, stat]","DOI":"10.48550\/arXiv.2008.07146"},{"key":"7_CR13","unstructured":"Saito, Y., Joachims, T.: Off-policy evaluation for large action spaces via embeddings. In: Proceedings of the 39th International Conference on Machine Learning, pp. 19089\u201319122. PMLR (2022). https:\/\/proceedings.mlr.press\/v162\/saito22a.html. iSSN: 2640\u20133498"},{"key":"7_CR14","unstructured":"Saito, Y., Ren, Q., Joachims, T.: Off-policy evaluation for large action spaces via conjunct effect modeling. In: Proceedings of the 40th International Conference on Machine Learning, pp. 29734\u201329759. PMLR (2023). https:\/\/proceedings.mlr.press\/v202\/saito23b.html. iSSN: 2640\u20133498"},{"key":"7_CR15","unstructured":"Su, Y., Dimakopoulou, M., Krishnamurthy, A., Dudik, M.: Doubly robust off-policy evaluation with shrinkage. In: Proceedings of the 37th International Conference on Machine Learning, pp. 9167\u20139176. PMLR (2020). https:\/\/proceedings.mlr.press\/v119\/su20a.html. iSSN: 2640\u20133498"},{"key":"7_CR16","unstructured":"Su, Y., Wang, L., Santacatterina, M., Joachims, T.: CAB: continuous adaptive blending for policy evaluation and learning. In: Proceedings of the 36th International Conference on Machine Learning, pp. 6005\u20136014. PMLR (2019). https:\/\/proceedings.mlr.press\/v97\/su19a.html. iSSN: 2640\u20133498"},{"key":"7_CR17","unstructured":"Swaminathan, A.: Counterfactual Evaluation and Learning From Logged User Feedback. Ph.D. thesis, Cornell University, Ithaca, NY, United States (2017). https:\/\/ecommons.cornell.edu\/handle\/1813\/51557"},{"key":"7_CR18","unstructured":"Swaminathan, A., Joachims, T.: The self-normalized estimator for counterfactual learning. In: Advances in Neural Information Processing Systems, vol. 28. Curran Associates, Inc. (2015). https:\/\/proceedings.neurips.cc\/paper\/2015\/hash\/39027dfad5138c9ca0c474d71db915c3-Abstract.html"},{"key":"7_CR19","unstructured":"Swaminathan, A., et al.: Off-policy evaluation for slate recommendation. In: Advances in Neural Information Processing Systems, vol. 30. Curran Associates, Inc. (2017). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2017\/hash\/5352696a9ca3397beb79f116f3a33991-Abstract.html"},{"key":"7_CR20","unstructured":"Wang, Y.X., Agarwal, A., Dudik, M.: Optimal and adaptive off-policy evaluation in contextual bandits. In: Proceedings of the 34th International Conference on Machine Learning, pp. 3589\u20133597. PMLR (2017). https:\/\/proceedings.mlr.press\/v70\/wang17a.html. iSSN: 2640\u20133498"},{"key":"7_CR21","doi-asserted-by":"publisher","unstructured":"Zhou, L.: A Survey on Contextual Multi-armed Bandits (2016). https:\/\/doi.org\/10.48550\/arXiv.1508.03326. arXiv:1508.03326 [cs]","DOI":"10.48550\/arXiv.1508.03326"}],"container-title":["Lecture Notes in Computer Science","Advances in Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-56027-9_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,19]],"date-time":"2024-03-19T07:05:32Z","timestamp":1710831932000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-56027-9_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031560262","9783031560279"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-56027-9_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"20 March 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECIR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Information Retrieval","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 March 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 March 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecir2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.ecir2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"578","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"110","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"69","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"19% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"31 (Tracks: Workshop, Tutorial, Industry, Doctoral Consortium)","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}