{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,18]],"date-time":"2026-04-18T15:51:31Z","timestamp":1776527491918,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,8,20]],"date-time":"2020-08-20T00:00:00Z","timestamp":1597881600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100011878","name":"Vlaamse regering","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100011878","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,8,23]]},"DOI":"10.1145\/3394486.3403175","type":"proceedings-article","created":{"date-parts":[[2020,8,20]],"date-time":"2020-08-20T23:17:27Z","timestamp":1597965447000},"page":"1223-1233","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":19,"title":["Joint Policy-Value Learning for Recommendation"],"prefix":"10.1145","author":[{"given":"Olivier","family":"Jeunen","sequence":"first","affiliation":[{"name":"University of Antwerp, Antwerp, Belgium"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"David","family":"Rohde","sequence":"additional","affiliation":[{"name":"Criteo AI Lab, Paris, France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Flavian","family":"Vasile","sequence":"additional","affiliation":[{"name":"Criteo AI Lab, Paris, France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Martin","family":"Bompaire","sequence":"additional","affiliation":[{"name":"Criteo AI Lab, Paris, France"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2020,8,20]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Proc. of the 2019 World Wide Web Conference (WWW '19)","author":"Agarwal A.","unstructured":"A. Agarwal , X. Wang , C. Li , M. Bendersky , and M. Najork . 2019. Addressing Trust Bias for Unbiased Learning-to-Rank . In Proc. of the 2019 World Wide Web Conference (WWW '19) . ACM, 4--14. A. Agarwal, X. Wang, C. Li, M. Bendersky, and M. Najork. 2019. Addressing Trust Bias for Unbiased Learning-to-Rank. In Proc. of the 2019 World Wide Web Conference (WWW '19). ACM, 4--14."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.5555\/2567709.2567766"},{"key":"e_1_3_2_2_3_1","volume-title":"Proc. of the 24th International Conference on Neural Information Processing Systems(NIPS'11)","author":"Chapelle O.","unstructured":"O. Chapelle and L. Li . 2011. An Empirical Evaluation of Thompson Sampling . In Proc. of the 24th International Conference on Neural Information Processing Systems(NIPS'11) . 2249--2257. O. Chapelle and L. Li. 2011. An Empirical Evaluation of Thompson Sampling. In Proc. of the 24th International Conference on Neural Information Processing Systems(NIPS'11). 2249--2257."},{"key":"e_1_3_2_2_4_1","volume-title":"Proc. of the 12th ACM International Conference on Web Search and Data Mining (WSDM '19)","author":"Chen M.","unstructured":"M. Chen , A. Beutel , P. Covington , S. Jain , F. Belletti , and E. H. Chi . 2019. Top-K Off-Policy Correction for a REINFORCE Recommender System . In Proc. of the 12th ACM International Conference on Web Search and Data Mining (WSDM '19) . ACM, 456--464. M. Chen, A. Beutel, P. Covington, S. Jain, F. Belletti, and E. H. Chi. 2019. Top-K Off-Policy Correction for a REINFORCE Recommender System. In Proc. of the 12th ACM International Conference on Web Search and Data Mining (WSDM '19). ACM, 456--464."},{"key":"e_1_3_2_2_5_1","volume-title":"Proc. of the 13th ACM Conference on Recommender Systems (RecSys '19)","author":"Dacrema M. F.","unstructured":"M. F. Dacrema , P. Cremonesi , and D. Jannach . 2019. Are We Really Making Much Progress? A Worrying Analysis of Recent Neural Recommendation Approaches . In Proc. of the 13th ACM Conference on Recommender Systems (RecSys '19) . ACM,101--109. M. F. Dacrema, P. Cremonesi, and D. Jannach. 2019. Are We Really Making Much Progress? A Worrying Analysis of Recent Neural Recommendation Approaches. In Proc. of the 13th ACM Conference on Recommender Systems (RecSys '19). ACM,101--109."},{"key":"e_1_3_2_2_6_1","volume-title":"Proc. of the 28th International Conference on International Conference on Machine Learning (ICML'11)","author":"Dud\u00edk M.","unstructured":"M. Dud\u00edk , J. Langford , and L. Li . 2011. Doubly Robust Policy Evaluation and Learning . In Proc. of the 28th International Conference on International Conference on Machine Learning (ICML'11) . 1097--1104. M. Dud\u00edk, J. Langford, and L. Li. 2011. Doubly Robust Policy Evaluation and Learning. In Proc. of the 28th International Conference on International Conference on Machine Learning (ICML'11). 1097--1104."},{"key":"e_1_3_2_2_7_1","volume-title":"More Robust Doubly Robust Off-policy Evaluation. In Proc. of the 35th International Conference on Machine Learning (ICML'18","volume":"1456","author":"Farajtabar M.","unstructured":"M. Farajtabar , Y. Chow , and M. Ghavamzadeh . 2018 . More Robust Doubly Robust Off-policy Evaluation. In Proc. of the 35th International Conference on Machine Learning (ICML'18 , Vol. 80). PMLR, 1447-- 1456 . M. Farajtabar, Y. Chow, and M. Ghavamzadeh. 2018. More Robust Doubly Robust Off-policy Evaluation. In Proc. of the 35th International Conference on Machine Learning (ICML'18, Vol. 80). PMLR, 1447--1456."},{"key":"e_1_3_2_2_8_1","volume-title":"Ch. In Proc. of the 8th ACM Conference on Recommender Systems (RecSys '14)","author":"Garcin F.","unstructured":"F. Garcin , B. Faltings , O. Donatsch , A. Alazzawi , C. Bruttin , and A. Huber . 2014. Offline and Online Evaluation of News Recommender Systems at Swiss info . Ch. In Proc. of the 8th ACM Conference on Recommender Systems (RecSys '14) . 169--176. F. Garcin, B. Faltings, O. Donatsch, A. Alazzawi, C. Bruttin, and A. Huber. 2014. Offline and Online Evaluation of News Recommender Systems at Swiss info. Ch. In Proc. of the 8th ACM Conference on Recommender Systems (RecSys '14). 169--176."},{"key":"e_1_3_2_2_9_1","volume-title":"InProc. of the Eleventh ACM International Conference on Web Search and Data Mining (WSDM '18)","author":"Gilotte A.","unstructured":"A. Gilotte , C. Calauz\u00e8nes , T. Nedelec , A. Abraham , and S. Doll\u00e9 . 2018. Offline A\/B Testing for Recommender Systems . InProc. of the Eleventh ACM International Conference on Web Search and Data Mining (WSDM '18) . ACM, 198--206. A. Gilotte, C. Calauz\u00e8nes, T. Nedelec, A. Abraham, and S. Doll\u00e9. 2018. Offline A\/B Testing for Recommender Systems. InProc. of the Eleventh ACM International Conference on Web Search and Data Mining (WSDM '18). ACM, 198--206."},{"key":"e_1_3_2_2_10_1","volume-title":"Proceedings of the 12th ACM International Conference on Web Search and Data Mining (WSDM '19)","author":"Gruson A.","unstructured":"A. Gruson , P. Chandar , C. Charbuillet , J. McInerney , S. Hansen , D. Tardieu , and B. Carterette . 2019. Offline Evaluation to Make Decisions About Playlist Recommendation Algorithms . In Proceedings of the 12th ACM International Conference on Web Search and Data Mining (WSDM '19) . ACM, 420--428. A. Gruson, P. Chandar, C. Charbuillet, J. McInerney, S. Hansen, D. Tardieu, and B. Carterette. 2019. Offline Evaluation to Make Decisions About Playlist Recommendation Algorithms. In Proceedings of the 12th ACM International Conference on Web Search and Data Mining (WSDM '19). ACM, 420--428."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"crossref","unstructured":"D. Hosmer Jr. S. Lemeshow and R. Sturdivant. 2013.Applied logistic regression. Vol. 398. John Wiley & Sons.  D. Hosmer Jr. S. Lemeshow and R. Sturdivant. 2013.Applied logistic regression. Vol. 398. John Wiley & Sons.","DOI":"10.1002\/9781118548387"},{"key":"e_1_3_2_2_12_1","unstructured":"E. Ie C. Hsu M. Mladenov V. Jain S. Narvekar J. Wang R. Wu and C. Boutilier. 2019. RecSim: A Configurable Simulation Platform for Recommender Systems. arXiv:1909.04847 [cs.LG]  E. Ie C. Hsu M. Mladenov V. Jain S. Narvekar J. Wang R. Wu and C. Boutilier. 2019. RecSim: A Configurable Simulation Platform for Recommender Systems. arXiv:1909.04847 [cs.LG]"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331269"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3298689.3347069"},{"key":"e_1_3_2_2_15_1","volume-title":"Bandit Feedback: An Overview of the State-of-the-art. arXiv:1909.08471 [cs.IR]","author":"Jeunen O.","year":"2019","unstructured":"O. Jeunen , D. Mykhaylov , D. Rohde , F. Vasile , A. Gilotte , and M. Bompaire . 2019 . Learning from Bandit Feedback: An Overview of the State-of-the-art. arXiv:1909.08471 [cs.IR] O. Jeunen, D. Mykhaylov, D. Rohde, F. Vasile, A. Gilotte, and M. Bompaire. 2019. Learning from Bandit Feedback: An Overview of the State-of-the-art. arXiv:1909.08471 [cs.IR]"},{"key":"e_1_3_2_2_16_1","unstructured":"O. Jeunen D. Rohde and F. Vasile. 2019. On the Value of Bandit Feedback for Offline Recommender System Evaluation. arXiv:1907.12384 [cs.IR]  O. Jeunen D. Rohde and F. Vasile. 2019. On the Value of Bandit Feedback for Offline Recommender System Evaluation. arXiv:1907.12384 [cs.IR]"},{"key":"e_1_3_2_2_17_1","volume-title":"Proc. of the 6th International Conference on Learning Representations (ICLR '18)","author":"Joachims T.","year":"2018","unstructured":"T. Joachims , A. Swaminathan , and M. de Rijke . 2018 . Deep Learning with Logged Bandit Feedback . In Proc. of the 6th International Conference on Learning Representations (ICLR '18) . T. Joachims, A. Swaminathan, and M. de Rijke. 2018. Deep Learning with Logged Bandit Feedback. In Proc. of the 6th International Conference on Learning Representations (ICLR '18)."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2009.263"},{"key":"e_1_3_2_2_19_1","volume-title":"InProc. of the 28th International Conference on International Conference on Machine Learning (ICML '11)","author":"Le Q.","unstructured":"Q. Le , J. Ngiam , A. Coates , A. Lahiri , B. Prochnow , and A. Ng . 2011. On optimization methods for deep learning . InProc. of the 28th International Conference on International Conference on Machine Learning (ICML '11) . 265--272. Q. Le, J. Ngiam, A. Coates, A. Lahiri, B. Prochnow, and A. Ng. 2011. On optimization methods for deep learning. InProc. of the 28th International Conference on International Conference on Machine Learning (ICML '11). 265--272."},{"key":"e_1_3_2_2_20_1","unstructured":"D. Lefortier A. Swaminathan X. Gu T. Joachims and M. de Rijke. 2016. Large-scale validation of counterfactual learning methods: A test-bed. arXiv preprint arXiv:1612.00367(2016).  D. Lefortier A. Swaminathan X. Gu T. Joachims and M. de Rijke. 2016. Large-scale validation of counterfactual learning methods: A test-bed. arXiv preprint arXiv:1612.00367(2016)."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"crossref","unstructured":"A. S. Lewis and M. L. Overton. 2013. Nonsmooth optimization via quasi-Newton methods. Mathematical Programming141 1--2 (2013) 135--163.  A. S. Lewis and M. L. Overton. 2013. Nonsmooth optimization via quasi-Newton methods. Mathematical Programming141 1--2 (2013) 135--163.","DOI":"10.1007\/s10107-012-0514-2"},{"key":"e_1_3_2_2_22_1","volume-title":"Proc. of the 19th International Conference on World Wide Web (WWW '10)","author":"Li L.","unstructured":"L. Li , W. Chu , J. Langford , and R. E. Schapire . 2010. A Contextual-Bandit Approach to Personalized News Article Recommendation . In Proc. of the 19th International Conference on World Wide Web (WWW '10) . ACM, 661--670. L. Li, W. Chu, J. Langford, and R. E. Schapire. 2010. A Contextual-Bandit Approach to Personalized News Article Recommendation. In Proc. of the 19th International Conference on World Wide Web (WWW '10). ACM, 661--670."},{"key":"e_1_3_2_2_23_1","volume-title":"Proc. of the 39th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR '16)","author":"Li S.","unstructured":"S. Li , A. Karatzoglou , and C. Gentile . 2016. Collaborative Filtering Bandits . In Proc. of the 39th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR '16) . ACM, 539--548. S. Li, A. Karatzoglou, and C. Gentile. 2016. Collaborative Filtering Bandits. In Proc. of the 39th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR '16). ACM, 539--548."},{"key":"e_1_3_2_2_24_1","volume-title":"Proc. of the 2018 World Wide Web Conference(WWW '18)","author":"Liang D.","unstructured":"D. Liang , R. G. Krishnan , M. D Hoffman , and T. Jebara . 2018. Variational autoencoders for collaborative filtering . In Proc. of the 2018 World Wide Web Conference(WWW '18) . ACM, 689--698. D. Liang, R. G. Krishnan, M. D Hoffman, and T. Jebara. 2018. Variational autoencoders for collaborative filtering. In Proc. of the 2018 World Wide Web Conference(WWW '18). ACM, 689--698."},{"key":"e_1_3_2_2_25_1","volume-title":"Bayesian Counterfactual Risk Minimization. In Proc. of the 36th International Conference on Machine Learning (ICML '19","volume":"4133","author":"London B.","unstructured":"B. London and T. Sandler . 2019 . Bayesian Counterfactual Risk Minimization. In Proc. of the 36th International Conference on Machine Learning (ICML '19 , Vol. 97). PMLR, 4125-- 4133 . B. London and T. Sandler. 2019. Bayesian Counterfactual Risk Minimization. In Proc. of the 36th International Conference on Machine Learning (ICML '19, Vol. 97). PMLR, 4125--4133."},{"key":"e_1_3_2_2_26_1","volume-title":"Proc. of the 2020World Wide Web Conference (WWW '20)","author":"Ma J.","unstructured":"J. Ma , Z. Zhao , X. Yi , J. Yang , M. Chen , J. Tang , L. Hong , and E. H. Chi . 2020. Off-Policy Learning in Two-Stage Recommender Systems . In Proc. of the 2020World Wide Web Conference (WWW '20) . ACM. J. Ma, Z. Zhao, X. Yi, J. Yang, M. Chen, J. Tang, L. Hong, and E. H. Chi. 2020. Off-Policy Learning in Two-Stage Recommender Systems. In Proc. of the 2020World Wide Web Conference (WWW '20). ACM."},{"key":"e_1_3_2_2_27_1","volume-title":"Imitation-Regularized Offline Learning. In Proc. of the 22nd International Conference on Artificial Intelligence and Statistics (AISTATS) (AI Stats '19","volume":"2965","author":"Ma Y.","unstructured":"Y. Ma , Y. Wang , and B. Narayanaswamy . 2019 . Imitation-Regularized Offline Learning. In Proc. of the 22nd International Conference on Artificial Intelligence and Statistics (AISTATS) (AI Stats '19 , Vol. 89). PMLR, 2956-- 2965 . Y. Ma, Y. Wang, and B. Narayanaswamy. 2019. Imitation-Regularized Offline Learning. In Proc. of the 22nd International Conference on Artificial Intelligence and Statistics (AISTATS) (AI Stats '19, Vol. 89). PMLR, 2956--2965."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/2487575.2488200"},{"key":"e_1_3_2_2_29_1","unstructured":"D. Mykhaylov D. Rohde F. Vasile M. Bompaire and O. Jeunen. 2019. Three Methods for Training on Bandit Feedback. arXiv:1904.10799 [cs.IR]  D. Mykhaylov D. Rohde F. Vasile M. Bompaire and O. Jeunen. 2019. Three Methods for Training on Bandit Feedback. arXiv:1904.10799 [cs.IR]"},{"key":"e_1_3_2_2_30_1","unstructured":"A. B. Owen. 2013.Monte Carlo theory methods and examples.  A. B. Owen. 2013.Monte Carlo theory methods and examples."},{"key":"e_1_3_2_2_31_1","first-page":"8026","article-title":"PyTorch: An Imperative Style, High-Performance Deep Learning Library","volume":"32","author":"Paszke A.","year":"2019","unstructured":"A. Paszke , S. Gross , F. Massa , A. Lerer , J. Bradbury , G. Chanan , T. Killeen , Z. Lin , N. Gimelshein , L. Antiga , A. Desmaison , A. Kopf , E. Yang , Z. DeVito , M. Raison , A. Tejani , S. Chilamkurthy , B. Steiner , L. Fang , J. Bai , and S. Chintala . 2019 . PyTorch: An Imperative Style, High-Performance Deep Learning Library . In Advances in Neural Information Processing Systems 32. 8026 -- 8037 . A. Paszke, S. Gross, F. Massa, A. Lerer, J. Bradbury, G. Chanan, T. Killeen, Z. Lin,N. Gimelshein, L. Antiga, A. Desmaison, A. Kopf, E. Yang, Z. DeVito, M. Raison, A. Tejani, S. Chilamkurthy, B. Steiner, L. Fang, J. Bai, and S. Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems 32. 8026--8037.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_32_1","unstructured":"S. Rendle. 2019. Evaluation Metrics for Item Recommendation under Sampling. arXiv:1912.02263 [cs.IR]  S. Rendle. 2019. Evaluation Metrics for Item Recommendation under Sampling. arXiv:1912.02263 [cs.IR]"},{"key":"e_1_3_2_2_33_1","unstructured":"S. Rendle L. Zhang and Y. Koren. 2019. On the Difficulty of Evaluating Baselines: A Study on Recommender Systems. arXiv:1905.01395 [cs.IR]  S. Rendle L. Zhang and Y. Koren. 2019. On the Difficulty of Evaluating Baselines: A Study on Recommender Systems. arXiv:1905.01395 [cs.IR]"},{"key":"e_1_3_2_2_34_1","unstructured":"D. Rohde S. Bonner T. Dunlop F. Vasile and A. Karatzoglou. 2018. RecoGym: A Reinforcement Learning Environment for the problem of Product Recommendation in Online Advertising. ArXiv e-prints(Aug. 2018). arXiv:1808.00720 [cs.IR]  D. Rohde S. Bonner T. Dunlop F. Vasile and A. Karatzoglou. 2018. RecoGym: A Reinforcement Learning Environment for the problem of Product Recommendation in Online Advertising. ArXiv e-prints(Aug. 2018). arXiv:1808.00720 [cs.IR]"},{"key":"e_1_3_2_2_35_1","volume-title":"Proc. of the 10th ACM Conference on Recommender Systems (RecSys '16)","author":"Rossetti M.","unstructured":"M. Rossetti , F. Stella , and M. Zanker . 2016. Contrasting Offline and Online Results when Evaluating Recommendation Algorithms . In Proc. of the 10th ACM Conference on Recommender Systems (RecSys '16) . ACM, 31--34. M. Rossetti, F. Stella, and M. Zanker. 2016. Contrasting Offline and Online Results when Evaluating Recommendation Algorithms. In Proc. of the 10th ACM Conference on Recommender Systems (RecSys '16). ACM, 31--34."},{"key":"e_1_3_2_2_36_1","volume-title":"Proc. of the 26th ACM Conference on Knowledge Discovery & Data Mining (KDD '20)","author":"Sakhi O.","unstructured":"O. Sakhi , S. Bonner , D. Rohde , and F. Vasile . 2020. BLOB : A Probabilistic Model for Recommendation that Combines Organic and Bandit Signals . In Proc. of the 26th ACM Conference on Knowledge Discovery & Data Mining (KDD '20) . ACM. O. Sakhi, S. Bonner, D. Rohde, and F. Vasile. 2020. BLOB : A Probabilistic Model for Recommendation that Combines Organic and Bandit Signals. In Proc. of the 26th ACM Conference on Knowledge Discovery & Data Mining (KDD '20). ACM."},{"key":"e_1_3_2_2_37_1","volume-title":"Proc. of the 25th International Conference on Machine Learning (ICML '08)","author":"Salakhutdinov R.","unstructured":"R. Salakhutdinov and A. Mnih . 2008. Bayesian Probabilistic Matrix Factorization Using Markov Chain Monte Carlo . In Proc. of the 25th International Conference on Machine Learning (ICML '08) . ACM, 880--887. R. Salakhutdinov and A. Mnih. 2008. Bayesian Probabilistic Matrix Factorization Using Markov Chain Monte Carlo. In Proc. of the 25th International Conference on Machine Learning (ICML '08). ACM, 880--887."},{"key":"e_1_3_2_2_38_1","volume-title":"Proc. of the 13th International Conference on Web Search and Data Mining (WSDM '20)","author":"Shenbin I.","unstructured":"I. Shenbin , A. Alekseev , E. Tutubalina , V. Malykh , and S. I. Nikolenko . 2020. RecVAE: A New Variational Autoencoder for Top-N Recommendations with Implicit Feedback . In Proc. of the 13th International Conference on Web Search and Data Mining (WSDM '20) . ACM, 528--536. I. Shenbin, A. Alekseev, E. Tutubalina, V. Malykh, and S. I. Nikolenko. 2020. RecVAE: A New Variational Autoencoder for Top-N Recommendations with Implicit Feedback. In Proc. of the 13th International Conference on Web Search and Data Mining (WSDM '20). ACM, 528--536."},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0378-3758(00)00115-4"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1287\/mnsc.1050.0451"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/2507157.2507160"},{"key":"e_1_3_2_2_42_1","volume-title":"Embarrassingly Shallow Autoencoders for Sparse Data. In The World Wide Web Conference (WWW '19)","author":"Steck H.","year":"2019","unstructured":"H. Steck . 2019 . Embarrassingly Shallow Autoencoders for Sparse Data. In The World Wide Web Conference (WWW '19) . ACM, 3251--3257. H. Steck. 2019. Embarrassingly Shallow Autoencoders for Sparse Data. In The World Wide Web Conference (WWW '19). ACM, 3251--3257."},{"key":"e_1_3_2_2_43_1","unstructured":"A. Storkey. 2009. When training and test sets are different: characterizing learning transfer. Dataset shift in machine learning(2009) 3--28.  A. Storkey. 2009. When training and test sets are different: characterizing learning transfer. Dataset shift in machine learning(2009) 3--28."},{"key":"e_1_3_2_2_44_1","volume-title":"CAB: Continuous Adaptive Blending for Policy Evaluation and Learning. In International Conference on Machine Learning (ICML'19)","author":"Su Y.","unstructured":"Y. Su , L. Wang , M. Santacatterina , and T. Joachims . 2019 . CAB: Continuous Adaptive Blending for Policy Evaluation and Learning. In International Conference on Machine Learning (ICML'19) . 6005--6014. Y. Su, L. Wang, M. Santacatterina, and T. Joachims. 2019. CAB: Continuous Adaptive Blending for Policy Evaluation and Learning. In International Conference on Machine Learning (ICML'19). 6005--6014."},{"key":"e_1_3_2_2_45_1","unstructured":"R. S. Sutton and A. G. Barto. 1998. Introduction to reinforcement learning. Vol. 135.  R. S. Sutton and A. G. Barto. 1998. Introduction to reinforcement learning. Vol. 135."},{"key":"e_1_3_2_2_46_1","unstructured":"R. S. Sutton D. McAllester S. Singh and Y. Mansour. 1999. Policy Gradient Methods for Reinforcement Learning with Function Approximation. In Advances in Neural Information Processing Systems (NIPS'99). 1057--1063.  R. S. Sutton D. McAllester S. Singh and Y. Mansour. 1999. Policy Gradient Methods for Reinforcement Learning with Function Approximation. In Advances in Neural Information Processing Systems (NIPS'99). 1057--1063."},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.5555\/2789272.2886805"},{"key":"e_1_3_2_2_48_1","unstructured":"A. Swaminathan and T. Joachims. 2015. The Self-Normalized Estimator for Counterfactual Learning. In Advances in Neural Information Processing Systems. 3231--3239.  A. Swaminathan and T. Joachims. 2015. The Self-Normalized Estimator for Counterfactual Learning. In Advances in Neural Information Processing Systems. 3231--3239."},{"key":"e_1_3_2_2_49_1","volume-title":"On the Design of Estimators for Bandit Off-Policy Evaluation. In Proc. of the 36th International Conference on Machine Learning (ICML'19","volume":"6476","author":"Vlassis N.","unstructured":"N. Vlassis , A. Bibaut , M. Dimakopoulou , and T. Jebara . 2019 . On the Design of Estimators for Bandit Off-Policy Evaluation. In Proc. of the 36th International Conference on Machine Learning (ICML'19 , Vol. 97). PMLR, 6468-- 6476 . N. Vlassis, A. Bibaut, M. Dimakopoulou, and T. Jebara. 2019. On the Design of Estimators for Bandit Off-Policy Evaluation. In Proc. of the 36th International Conference on Machine Learning (ICML'19, Vol. 97). PMLR, 6468--6476."},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"e_1_3_2_2_51_1","volume-title":"Journal of Machine Learning Research11","author":"Yu J.","year":"2010","unstructured":"J. Yu , S.V.N. Vishwanathan , S. G\u00fcnter , and N. Schraudolph . 2010. A quasi-Newton approach to non-smooth convex optimization problems in machine learning . Journal of Machine Learning Research11 , Mar ( 2010 ), 1145--1200. J. Yu, S.V.N. Vishwanathan, S. G\u00fcnter, and N. Schraudolph. 2010. A quasi-Newton approach to non-smooth convex optimization problems in machine learning. Journal of Machine Learning Research11, Mar (2010), 1145--1200."},{"key":"e_1_3_2_2_52_1","volume-title":"Proc. of the 25th ACM Conference on Knowledge Discovery & Data Mining (KDD '19)","author":"Zou H.","unstructured":"H. Zou , K. Kuang , B. Chen , P. Chen , and P. Cui . 2019. Focused Context Balancing for Robust Offline Policy Evaluation . In Proc. of the 25th ACM Conference on Knowledge Discovery & Data Mining (KDD '19) . ACM, 696--704. H. Zou, K. Kuang, B. Chen, P. Chen, and P. Cui. 2019. Focused Context Balancing for Robust Offline Policy Evaluation. In Proc. of the 25th ACM Conference on Knowledge Discovery & Data Mining (KDD '19). ACM, 696--704."}],"event":{"name":"KDD '20: The 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Virtual Event CA USA","acronym":"KDD '20","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3394486.3403175","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3394486.3403175","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:31:34Z","timestamp":1750195894000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3394486.3403175"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,8,20]]},"references-count":52,"alternative-id":["10.1145\/3394486.3403175","10.1145\/3394486"],"URL":"https:\/\/doi.org\/10.1145\/3394486.3403175","relation":{},"subject":[],"published":{"date-parts":[[2020,8,20]]},"assertion":[{"value":"2020-08-20","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}