{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T10:08:16Z","timestamp":1775815696041,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,11,3]],"date-time":"2019-11-03T00:00:00Z","timestamp":1572739200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,11,3]]},"DOI":"10.1145\/3357384.3358031","type":"proceedings-article","created":{"date-parts":[[2019,11,4]],"date-time":"2019-11-04T14:11:35Z","timestamp":1572876695000},"page":"971-980","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":15,"title":["Model-based Constrained MDP for Budget Allocation in Sequential Incentive Marketing"],"prefix":"10.1145","author":[{"given":"Shuai","family":"Xiao","sequence":"first","affiliation":[{"name":"Ant Financial Services Group, Shanghai, China"}]},{"given":"Le","family":"Guo","sequence":"additional","affiliation":[{"name":"Ant Financial Services Group, Beijing, China"}]},{"given":"Zaifan","family":"Jiang","sequence":"additional","affiliation":[{"name":"Ant Financial Services Group, Beijing, China"}]},{"given":"Lei","family":"Lv","sequence":"additional","affiliation":[{"name":"Ant Financial Services Group, Beijing, China"}]},{"given":"Yuanbo","family":"Chen","sequence":"additional","affiliation":[{"name":"Ant Financial Services Group, Beijing, China"}]},{"given":"Jun","family":"Zhu","sequence":"additional","affiliation":[{"name":"Ant Financial Services Group, Beijing, China"}]},{"given":"Shuang","family":"Yang","sequence":"additional","affiliation":[{"name":"Ant Financial Services Group, San Mateo, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2019,11,3]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.5555\/3305381.3305384"},{"key":"e_1_3_2_1_2_1","volume-title":"International Conference on Machine Learning. 1638--1646","author":"Agarwal Alekh","year":"2014"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","volume-title":"Constrained Markov decision processes","author":"Altman Eitan","DOI":"10.1201\/9781315140223"},{"key":"e_1_3_2_1_4_1","volume-title":"Convex optimization","author":"Boyd Stephen"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the 28th International Conference on machine learning (ICML-11)","author":"Deisenroth Marc","year":"2011"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1214\/14-STS500"},{"key":"e_1_3_2_1_8_1","volume-title":"More Robust Doubly Robust Off-policy Evaluation. In International Conference on Machine Learning . 1446--1455","author":"Farajtabar Mehrdad","year":"2018"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.5555\/1622519.1622522"},{"key":"e_1_3_2_1_10_1","volume-title":"World models. arXiv preprint arXiv:1803.10122","author":"Ha David","year":"2018"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.5555\/3305381.3305521"},{"key":"e_1_3_2_1_12_1","volume-title":"Learning latent dynamics for planning from pixels. arXiv preprint arXiv:1811.04551","author":"Hafner Danijar","year":"2018"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2648584.2648589"},{"key":"e_1_3_2_1_14_1","volume-title":"Doubly Robust Off-policy Value Evaluation for Reinforcement Learning. In International Conference on Machine Learning. 652--661","author":"Jiang Nan","year":"2016"},{"key":"e_1_3_2_1_15_1","unstructured":"Thorsten Joachims Adith Swaminathan and Maarten de Rijke. 2018. Deep learning with logged bandit feedback. (2018).  Thorsten Joachims Adith Swaminathan and Maarten de Rijke. 2018. Deep learning with logged bandit feedback. (2018)."},{"key":"e_1_3_2_1_16_1","volume-title":"Cost-Effective Incentive Allocation via Structured Counterfactual Inference. arXiv preprint arXiv:1902.02495","author":"Lopez Romain","year":"2019"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/2487575.2488200"},{"key":"e_1_3_2_1_18_1","volume-title":"et almbox","author":"Mnih Volodymyr","year":"2015"},{"key":"e_1_3_2_1_19_1","unstructured":"Ofir Nachum Mohammad Norouzi Kelvin Xu and Dale Schuurmans. 2017. Bridging the gap between value and policy based reinforcement learning. In Advances in Neural Information Processing Systems. 2775--2785.  Ofir Nachum Mohammad Norouzi Kelvin Xu and Dale Schuurmans. 2017. Bridging the gap between value and policy based reinforcement learning. In Advances in Neural Information Processing Systems. 2775--2785."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"e_1_3_2_1_21_1","volume-title":"Eligibility traces for off-policy policy evaluation","author":"Precup Doina","year":"2000"},{"key":"e_1_3_2_1_22_1","volume-title":"Equivalence between policy gradients and soft q-learning. arXiv preprint arXiv:1704.06440","author":"Schulman John","year":"2017"},{"key":"e_1_3_2_1_23_1","volume-title":"International Conference on Machine Learning . 1889--1897","author":"Schulman John","year":"2015"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.5555\/2789272.2886805"},{"key":"e_1_3_2_1_25_1","volume-title":"International Conference on Machine Learning. 814--823","author":"Swaminathan Adith","year":"2015"},{"key":"e_1_3_2_1_26_1","unstructured":"Adith Swaminathan and Thorsten Joachims. 2015c. The self-normalized estimator for counterfactual learning. In advances in neural information processing systems. 3231--3239.  Adith Swaminathan and Thorsten Joachims. 2015c. The self-normalized estimator for counterfactual learning. In advances in neural information processing systems. 3231--3239."},{"key":"e_1_3_2_1_27_1","volume-title":"International Conference on Machine Learning. 2139--2148","author":"Thomas Philip","year":"2016"},{"key":"e_1_3_2_1_28_1","volume-title":"Marc Lanctot, and Nando De Freitas.","author":"Wang Ziyu","year":"2015"},{"key":"e_1_3_2_1_29_1","unstructured":"Manuel Watter Jost Springenberg Joschka Boedecker and Martin Riedmiller. 2015. Embed to control: A locally linear latent dynamics model for control from raw images. In Advances in neural information processing systems. 2746--2754.  Manuel Watter Jost Springenberg Joschka Boedecker and Martin Riedmiller. 2015. Embed to control: A locally linear latent dynamics model for control from raw images. In Advances in neural information processing systems. 2746--2754."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3271748"}],"event":{"name":"CIKM '19: The 28th ACM International Conference on Information and Knowledge Management","location":"Beijing China","acronym":"CIKM '19","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 28th ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3357384.3358031","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3357384.3358031","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:44:18Z","timestamp":1750203858000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3357384.3358031"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,11,3]]},"references-count":30,"alternative-id":["10.1145\/3357384.3358031","10.1145\/3357384"],"URL":"https:\/\/doi.org\/10.1145\/3357384.3358031","relation":{},"subject":[],"published":{"date-parts":[[2019,11,3]]},"assertion":[{"value":"2019-11-03","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}