{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T05:24:49Z","timestamp":1755926689656,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,8,14]],"date-time":"2022-08-14T00:00:00Z","timestamp":1660435200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,8,14]]},"DOI":"10.1145\/3534678.3539193","type":"proceedings-article","created":{"date-parts":[[2022,8,12]],"date-time":"2022-08-12T19:06:41Z","timestamp":1660331201000},"page":"3752-3760","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Multi-objective Optimization of Notifications Using Offline Reinforcement Learning"],"prefix":"10.1145","author":[{"given":"Prakruthi","family":"Prabhakar","sequence":"first","affiliation":[{"name":"LinkedIn Corporation, Mountain View, CA, USA"}]},{"given":"Yiping","family":"Yuan","sequence":"additional","affiliation":[{"name":"LinkedIn Corporation, Mountain View, CA, USA"}]},{"given":"Guangyu","family":"Yang","sequence":"additional","affiliation":[{"name":"LinkedIn Corporation, Mountain View, CA, USA"}]},{"given":"Wensheng","family":"Sun","sequence":"additional","affiliation":[{"name":"LinkedIn Corporation, Mountain View, CA, USA"}]},{"given":"Ajith","family":"Muralidharan","sequence":"additional","affiliation":[{"name":"LinkedIn Corporation, Mountain View, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2022,8,14]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/2020408.2020435"},{"key":"e_1_3_2_2_2_1","volume-title":"Proceedings of the 2021 International Conference on 
Multimodal Interaction. 613--618","author":"\u00d6yk\u00fc Zeynep","year":"2021","unstructured":"\u00d6yk\u00fc Zeynep Bayramo\u011flu, Engin Erzin, Tevfik Metin Sezgin, and Y\u00fccel Yemez. 2021. Engagement Rewarded Actor-Critic with Conservative Q-Learning for Speech-Driven Laughter Backchannel Generation. In Proceedings of the 2021 International Conference on Multimodal Interaction. 613--618."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290999"},{"key":"e_1_3_2_2_4_1","volume-title":"Robonet: Large-scale multi-robot learning. arXiv preprint arXiv:1910.11215","author":"Dasari Sudeep","year":"2019","unstructured":"Sudeep Dasari, Frederik Ebert, Stephen Tian, Suraj Nair, Bernadette Bucher, Karl Schmeckpeper, Siddharth Singh, Sergey Levine, and Chelsea Finn. 2019. Robonet: Large-scale multi-robot learning. arXiv preprint arXiv:1910.11215 (2019)."},{"key":"e_1_3_2_2_5_1","volume-title":"International Conference on Machine Learning. PMLR","author":"Fu Justin","year":"2019","unstructured":"Justin Fu, Aviral Kumar, Matthew Soh, and Sergey Levine. 2019. Diagnosing bottlenecks in deep q-learning algorithms. In International Conference on Machine Learning. PMLR, 2021--2030."},{"key":"e_1_3_2_2_6_1","volume-title":"International Conference on Machine Learning. PMLR","author":"Fujimoto Scott","year":"2019","unstructured":"Scott Fujimoto, David Meger, and Doina Precup. 2019. Off-policy deep reinforcement learning without exploration. In International Conference on Machine Learning. PMLR, 2052--2062."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219880"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3132847.3132849"},{"key":"e_1_3_2_2_9_1","volume-title":"et al.","author":"Ie Eugene","year":"2019","unstructured":"Eugene Ie, Vihan Jain, Jing Wang, Sanmit Narvekar, Ritesh Agarwal, Rui Wu, Heng-Tze Cheng, Morgane Lustman, Vince Gatto, Paul Covington, et al. 2019. 
Reinforcement learning for slate-based recommender systems: A tractable decomposition and practical methodology. arXiv preprint arXiv:1905.12767 (2019)."},{"key":"e_1_3_2_2_10_1","volume-title":"Craig Ferguson, Agata Lapedriza, Noah Jones, Shixiang Gu, and Rosalind Picard.","author":"Jaques Natasha","year":"2019","unstructured":"Natasha Jaques, Asma Ghandeharioun, Judy Hanwen Shen, Craig Ferguson, Agata Lapedriza, Noah Jones, Shixiang Gu, and Rosalind Picard. 2019. Way off-policy batch deep reinforcement learning of implicit human preferences in dialog. arXiv preprint arXiv:1907.00456 (2019)."},{"key":"e_1_3_2_2_11_1","volume-title":"Model Based Reinforcement Learning for Atari. In International Conference on Learning Representations .","author":"Kaiser \u0141ukasz","year":"2019","unstructured":"\u0141ukasz Kaiser, Mohammad Babaeizadeh, Piotr Mi\u0142o\u015b, B\u0142a\u017cej Osi\u0144ski, Roy H Campbell, Konrad Czechowski, Dumitru Erhan, Chelsea Finn, Piotr Kozakowski, Sergey Levine, et al. 2019. Model Based Reinforcement Learning for Atari. In International Conference on Learning Representations ."},{"key":"e_1_3_2_2_12_1","volume-title":"Senthil Yogamani, and Patrick P\u00e9rez.","author":"Kiran B Ravi","year":"2021","unstructured":"B Ravi Kiran, Ibrahim Sobh, Victor Talpaert, Patrick Mannion, Ahmad A Al Sallab, Senthil Yogamani, and Patrick P\u00e9rez. 2021. Deep reinforcement learning for autonomous driving: A survey. IEEE Transactions on Intelligent Transportation Systems (2021)."},{"key":"e_1_3_2_2_13_1","volume-title":"Offline reinforcement learning with implicit q-learning. arXiv preprint arXiv:2110.06169","author":"Kostrikov Ilya","year":"2021","unstructured":"Ilya Kostrikov, Ashvin Nair, and Sergey Levine. 2021. Offline reinforcement learning with implicit q-learning. 
arXiv preprint arXiv:2110.06169 (2021)."},{"key":"e_1_3_2_2_14_1","first-page":"1179","article-title":"Conservative q-learning for offline reinforcement learning","volume":"33","author":"Kumar Aviral","year":"2020","unstructured":"Aviral Kumar, Aurick Zhou, George Tucker, and Sergey Levine. 2020. Conservative q-learning for offline reinforcement learning. Advances in Neural Information Processing Systems , Vol. 33 (2020), 1179--1191.","journal-title":"Advances in Neural Information Processing Systems"},{"volume-title":"Reinforcement learning","author":"Lange Sascha","key":"e_1_3_2_2_15_1","unstructured":"Sascha Lange, Thomas Gabel, and Martin Riedmiller. 2012. Batch reinforcement learning. In Reinforcement learning . Springer, 45--73."},{"key":"e_1_3_2_2_16_1","volume-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643","author":"Levine Sergey","year":"2020","unstructured":"Sergey Levine, Aviral Kumar, George Tucker, and Justin Fu. 2020. Offline reinforcement learning: Tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643 (2020)."},{"key":"e_1_3_2_2_17_1","volume-title":"Pareto optimality. Pareto optimality, game theory and equilibria","author":"Luc Dinh The","year":"2008","unstructured":"Dinh The Luc. 2008. Pareto optimality. Pareto optimality, game theory and equilibria (2008), 481--515."},{"key":"e_1_3_2_2_18_1","volume-title":"Hado Van Hasselt, and Richard S Sutton","author":"Mahmood Ashique Rupam","year":"2014","unstructured":"Ashique Rupam Mahmood, Hado Van Hasselt, and Richard S Sutton. 2014. Weighted importance sampling for off-policy learning with linear function approximation.. In NIPS. 
3014--3022."},{"key":"e_1_3_2_2_19_1","volume-title":"et al.","author":"Mnih Volodymyr","year":"2015","unstructured":"Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Andrei A Rusu, Joel Veness, Marc G Bellemare, Alex Graves, Martin Riedmiller, Andreas K Fidjeland, Georg Ostrovski, et al. 2015. Human-level control through deep reinforcement learning. nature , Vol. 518, 7540 (2015), 529--533."},{"key":"e_1_3_2_2_20_1","volume-title":"Multi-objective deep reinforcement learning. arXiv preprint arXiv:1610.02707","author":"Mossalam Hossam","year":"2016","unstructured":"Hossam Mossalam, Yannis M Assael, Diederik M Roijers, and Shimon Whiteson. 2016. Multi-objective deep reinforcement learning. arXiv preprint arXiv:1610.02707 (2016)."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488560.3510017"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.14778\/3137765.3137770"},{"key":"e_1_3_2_2_23_1","volume-title":"Eligibility traces for off-policy policy evaluation","author":"Precup Doina","year":"2000","unstructured":"Doina Precup. 2000. Eligibility traces for off-policy policy evaluation. Computer Science Department Faculty Publication Series (2000), 80."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSST.2010.5496972"},{"key":"e_1_3_2_2_25_1","volume-title":"Felix Berkenkamp, Abbas Abdolmaleki, Michael Neunert, Thomas Lampe, Roland Hafner, Nicolas Heess, and Martin Riedmiller.","author":"Siegel Noah Y","year":"2020","unstructured":"Noah Y Siegel, Jost Tobias Springenberg, Felix Berkenkamp, Abbas Abdolmaleki, Michael Neunert, Thomas Lampe, Roland Hafner, Nicolas Heess, and Martin Riedmiller. 2020. Keep doing what worked: Behavioral modelling priors for offline reinforcement learning. 
arXiv preprint arXiv:2002.08396 (2020)."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2021.103535"},{"volume-title":"Reinforcement learning: An introduction","author":"Sutton Richard S","key":"e_1_3_2_2_27_1","unstructured":"Richard S Sutton and Andrew G Barto. 1998. Reinforcement learning: An introduction .MIT press Cambridge."},{"key":"e_1_3_2_2_28_1","volume-title":"International Conference on Machine Learning. PMLR, 2139--2148","author":"Thomas Philip","year":"2016","unstructured":"Philip Thomas and Emma Brunskill. 2016. Data-efficient off-policy policy evaluation for reinforcement learning. In International Conference on Machine Learning. PMLR, 2139--2148."},{"key":"e_1_3_2_2_29_1","volume-title":"Deep reinforcement learning and the deadly triad. arXiv preprint arXiv:1812.02648","author":"Hasselt Hado Van","year":"2018","unstructured":"Hado Van Hasselt, Yotam Doron, Florian Strub, Matteo Hessel, Nicolas Sonnerat, and Joseph Modayil. 2018. Deep reinforcement learning and the deadly triad. arXiv preprint arXiv:1812.02648 (2018)."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403128"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3132847.3133025"},{"key":"e_1_3_2_2_33_1","volume-title":"Towards optimal off-policy evaluation for reinforcement learning with marginalized importance sampling. arXiv preprint arXiv:1906.03393","author":"Xie Tengyang","year":"2019","unstructured":"Tengyang Xie, Yifei Ma, and Yu-Xiang Wang. 2019. Towards optimal off-policy evaluation for reinforcement learning with marginalized importance sampling. arXiv preprint arXiv:1906.03393 (2019)."},{"key":"e_1_3_2_2_34_1","volume-title":"Offline Reinforcement Learning for Mobile Notifications. 
arXiv preprint arXiv:2202.03867","author":"Yuan Yiping","year":"2022","unstructured":"Yiping Yuan, Ajith Muralidharan, Preetam Nandy, Miao Cheng, and Prakruthi Prabhakar. 2022. Offline Reinforcement Learning for Mobile Notifications. arXiv preprint arXiv:2202.03867 (2022)."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290981"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219906"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219886"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330668"}],"event":{"name":"KDD '22: The 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"],"location":"Washington DC USA","acronym":"KDD '22"},"container-title":["Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3534678.3539193","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3534678.3539193","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:59Z","timestamp":1750186979000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3534678.3539193"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,14]]},"references-count":38,"alternative-id":["10.1145\/3534678.3539193","10.1145\/3534678"],"URL":"https:\/\/doi.org\/10.1145\/3534678.3539193","relation":{},"subject":[],"published":{"date-parts":[[2022,8,14]]},"assertion":[{"value":"2022-08-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}