{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T06:17:12Z","timestamp":1757312232938,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,9,14]],"date-time":"2023-09-14T00:00:00Z","timestamp":1694649600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,9,14]]},"DOI":"10.1145\/3604915.3608873","type":"proceedings-article","created":{"date-parts":[[2023,9,14]],"date-time":"2023-09-14T22:40:23Z","timestamp":1694731223000},"page":"396-399","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Reward innovation for long-term member satisfaction"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-0479-7914","authenticated-orcid":false,"given":"Gary","family":"Tang","sequence":"first","affiliation":[{"name":"Algorithms Engineering, Netflix, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0397-8971","authenticated-orcid":false,"given":"Jiangwei","family":"Pan","sequence":"additional","affiliation":[{"name":"Netflix, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-5475-8690","authenticated-orcid":false,"given":"Henry","family":"Wang","sequence":"additional","affiliation":[{"name":"Netflix, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3005-5200","authenticated-orcid":false,"given":"Justin","family":"Basilico","sequence":"additional","affiliation":[{"name":"Netflix, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,9,14]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3523227.3547407"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240323.3240370"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/2623330.2623634"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3474236"},{"key":"e_1_3_2_1_6_1","volume-title":"Reward shaping for user satisfaction in a reinforce recommender. arXiv preprint arXiv:2209.15166","author":"Christakopoulou Konstantina","year":"2022","unstructured":"Konstantina Christakopoulou, Can Xu, Sai Zhang, Sriraj Badam, Trevor Potter, Daniel Li, Hao Wan, Xinyang Yi, Ya Le, Chris Berg, 2022. Reward shaping for user satisfaction in a reinforce recommender. arXiv preprint arXiv:2209.15166 (2022)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2959100.2959190"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/1864708.1864770"},{"key":"e_1_3_2_1_9_1","volume-title":"Robot shaping: Developing autonomous agents through learning. Artificial intelligence 71, 2","author":"Dorigo Marco","year":"1994","unstructured":"Marco Dorigo and Marco Colombetti. 1994. Robot shaping: Developing autonomous agents through learning. Artificial intelligence 71, 2 (1994), 321\u2013370."},{"key":"e_1_3_2_1_10_1","unstructured":"Akos\u00a0Lada et al. 2021. How machine learning powers Facebook\u2019s News Feed ranking algorithm. https:\/\/engineering.fb.com\/2021\/01\/26\/ml-applications\/news-feed-ranking\/"},{"key":"e_1_3_2_1_11_1","volume-title":"2016. 
Know Your Customers","author":"Clayton\u00a0Christensen","year":"2016","unstructured":"Clayton\u00a0Christensen et al.2016. Know Your Customers\u2019 \"Jobs to Be Done\". https:\/\/hbr.org\/2016\/09\/know-your-customers-jobs-to-be-done."},{"key":"e_1_3_2_1_12_1","unstructured":"David\u00a0Gevorkyan et al.2019. Page Simulation for Better Offline Metrics at Netflix. https:\/\/netflixtechblog.com\/page-simulator-fa02069fb269."},{"key":"e_1_3_2_1_13_1","unstructured":"Michael\u00a0Chiu et al. 2018. Notes from the AI frontier: Applications and value of deep learning. https:\/\/www.mckinsey.com\/featured-insights\/artificial-intelligence\/notes-from-the-ai-frontier-applications-and-value-of-deep-learning."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2843948"},{"key":"e_1_3_2_1_15_1","unstructured":"Cristos Goodrow. 2021. On YouTube\u2019s recommendation system. https:\/\/blog.youtube\/inside-youtube\/on-youtubes-recommendation-system\/."},{"key":"e_1_3_2_1_16_1","volume-title":"Session-based recommendations with recurrent neural networks. arXiv preprint arXiv:1511.06939","author":"Hidasi Bal\u00e1zs","year":"2015","unstructured":"Bal\u00e1zs Hidasi, Alexandros Karatzoglou, Linas Baltrunas, and Domonkos Tikk. 2015. Session-based recommendations with recurrent neural networks. arXiv preprint arXiv:1511.06939 (2015)."},{"volume-title":"Automated machine learning: methods, systems, challenges","author":"Hutter Frank","key":"e_1_3_2_1_17_1","unstructured":"Frank Hutter, Lars Kotthoff, and Joaquin Vanschoren. 2019. Automated machine learning: methods, systems, challenges. Springer Nature."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3474247"},{"key":"e_1_3_2_1_19_1","volume-title":"International Conference on Machine Learning. PMLR, 1453\u20131461","author":"Joulani Pooria","year":"2013","unstructured":"Pooria Joulani, Andras Gyorgy, and Csaba Szepesv\u00e1ri. 2013. Online learning under delayed feedback. In International Conference on Machine Learning. PMLR, 1453\u20131461."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2018.00035"},{"key":"e_1_3_2_1_21_1","volume-title":"The epoch-greedy algorithm for multi-armed bandits with side information. Advances in neural information processing systems 20","author":"Langford John","year":"2007","unstructured":"John Langford and Tong Zhang. 2007. The epoch-greedy algorithm for multi-armed bandits with side information. Advances in neural information processing systems 20 (2007)."},{"key":"e_1_3_2_1_22_1","first-page":"145","article-title":"Bayesian Optimization for Policy Search via Online-Offline Experimentation.J","volume":"20","author":"Letham Benjamin","year":"2019","unstructured":"Benjamin Letham and Eytan Bakshy. 2019. Bayesian Optimization for Policy Search via Online-Offline Experimentation.J. Mach. Learn. Res. 20 (2019), 145\u20131.","journal-title":"Mach. Learn. Res."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772758"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/1935826.1935878"},{"key":"e_1_3_2_1_25_1","volume-title":"The 22nd International Conference on Artificial Intelligence and Statistics. PMLR, 2956\u20132965","author":"Ma Yifei","year":"2019","unstructured":"Yifei Ma, Yu-Xiang Wang, and Balakrishnan Narayanaswamy. 2019. Imitation-regularized offline learning. In The 22nd International Conference on Artificial Intelligence and Statistics. 
PMLR, 2956\u20132965."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50030-1"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3272027"},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the REVEAL Workshop at the 16th ACM Conference on Recommender Systems.","author":"More Ajinkya","year":"2019","unstructured":"Ajinkya More, Linas Baltrunas, Nikos Vlassis, and Justin Basilico. 2019. Recap: Designing a more Efficient Estimator for Off-policy Evaluation in Bandits with Large Action Spaces. In Proceedings of the REVEAL Workshop at the 16th ACM Conference on Recommender Systems."},{"key":"e_1_3_2_1_29_1","unstructured":"Adam Mosseri. 2021. Shedding More Light on How Instagram Works. https:\/\/about.instagram.com\/blog\/announcements\/shedding-more-light-on-how-instagram-works."},{"key":"e_1_3_2_1_30_1","unstructured":"Netflix. 2023. About us. https:\/\/about.netflix.com\/en."},{"key":"e_1_3_2_1_31_1","unstructured":"Andrew\u00a0Y Ng Daishi Harada and Stuart Russell. 1999. Policy invariance under reward transformations: Theory and application to reward shaping. In Icml Vol.\u00a099. Citeseer 278\u2013287."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/2566486.2568012"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313404"},{"key":"e_1_3_2_1_34_1","unstructured":"Jette Randl\u00f8v and Preben Alstr\u00f8m. 1998. Learning to Drive a Bicycle Using Reinforcement Learning and Shaping.. In ICML Vol.\u00a098. 463\u2013471."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772773"},{"key":"e_1_3_2_1_36_1","volume-title":"Reinforcement today.American Psychologist 13, 3","author":"Skinner F","year":"1958","unstructured":"Burrhus\u00a0F Skinner. 1958. Reinforcement today.American Psychologist 13, 3 (1958), 94."},{"volume-title":"Reinforcement learning: An introduction","author":"Sutton S","key":"e_1_3_2_1_37_1","unstructured":"Richard\u00a0S Sutton and Andrew\u00a0G Barto. 2018. Reinforcement learning: An introduction. MIT press."},{"key":"e_1_3_2_1_38_1","volume-title":"International Conference on Machine Learning. PMLR, 814\u2013823","author":"Swaminathan Adith","year":"2015","unstructured":"Adith Swaminathan and Thorsten Joachims. 2015. Counterfactual risk minimization: Learning from logged bandit feedback. In International Conference on Machine Learning. PMLR, 814\u2013823."},{"key":"e_1_3_2_1_39_1","unstructured":"TikTok. 2020. How TikTok recommends video ForYou. https:\/\/newsroom.tiktok.com\/en-us\/how-tiktok-recommends-videos-for-you."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3351095.3372834"},{"key":"e_1_3_2_1_41_1","unstructured":"Twitter. 2023. Twitter\u2019s Recommendation Algorithm. https:\/\/blog.twitter.com\/engineering\/en_us\/topics\/open-source\/2023\/twitter-recommendation-algorithm."},{"key":"e_1_3_2_1_42_1","volume-title":"A review of off-policy evaluation in reinforcement learning. arXiv preprint arXiv:2212.06355","author":"Uehara Masatoshi","year":"2022","unstructured":"Masatoshi Uehara, Chengchun Shi, and Nathan Kallus. 2022. A review of off-policy evaluation in reinforcement learning. arXiv preprint arXiv:2212.06355 (2022)."},{"key":"e_1_3_2_1_43_1","volume-title":"Learning values across many orders of magnitude. 
Advances in neural information processing systems 29","author":"van Hasselt P","year":"2016","unstructured":"Hado\u00a0P van Hasselt, Arthur Guez, Matteo Hessel, Volodymyr Mnih, and David Silver. 2016. Learning values across many orders of magnitude. Advances in neural information processing systems 29 (2016)."},{"key":"e_1_3_2_1_44_1","volume-title":"International Conference on Machine Learning. PMLR, 9712\u20139721","author":"Vernade Claire","year":"2020","unstructured":"Claire Vernade, Alexandra Carpentier, Tor Lattimore, Giovanni Zappella, Beyza Ermis, and Michael Brueckner. 2020. Linear bandits with stochastic delayed feedback. In International Conference on Machine Learning. PMLR, 9712\u20139721."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539073"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3132847.3133025"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539040"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462892"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330668"}],"event":{"name":"RecSys '23: Seventeenth ACM Conference on Recommender Systems","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGAI ACM Special Interest Group on Artificial Intelligence","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval","SIGCHI ACM Special Interest Group on Computer-Human Interaction","SIGecom Special Interest Group on Economics and Computation"],"location":"Singapore Singapore","acronym":"RecSys '23"},"container-title":["Proceedings of the 17th ACM Conference on Recommender Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3604915.3608873","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3604915.3608873","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:46:34Z","timestamp":1750178794000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3604915.3608873"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,14]]},"references-count":49,"alternative-id":["10.1145\/3604915.3608873","10.1145\/3604915"],"URL":"https:\/\/doi.org\/10.1145\/3604915.3608873","relation":{},"subject":[],"published":{"date-parts":[[2023,9,14]]},"assertion":[{"value":"2023-09-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}
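The object above is a Crossref work record: the "message" field carries the bibliographic data ("title", "author", "reference", "event", and so on), and such records are served by the public Crossref REST API works endpoint. As a minimal sketch of how a record like this can be retrieved and read, assuming Python with the third-party requests package and a placeholder contact address for Crossref's polite pool:

```python
import requests

# Fetch the work record for this paper from the public Crossref REST API.
doi = "10.1145/3604915.3608873"
resp = requests.get(
    f"https://api.crossref.org/works/{doi}",
    params={"mailto": "you@example.com"},  # placeholder contact, per Crossref etiquette
    timeout=30,
)
resp.raise_for_status()
work = resp.json()["message"]  # same structure as the "message" object above

# Pull out a few of the fields shown in the record.
title = work["title"][0]
authors = ", ".join(f"{a['given']} {a['family']}" for a in work["author"])
year = work["published"]["date-parts"][0][0]
print(f"{authors}. {year}. {title}. In {work['container-title'][0]}, {work['page']}.")
```

Given the record above, this would print: Gary Tang, Jiangwei Pan, Henry Wang, Justin Basilico. 2023. Reward innovation for long-term member satisfaction. In Proceedings of the 17th ACM Conference on Recommender Systems, 396-399.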