{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T19:08:27Z","timestamp":1757617707139,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,9,22]]},"DOI":"10.1145\/3705328.3748144","type":"proceedings-article","created":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T10:48:44Z","timestamp":1757155724000},"page":"945-948","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Deep Reinforcement Learning for Ranking Utility Tuning in the Ad Recommender System at Pinterest"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4534-5494","authenticated-orcid":false,"given":"Xiao","family":"Yang","sequence":"first","affiliation":[{"name":"Pinterest, San Francisco, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-5727-3219","authenticated-orcid":false,"given":"Mehdi","family":"Ayed","sequence":"additional","affiliation":[{"name":"Pinterest, New York, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2673-3265","authenticated-orcid":false,"given":"Longyu","family":"Zhao","sequence":"additional","affiliation":[{"name":"Pinterest, San Francisco, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-4751-9619","authenticated-orcid":false,"given":"Fan","family":"Zhou","sequence":"additional","affiliation":[{"name":"Pinterest, Seattle, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8232-1371","authenticated-orcid":false,"given":"Yuchen","family":"Shen","sequence":"additional","affiliation":[{"name":"Pinterest, San Francisco, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2934-2736","authenticated-orcid":false,"given":"Abe","family":"Engle","sequence":"additional","affiliation":[{"name":"Pinterest, San Francisco, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3834-7628","authenticated-orcid":false,"given":"Jinfeng","family":"Zhuang","sequence":"additional","affiliation":[{"name":"Pinterest, Seattle, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-9081-5700","authenticated-orcid":false,"given":"Ling","family":"Leng","sequence":"additional","affiliation":[{"name":"Pinterest, Seattle, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4761-5171","authenticated-orcid":false,"given":"Jiajing","family":"Xu","sequence":"additional","affiliation":[{"name":"Pinterest, San Francisco, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9664-8644","authenticated-orcid":false,"given":"Charles","family":"Rosenberg","sequence":"additional","affiliation":[{"name":"Pinterest, San Francisco, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-7737-1667","authenticated-orcid":false,"given":"Prathibha","family":"Deshikachar","sequence":"additional","affiliation":[{"name":"Pinterest, San Francisco, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,9,7]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"L\u00e9on Bottou Jonas Peters Joaquin Qui\u00f1onero-Candela Denis\u00a0X. Charles D.\u00a0Max Chickering Elon Portugaly Dipankar Ray Patrice Simard and Ed Snelson. 2013. Counterfactual Reasoning and Learning Systems: The Example of Computational Advertising. Journal of Machine Learning Research 14 101 3207\u20133260."},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/3018661.3018702"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"Paul Covington Jay Adams and Emre Sargin. 2016. Deep Neural Networks for YouTube Recommendations(RecSys \u201916). Association for Computing Machinery New York NY USA 191\u2013198.","DOI":"10.1145\/2959100.2959190"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/239"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/2648584.2648589"},{"key":"e_1_3_3_1_8_2","first-page":"448","volume-title":"Proceedings of the 32nd International Conference on Machine Learning (ICML)","author":"Ioffe Sergey","year":"2015","unstructured":"Sergey Ioffe and Christian Szegedy. 2015. Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift. In Proceedings of the 32nd International Conference on Machine Learning (ICML). PMLR, 448\u2013456."},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3272021"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/2783258.2788621"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531847"},{"key":"e_1_3_3_1_12_2","unstructured":"Zhuoran Liu Leqi Zou Xuan Zou Caihua Wang Biao Zhang Da Tang Bolin Zhu Yijie Zhu Peng Wu Ke Wang and Youlong Cheng. 2022. Monolith: Real Time Recommendation System With Collisionless Embedding Table. arxiv:https:\/\/arXiv.org\/abs\/2209.07663\u00a0[cs.IR] https:\/\/arxiv.org\/abs\/2209.07663"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3124749.3124750"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.5555\/3104322.3104425"},{"key":"e_1_3_3_1_15_2","unstructured":"David Rohde Stephen Bonner Travis Dunlop Flavian Vasile and Alexandros Karatzoglou. 2018. Recogym: A reinforcement learning environment for the problem of product recommendation in online advertising. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1808.00720 (2018)."},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","unstructured":"F. Rosenblatt. 1958. The perceptron: A probabilistic model for information storage and organization in the brain. Psychological Review 65 6 (1958) 386\u2013408. 10.1037\/h0042519","DOI":"10.1037\/h0042519"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210062"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599916"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1145\/3124749.3124754"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3357806"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599900"},{"key":"e_1_3_3_1_22_2","volume-title":"Reinforcement-learning connectionist systems","author":"Williams Ronald\u00a0J","year":"1987","unstructured":"Ronald\u00a0J Williams. 1987. Reinforcement-learning connectionist systems. College of Computer Science, Northeastern University."},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"crossref","unstructured":"Ronald\u00a0J Williams. 1992. Simple statistical gradient-following algorithms for connectionist reinforcement learning. Machine learning 8 (1992) 229\u2013256.","DOI":"10.1023\/A:1022672621406"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3271748"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i5.16580"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/3523227.3547394"},{"key":"e_1_3_3_1_27_2","unstructured":"Jiaqi Zhai Lucy Liao Xing Liu Yueming Wang Rui Li Xuan Cao Leon Gao Zhaojie Gong Fangda Gu Michael He et\u00a0al. 2024. Actions speak louder than words: Trillion-parameter sequential transducers for generative recommendations. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.17152 (2024)."},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219918"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i1.16156"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"crossref","unstructured":"Xiangyu Zhao Long Xia Jiliang Tang and Dawei Yin. 2019. \" Deep reinforcement learning for search recommendation and online advertising: a survey\" by Xiangyu Zhao Long Xia Jiliang Tang and Dawei Yin with Martin Vesely as coordinator. ACM sigweb newsletter 2019 Spring (2019) 1\u201315.","DOI":"10.1145\/3320496.3320500"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1145\/3240323.3240374"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403384"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219823"}],"event":{"name":"RecSys '25: Nineteenth ACM Conference on Recommender Systems","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction","SIGAI ACM Special Interest Group on Artificial Intelligence","SIGIR ACM Special Interest Group on Information Retrieval","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Prague Czech Republic","acronym":"RecSys '25"},"container-title":["Proceedings of the Nineteenth ACM Conference on Recommender Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3705328.3748144","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T11:52:29Z","timestamp":1757159549000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3705328.3748144"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,7]]},"references-count":32,"alternative-id":["10.1145\/3705328.3748144","10.1145\/3705328"],"URL":"https:\/\/doi.org\/10.1145\/3705328.3748144","relation":{},"subject":[],"published":{"date-parts":[[2025,9,7]]},"assertion":[{"value":"2025-09-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}