{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T01:13:20Z","timestamp":1755825200283,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"``New Generation of AI 2030' Major Project","award":["2018AAA0100900"],"award-info":[{"award-number":["2018AAA0100900"]}]},{"DOI":"10.13039\/501100006374","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62177033, 62076161, 62322603"],"award-info":[{"award-number":["62177033, 62076161, 62322603"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shanghai Municipal Science and Technology Major Project","award":["2021SHZDZX0102"],"award-info":[{"award-number":["2021SHZDZX0102"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1145\/3589335.3648317","type":"proceedings-article","created":{"date-parts":[[2024,5,12]],"date-time":"2024-05-12T18:41:21Z","timestamp":1715539281000},"page":"196-205","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["HiFI: Hierarchical Fairness-aware Integrated Ranking with Constrained Reinforcement Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8726-3927","authenticated-orcid":false,"given":"Yifan","family":"Liu","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2544-775X","authenticated-orcid":false,"given":"Wei","family":"Xia","sequence":"additional","affiliation":[{"name":"Huawei Noah's Ark Lab, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9148-3997","authenticated-orcid":false,"given":"Weiwen","family":"Liu","sequence":"additional","affiliation":[{"name":"Huawei Noah's Ark Lab, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8567-2185","authenticated-orcid":false,"given":"Menghui","family":"Zhu","sequence":"additional","affiliation":[{"name":"Huawei Noah's Ark Lab, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0127-2425","authenticated-orcid":false,"given":"Weinan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9224-2431","authenticated-orcid":false,"given":"Ruiming","family":"Tang","sequence":"additional","affiliation":[{"name":"Huawei Noah's Ark Lab, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0281-8271","authenticated-orcid":false,"given":"Yong","family":"Yu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2024,5,13]]},"reference":[{"unstructured":"2020. MindSpore. https:\/\/www.mindspore.cn\/.","key":"e_1_3_2_2_1_1"},{"key":"e_1_3_2_2_2_1","volume-title":"Managing popularity bias in recommender systems with personalized re-ranking. arXiv preprint arXiv:1901.07555","author":"Abdollahpouri Himan","year":"2019","unstructured":"Himan Abdollahpouri, Robin Burke, and Bamshad Mobasher. 2019. Managing popularity bias in recommender systems with personalized re-ranking. arXiv preprint arXiv:1901.07555 (2019)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_3_1","DOI":"10.1145\/3543846"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_4_1","DOI":"10.1145\/3209978.3209985"},{"volume-title":"Constrained Markov decision processes","author":"Altman Eitan","unstructured":"Eitan Altman. 1999. Constrained Markov decision processes. Vol. 7. CRC press.","key":"e_1_3_2_2_5_1"},{"key":"e_1_3_2_2_6_1","volume-title":"Recent advances in hierarchical reinforcement learning. Discrete event dynamic systems 13, 1--2","author":"Barto Andrew G","year":"2003","unstructured":"Andrew G Barto and Sridhar Mahadevan. 2003. Recent advances in hierarchical reinforcement learning. Discrete event dynamic systems 13, 1--2 (2003), 41--77."},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_7_1","DOI":"10.1145\/3292500.3330745"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_8_1","DOI":"10.1145\/290941.291025"},{"key":"e_1_3_2_2_9_1","volume-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555","author":"Chung Junyoung","year":"2014","unstructured":"Junyoung Chung, Caglar Gulcehre, KyungHyun Cho, and Yoshua Bengio. 2014. Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555 (2014)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_10_1","DOI":"10.1109\/TCYB.2021.3089941"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_11_1","DOI":"10.1109\/ISIT.2004.1365067"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_12_1","DOI":"10.1145\/3488560.3498487"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_13_1","DOI":"10.1145\/3292500.3330691"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_14_1","DOI":"10.1145\/2783258.2788583"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_15_1","DOI":"10.1016\/j.eij.2015.06.005"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_16_1","DOI":"10.1145\/3130348.3130374"},{"volume-title":"Advances in Neural Information Processing Systems","author":"Konda Vijay","unstructured":"Vijay Konda and John Tsitsiklis. 1999. Actor-Critic Algorithms. In Advances in Neural Information Processing Systems, S. Solla, T. Leen, and K. M\u00fcller (Eds.), Vol. 12. MIT Press.","key":"e_1_3_2_2_17_1"},{"key":"e_1_3_2_2_18_1","volume-title":"Coptidice: Offline constrained reinforcement learning via stationary distribution correction estimation. arXiv preprint arXiv:2204.08957","author":"Lee Jongmin","year":"2022","unstructured":"Jongmin Lee, Cosmin Paduraru, Daniel J Mankowitz, Nicolas Heess, Doina Precup, Kee-Eung Kim, and Arthur Guez. 2022. Coptidice: Offline constrained reinforcement learning via stationary distribution correction estimation. arXiv preprint arXiv:2204.08957 (2022)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_19_1","DOI":"10.1145\/3404835.3462814"},{"key":"e_1_3_2_2_20_1","volume-title":"Cross DQN: Cross Deep Q Network for Ads Allocation in Feed. arXiv preprint arXiv:2109.04353","author":"Liao Guogang","year":"2021","unstructured":"Guogang Liao, Ze Wang, Xiaoxu Wu, Xiaowen Shi, Chuheng Zhang, Yongkang Wang, XingxingWang, and DongWang. 2021. Cross DQN: Cross Deep Q Network for Ads Allocation in Feed. arXiv preprint arXiv:2109.04353 (2021)."},{"unstructured":"Jianghao Lin Xinyi Dai Yunjia Xi Weiwen Liu Bo Chen Xiangyang Li Chenxu Zhu Huifeng Guo Yong Yu Ruiming Tang et al. 2023. How Can Recommender Systems Benefit from Large Language Models: A Survey. arXiv preprint arXiv:2306.05817 (2023).","key":"e_1_3_2_2_21_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_22_1","DOI":"10.1145\/3580305.3599422"},{"key":"e_1_3_2_2_23_1","volume-title":"Neural Re-ranking in Multi-stage Recommender Systems: A Review. arXiv preprint arXiv:2202.06602","author":"Liu Weiwen","year":"2022","unstructured":"Weiwen Liu, Yunjia Xi, Jiarui Qin, Fei Sun, Bo Chen, Weinan Zhang, Rui Zhang, and Ruiming Tang. 2022. Neural Re-ranking in Multi-stage Recommender Systems: A Review. arXiv preprint arXiv:2202.06602 (2022)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_24_1","DOI":"10.1145\/3397271.3401104"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_25_1","DOI":"10.1145\/3366423.3380196"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_26_1","DOI":"10.1145\/3298689.3347000"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_27_1","DOI":"10.1145\/3298689.3347000"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_28_1","DOI":"10.1145\/1242572.1242643"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_29_1","DOI":"10.1145\/3459637.3482006"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_30_1","DOI":"10.1145\/3240323.3240372"},{"key":"e_1_3_2_2_31_1","volume-title":"Reward constrained policy optimization. arXiv preprint arXiv:1805.11074","author":"Tessler Chen","year":"2018","unstructured":"Chen Tessler, Daniel J Mankowitz, and Shie Mannor. 2018. Reward constrained policy optimization. arXiv preprint arXiv:1805.11074 (2018)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_32_1","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"e_1_3_2_2_33_1","volume-title":"Shimon Whiteson, and Marco Wiering.","author":"Seijen Harm Van","year":"2009","unstructured":"Harm Van Seijen, Hado Van Hasselt, Shimon Whiteson, and Marco Wiering. 2009. A theoretical and empirical analysis of Expected Sarsa. In 2009 ieee symposium on adaptive dynamic programming and reinforcement learning. IEEE, 177--184."},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_34_1","DOI":"10.1145\/3531146.3533079"},{"key":"e_1_3_2_2_35_1","volume-title":"Lin (Eds.)","volume":"33","author":"Wang Ziyu","year":"2020","unstructured":"Ziyu Wang, Alexander Novikov, Konrad Zolna, Josh S Merel, Jost Tobias Springenberg, Scott E Reed, Bobak Shahriari, Noah Siegel, Caglar Gulcehre, Nicolas Heess, and Nando de Freitas. 2020. Critic Regularized Regression. In Advances in Neural Information Processing Systems, H. Larochelle, M. Ranzato, R. Hadsell, M.F. Balcan, and H. Lin (Eds.), Vol. 33. Curran Associates, Inc., 7768--7778."},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_36_1","DOI":"10.18653\/v1\/2020.acl-main.331"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_37_1","DOI":"10.1145\/3539597.3570399"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_38_1","DOI":"10.1145\/3580305.3599878"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_39_1","DOI":"10.1145\/3511808.3557551"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_40_1","DOI":"10.1609\/aaai.v35i5.16580"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_41_1","DOI":"10.1145\/3488560.3498494"},{"key":"e_1_3_2_2_42_1","volume-title":"Multi-channel Integrated Recommendation with Exposure Constraints. arXiv preprint arXiv:2305.12319","author":"Xu Yue","year":"2023","unstructured":"Yue Xu, Qijie Shen, Jianwen Yin, Zengde Deng, Dimin Wang, Hao Chen, Lixiang Lai, Tao Zhuang, and Junfeng Ge. 2023. Multi-channel Integrated Recommendation with Exposure Constraints. arXiv preprint arXiv:2305.12319 (2023)."},{"key":"e_1_3_2_2_43_1","volume-title":"Integrated Ranking for News Feed with Reinforcement Learning. In Companion Proceedings of the ACM Web Conference","author":"Zhu Menghui","year":"2023","unstructured":"Menghui Zhu, Wei Xia, Weiwen Liu, Yifan Liu, Ruiming Tang, and Weinan Zhang. 2023. Integrated Ranking for News Feed with Reinforcement Learning. In Companion Proceedings of the ACM Web Conference 2023. 480--484."}],"event":{"sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"acronym":"WWW '24","name":"WWW '24: The ACM Web Conference 2024","location":"Singapore Singapore"},"container-title":["Companion Proceedings of the ACM Web Conference 2024"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589335.3648317","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3589335.3648317","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:39:17Z","timestamp":1755823157000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589335.3648317"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":43,"alternative-id":["10.1145\/3589335.3648317","10.1145\/3589335"],"URL":"https:\/\/doi.org\/10.1145\/3589335.3648317","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]},"assertion":[{"value":"2024-05-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}