{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T02:44:16Z","timestamp":1772592256243,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T00:00:00Z","timestamp":1745280000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61902209"],"award-info":[{"award-number":["61902209"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,22]]},"DOI":"10.1145\/3696410.3714862","type":"proceedings-article","created":{"date-parts":[[2025,5,5]],"date-time":"2025-05-05T16:42:02Z","timestamp":1746463322000},"page":"1703-1715","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["MA4DIV: Multi-Agent Reinforcement Learning for Search Result Diversification"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-6135-2604","authenticated-orcid":false,"given":"Yiqun","family":"Chen","sequence":"first","affiliation":[{"name":"Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9257-5498","authenticated-orcid":false,"given":"Jiaxin","family":"Mao","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9700-0693","authenticated-orcid":false,"given":"Yi","family":"Zhang","sequence":"additional","affiliation":[{"name":"Baidu Inc., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2215-5356","authenticated-orcid":false,"given":"Dehong","family":"Ma","sequence":"additional","affiliation":[{"name":"Baidu Inc., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-7536-6241","authenticated-orcid":false,"given":"Long","family":"Xia","sequence":"additional","affiliation":[{"name":"Baidu Inc., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2127-0702","authenticated-orcid":false,"given":"Jun","family":"Fan","sequence":"additional","affiliation":[{"name":"Baidu Inc., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4926-3357","authenticated-orcid":false,"given":"Daiting","family":"Shi","sequence":"additional","affiliation":[{"name":"Baidu Inc., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6503-4581","authenticated-orcid":false,"given":"Zhicong","family":"Cheng","sequence":"additional","affiliation":[{"name":"Baidu Inc., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0113-4540","authenticated-orcid":false,"given":"Simiu","family":"Gu","sequence":"additional","affiliation":[{"name":"Baidu Inc., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0684-6205","authenticated-orcid":false,"given":"Dawei","family":"Yin","sequence":"additional","affiliation":[{"name":"Baidu Inc., Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,4,22]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/1498759.1498766"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/290941.291025"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10791-010-9157-1"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10791-011-9167-7"},{"key":"e_1_3_2_1_5_1","volume-title":"Ptde: Personalized training with distillated execution for multi-agent reinforcement learning. arXiv preprint arXiv:2210.08872","author":"Chen Yiqun","year":"2022","unstructured":"Yiqun Chen, Hangyu Mao, Tianle Zhang, Shiguang Wu, Bin Zhang, Jianye Hao, Dong Li, Bin Wang, and Hongxing Chang. 2022. Ptde: Personalized training with distillated execution for multi-agent reinforcement learning. arXiv preprint arXiv:2210.08872 (2022)."},{"key":"e_1_3_2_1_6_1","volume-title":"Commander-Soldiers Reinforcement Learning for Cooperative Multi-Agent Systems. In 2022 International Joint Conference on Neural Networks (IJCNN). IEEE, 1--7.","author":"Chen Yiqun","year":"2022","unstructured":"Yiqun Chen, Wei Yang, Tianle Zhang, Shiguang Wu, and Hongxing Chang. 2022. Commander-Soldiers Reinforcement Learning for Cooperative Multi-Agent Systems. In 2022 International Joint Conference on Neural Networks (IJCNN). IEEE, 1--7."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390334.1390446"},{"key":"e_1_3_2_1_8_1","volume-title":"International conference on computers and games. Springer, 72--83","author":"Coulom R\u00e9mi","year":"2006","unstructured":"R\u00e9mi Coulom. 2006. Efficient selectivity and backup operators in Monte-Carlo tree search. In International conference on computers and games. Springer, 72--83."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3616855.3635851"},{"key":"e_1_3_2_1_10_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3209979"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/1526709.1526761"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/1835449.1835639"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2348283.2348397"},{"key":"e_1_3_2_1_15_1","volume-title":"Haibin Wu, and Shih-wei Liao.","author":"Hu Jian","year":"2021","unstructured":"Jian Hu, Siyang Jiang, Seth Austin Harding, Haibin Wu, and Shih-wei Liao. 2021. Rethinking the implementation tricks and monotonicity constraint in cooperative multi-agent reinforcement learning. arXiv e-prints (2021), arXiv--2102."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2806416.2806455"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080805"},{"key":"e_1_3_2_1_18_1","volume-title":"International conference on machine learning. PMLR, 1188--1196","author":"Le Quoc","year":"2014","unstructured":"Quoc Le and Tomas Mikolov. 2014. Distributed representations of sentences and documents. In International conference on machine learning. PMLR, 1188--1196."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/1526709.1526720"},{"key":"e_1_3_2_1_20_1","volume-title":"OpenAI Pieter Abbeel, and Igor Mordatch","author":"Lowe Ryan","year":"2017","unstructured":"Ryan Lowe, Yi I Wu, Aviv Tamar, Jean Harb, OpenAI Pieter Abbeel, and Igor Mordatch. 2017. Multi-agent actor-critic for mixed cooperative-competitive environments. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-04174-7_8"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401104"},{"key":"e_1_3_2_1_23_1","volume-title":"Markov decision processes: discrete stochastic dynamic programming","author":"Puterman Martin L","unstructured":"Martin L Puterman. 2014. Markov decision processes: discrete stochastic dynamic programming. John Wiley & Sons."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3411914"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544103"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390255"},{"key":"e_1_3_2_1_27_1","volume-title":"International conference on machine learning. PMLR, 4295--4304","author":"Rashid Tabish","year":"2018","unstructured":"Tabish Rashid, Mikayel Samvelyan, Christian Schroeder, Gregory Farquhar, Jakob Foerster, and Shimon Whiteson. 2018. Qmix: Monotonic value function factorisation for deep multi-agent reinforcement learning. In International conference on machine learning. PMLR, 4295--4304."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772780"},{"key":"e_1_3_2_1_29_1","volume-title":"Policy learning for fairness in ranking. Advances in neural information processing systems 32","author":"Singh Ashudeep","year":"2019","unstructured":"Ashudeep Singh and Thorsten Joachims. 2019. Policy learning for fairness in ranking. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462872"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539459"},{"key":"e_1_3_2_1_32_1","volume-title":"Vinicius Zambaldi, Max Jaderberg, Marc Lanctot, Nicolas Sonnerat, Joel Z Leibo, Karl Tuyls, et al.","author":"Sunehag Peter","year":"2017","unstructured":"Peter Sunehag, Guy Lever, Audrunas Gruslys, Wojciech Marian Czarnecki, Vinicius Zambaldi, Max Jaderberg, Marc Lanctot, Nicolas Sonnerat, Joel Z Leibo, Karl Tuyls, et al. 2017. Value-decomposition networks for cooperative multi-agent learning. arXiv preprint arXiv:1706.05296 (2017)."},{"key":"e_1_3_2_1_33_1","volume-title":"Learning to predict by the methods of temporal differences. Machine learning 3","author":"Sutton Richard S","year":"1988","unstructured":"Richard S Sutton. 1988. Learning to predict by the methods of temporal differences. Machine learning 3 (1988), 9--44."},{"key":"e_1_3_2_1_34_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_35_1","volume-title":"Qplex: Duplex dueling multi-agent q-learning. arXiv preprint arXiv:2008.01062","author":"Wang Jianhao","year":"2020","unstructured":"Jianhao Wang, Zhizhou Ren, Terry Liu, Yang Yu, and Chongjie Zhang. 2020. Qplex: Duplex dueling multi-agent q-learning. arXiv preprint arXiv:2008.01062 (2020)."},{"key":"e_1_3_2_1_36_1","volume-title":"Rode: Learning roles to decompose multi-agent tasks. arXiv preprint arXiv:2010.01523","author":"Wang Tonghan","year":"2020","unstructured":"Tonghan Wang, Tarun Gupta, Anuj Mahajan, Bei Peng, Shimon Whiteson, and Chongjie Zhang. 2020. Rode: Learning roles to decompose multi-agent tasks. arXiv preprint arXiv:2010.01523 (2020)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080685"},{"key":"e_1_3_2_1_38_1","volume-title":"Simple statistical gradient-following algorithms for connectionist reinforcement learning. Machine learning 8","author":"Williams Ronald J","year":"1992","unstructured":"Ronald J Williams. 1992. Simple statistical gradient-following algorithms for connectionist reinforcement learning. Machine learning 8 (1992), 229--256."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390306"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/2766462.2767710"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/2911451.2911498"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080775"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401148"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/2983921"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449831"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380294"},{"key":"e_1_3_2_1_47_1","first-page":"24611","article-title":"The surprising effectiveness of ppo in cooperative multi-agent games","volume":"35","author":"Yu Chao","year":"2022","unstructured":"Chao Yu, Akash Velu, Eugene Vinitsky, Jiaxuan Gao, Yu Wang, Alexandre Bayen, and Yi Wu. 2022. The surprising effectiveness of ppo in cooperative multi-agent games. Advances in Neural Information Processing Systems 35 (2022), 24611--24624.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i9.21282"},{"key":"e_1_3_2_1_49_1","volume-title":"Acm sigir forum","author":"Zhai ChengXiang","unstructured":"ChengXiang Zhai, William W Cohen, and John Lafferty. 2015. Beyond independent relevance: methods and evaluation metrics for subtopic retrieval. In Acm sigir forum, Vol. 49. ACM New York, NY, USA, 2--9."},{"key":"e_1_3_2_1_50_1","volume-title":"Efficient cooperation strategy generation in multi-agent video games via hypergraph neural network. arXiv preprint arXiv:2203.03265","author":"Zhang Bin","year":"2022","unstructured":"Bin Zhang, Yunpeng Bai, Zhiwei Xu, Dapeng Li, and Guoliang Fan. 2022. Efficient cooperation strategy generation in multi-agent video games via hypergraph neural network. arXiv preprint arXiv:2203.03265 (2022)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2023\/40"},{"key":"e_1_3_2_1_52_1","volume-title":"Stackelberg decision transformer for asynchronous action coordination in multi-agent systems. arXiv preprint arXiv:2305.07856","author":"Zhang Bin","year":"2023","unstructured":"Bin Zhang, Hangyu Mao, Lijuan Li, Zhiwei Xu, Dapeng Li, Rui Zhao, and Guoliang Fan. 2023. Stackelberg decision transformer for asynchronous action coordination in multi-agent systems. arXiv preprint arXiv:2305.07856 (2023)."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380047"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/2600428.2609634"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3358075"}],"event":{"name":"WWW '25: The ACM Web Conference 2025","location":"Sydney NSW Australia","acronym":"WWW '25","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714862","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3696410.3714862","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:42Z","timestamp":1750295922000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714862"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,22]]},"references-count":55,"alternative-id":["10.1145\/3696410.3714862","10.1145\/3696410"],"URL":"https:\/\/doi.org\/10.1145\/3696410.3714862","relation":{},"subject":[],"published":{"date-parts":[[2025,4,22]]},"assertion":[{"value":"2025-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}