{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T16:48:37Z","timestamp":1755794917977,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,7,20]],"date-time":"2025-07-20T00:00:00Z","timestamp":1752969600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,20]]},"DOI":"10.1145\/3690624.3709279","type":"proceedings-article","created":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T18:42:22Z","timestamp":1743792142000},"page":"1008-1019","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Achieving Nearly-Optimal Regret and Sample Complexity in Dueling Bandits with Applications in Online Recommendations"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7978-6146","authenticated-orcid":false,"given":"Lanjihong","family":"Ma","sequence":"first","affiliation":[{"name":"National Key Laboratory for Novel Software Technology, School of Artificial Intelligence, Nanjing University, Nanjing, Jiangsu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8580-1103","authenticated-orcid":false,"given":"Yao-Xiang","family":"Ding","sequence":"additional","affiliation":[{"name":"State Key Lab of CAD &amp; CG, Zhejiang University, Hangzhou, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2101-1836","authenticated-orcid":false,"given":"Zhen-Yu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Center for Advanced Intelligence Project, RIKEN, Tokyo, Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0746-1494","authenticated-orcid":false,"given":"Zhi-Hua","family":"Zhou","sequence":"additional","affiliation":[{"name":"National Key Laboratory for Novel Software Technology, School of Artificial Intelligence, Nanjing University, Nanjing, Jiangsu, China"}]}],"member":"320","published-online":{"date-parts":[[2025,7,20]]},"reference":[{"key":"e_1_3_2_2_1_1","first-page":"856","volume-title":"Proceedings of the 31th International Conference on Machine Learning (ICML)","author":"Ailon Nir","year":"2014","unstructured":"Nir Ailon, Zohar Karnin, and Thorsten Joachims. Reducing dueling bandits to cardinal bandits. In Proceedings of the 31th International Conference on Machine Learning (ICML), pages 856--864, 2014."},{"key":"e_1_3_2_2_2_1","first-page":"41","volume-title":"Proceedings of the 23rd Conference on Learning Theory (COLT)","author":"Audibert Jean-Yves","year":"2010","unstructured":"Jean-Yves Audibert, S\u00e9bastien Bubeck, and R\u00e9mi Munos. Best arm identification in multi-armed bandits. In Proceedings of the 23rd Conference on Learning Theory (COLT), pages 41--53, 2010."},{"key":"e_1_3_2_2_3_1","first-page":"7","article-title":"Preference-based online learning with dueling bandits: A survey","volume":"22","author":"Bengs Viktor","year":"2021","unstructured":"Viktor Bengs, R\u00f3bert Busa-Fekete, Adil El Mesaoudi-Paul, and Eyke H\u00fcllermeier. Preference-based online learning with dueling bandits: A survey. Journal of Machine Learning Research, 22:7:1--7:108, 2021.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_2_4_1","first-page":"1071","volume-title":"Proceedings of the 31th International Conference on Machine Learning (ICML)","author":"Busa-Fekete R\u00f3bert","year":"2014","unstructured":"R\u00f3bert Busa-Fekete, Eyke H\u00fcllermeier, and Bal\u00e1zs Sz\u00f6r\u00e9nyi. Preference-based rank elicitation using statistical models: The case of mallows. In Proceedings of the 31th International Conference on Machine Learning (ICML), pages 1071--1079, 2014."},{"key":"e_1_3_2_2_5_1","first-page":"731","volume-title":"Proceedings of the 34th International Conference on Machine Learning (ICML)","author":"Chen Bangrui","year":"2017","unstructured":"Bangrui Chen and Peter I Frazier. Dueling bandits withweak regret. In Proceedings of the 34th International Conference on Machine Learning (ICML), pages 731--739, 2017."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220122"},{"key":"e_1_3_2_2_7_1","first-page":"1988","volume-title":"Proceedings of the 22nd International Conference on Artificial Intelligence and Statistics (AISTATS)","author":"Degenne R\u00e9my","year":"2019","unstructured":"R\u00e9my Degenne, Thomas Nedelec, Cl\u00e9ment Calauz\u00e8nes, and Vianney Perchet. Bridging the gap between regret minimization and best arm identification, with application to A\/B tests. In Proceedings of the 22nd International Conference on Artificial Intelligence and Statistics (AISTATS), pages 1988--1996, 2019."},{"key":"e_1_3_2_2_8_1","volume-title":"Proceedings of the 23rd International Joint Conference on Artificial Intelligence","author":"Abbasnejad E. V.","year":"2013","unstructured":"E. V. Bonilla E. Abbasnejad, S. Sanner and P. Poupart. Learning community-based preferences via dirichlet process mixtures of gaussian processes. In Proceedings of the 23rd International Joint Conference on Artificial Intelligence, 2013."},{"key":"e_1_3_2_2_9_1","first-page":"7060","volume-title":"Advances in Neural Information Processing Systems 30 (NIPS)","author":"Falahatgar Moein","year":"2017","unstructured":"Moein Falahatgar, Yi Hao, Alon Orlitsky, Venkatadheeraj Pichapati, and Vaishakh Ravindrakumar. Maxing and ranking with few assumptions. In Advances in Neural Information Processing Systems 30 (NIPS), pages 7060--7070, 2017."},{"key":"e_1_3_2_2_10_1","first-page":"1088","volume-title":"Proceedings of the 37th International Conference on Machine Learning (ICML)","author":"Falahatgar Moein","year":"2017","unstructured":"Moein Falahatgar, Alon Orlitsky, Venkatadheeraj Pichapati, and Ananda Theertha Suresh. Maximum selection and ranking under noisy comparisons. In Proceedings of the 37th International Conference on Machine Learning (ICML), pages 1088--1096, 2017."},{"key":"e_1_3_2_2_11_1","first-page":"1427","volume-title":"Proceedings of the 35th International Conference on Machine Learning (ICML)","author":"Falahatgar Moein","year":"2018","unstructured":"Moein Falahatgar, Ayush Jain, Alon Orlitsky, Venkatadheeraj Pichapati, and Vaishakh Ravindrakumar. The limits of maxing, ranking, and preference learning. In Proceedings of the 35th International Conference on Machine Learning (ICML), pages 1427--1436, 2018."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1137\/S0097539791195877"},{"key":"e_1_3_2_2_13_1","first-page":"784","volume-title":"Advances in Neural Information Processing Systems 29 (NIPS)","author":"Garivier Aur\u00e9lien","year":"2016","unstructured":"Aur\u00e9lien Garivier, Tor Lattimore, and Emilie Kaufmann. On explore-then-commit strategies. In Advances in Neural Information Processing Systems 29 (NIPS), pages 784--792, 2016."},{"key":"e_1_3_2_2_14_1","first-page":"25904","volume-title":"Advances in Neural Information Processing Systems 34 (NeurIPS)","author":"Haddenhorst Bj\u00f6rn","year":"2021","unstructured":"Bj\u00f6rn Haddenhorst, Viktor Bengs, and Eyke H\u00fcllermeier. Identification of the generalized condorcet winner in multi-dueling bandits. In Advances in Neural Information Processing Systems 34 (NeurIPS), pages 25904--25916, 2021."},{"key":"e_1_3_2_2_15_1","first-page":"4415","volume-title":"A unifying formalism for reward learning. Advances in Neural Information Processing Systems 33 (NeurIPS)","author":"Jeon Hong Jun","year":"2020","unstructured":"Hong Jun Jeon, Smitha Milli, and Anca Dragan. Reward-rational (implicit) choice: A unifying formalism for reward learning. Advances in Neural Information Processing Systems 33 (NeurIPS), pages 4415--4426, 2020."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/956750.956823"},{"key":"e_1_3_2_2_17_1","first-page":"1141","volume-title":"Proceedings of the 28th Conference on Learning Theory (COLT)","author":"Komiyama Junpei","year":"2015","unstructured":"Junpei Komiyama, Junya Honda, Hisashi Kashima, and Hiroshi Nakagawa. Regret lower bound and optimal algorithm in dueling bandit problem. In Proceedings of the 28th Conference on Learning Theory (COLT), pages 1141--1154, 2015."},{"key":"e_1_3_2_2_18_1","first-page":"1489","volume-title":"Advances in Neural Information Processing Systems 30 (NIPS)","author":"Kumagai Wataru","year":"2017","unstructured":"Wataru Kumagai. Regret analysis for continuous dueling bandit. In Advances in Neural Information Processing Systems 30 (NIPS), pages 1489--1498, 2017."},{"issue":"4","key":"e_1_3_2_2_19_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3411753","article-title":"A method for effective large-scale online ranker evaluation","volume":"38","author":"Li Chang","year":"2020","unstructured":"Chang Li, Ilya Markov, Maarten de Rijke, and Masrour Zoghi. Mergedts: A method for effective large-scale online ranker evaluation. ACM Transactions on Information and Systems, 38(4):40:1--40:28, 2020.","journal-title":"ACM Transactions on Information and Systems"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.2307\/2333244"},{"key":"e_1_3_2_2_21_1","first-page":"2488","volume-title":"Proceedings of the 34th International Conference on Machine Learning (ICML)","author":"Mohajer Soheil","year":"2017","unstructured":"Soheil Mohajer, Changho Suh, and Adel M. Elmahdy. Active learning for top-k rank aggregation from noisy comparisons. In Proceedings of the 34th International Conference on Machine Learning (ICML), pages 2488--2497, 2017."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.5555\/3692070.3693563"},{"key":"e_1_3_2_2_23_1","volume-title":"Advances in Neural Information Processing Systems 35 (NeurIPS)","author":"Ouyang Long","year":"2022","unstructured":"Long Ouyang, Jeffrey Wu, Xu Jiang, Diogo Almeida, Carroll L. Wainwright, Pamela Mishkin, Chong Zhang, Sandhini Agarwal, Katarina Slama, Alex Ray, John Schulman, Jacob Hilton, Fraser Kelton, Luke Miller, Maddie Simens, Amanda Askell, Peter Welinder, Paul F. Christiano, Jan Leike, and Ryan Lowe. Training language models to follow instructions with human feedback. In Advances in Neural Information Processing Systems 35 (NeurIPS), 2022."},{"key":"e_1_3_2_2_24_1","first-page":"1","volume-title":"Probability in the Engineering and Informational Sciences","author":"Pek\u00f6z Erol","year":"2020","unstructured":"Erol Pek\u00f6z, Sheldon M Ross, and Zhengyu Zhang. Dueling bandit problems. Probability in the Engineering and Informational Sciences, pages 1--12, 2020."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/1458082.1458092"},{"key":"e_1_3_2_2_26_1","volume-title":"Direct preference optimization: Your language model is secretly a reward model. Advances in Neural Information Processing Systems 36 (NeurIPS)","author":"Rafailov Rafael","year":"2024","unstructured":"Rafael Rafailov, Archit Sharma, Eric Mitchell, Stefano Ermon, Christopher D. Manning, and Chelsea Finn. Direct preference optimization: Your language model is secretly a reward model. Advances in Neural Information Processing Systems 36 (NeurIPS), 2024."},{"key":"e_1_3_2_2_27_1","first-page":"10014","volume-title":"Advances in Neural Information Processing Systems 32 (NeurIPS)","author":"Ren Wenbo","year":"2019","unstructured":"Wenbo Ren, Jia Liu, and Ness B. Shroff. On sample complexity upper and lower bounds for exact ranking from noisy comparisons. In Advances in Neural Information Processing Systems 32 (NeurIPS), pages 10014--10024, 2019."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.5555\/3524938.3525684"},{"key":"e_1_3_2_2_29_1","volume-title":"Advances in Neural Information Processing Systems 38 (NeurIPS), page to appear","author":"Saad El Mehdi","year":"2024","unstructured":"El Mehdi Saad, Alexandra Carpentier, Tom\u00e1\u0161 Koc\u00e1k, and Nicolas Verzelen. On weak regret analysis for dueling bandits. In Advances in Neural Information Processing Systems 38 (NeurIPS), page to appear, 2024."},{"key":"e_1_3_2_2_30_1","first-page":"9245","volume-title":"Proceedings of the 38th International Conference on Machine Learning (ICML)","volume":"139","author":"Saha Aadirupa","year":"2021","unstructured":"Aadirupa Saha, Tomer Koren, and Yishay Mansour. Dueling convex optimization. In Proceedings of the 38th International Conference on Machine Learning (ICML), volume 139, pages 9245--9254, 2021."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330705"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539439"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553527"},{"key":"e_1_3_2_2_34_1","first-page":"241","volume-title":"Proceedings of the 28th International Conference on Machine Learning (ICML)","author":"Yue Yisong","year":"2011","unstructured":"Yisong Yue and Thorsten Joachims. Beat the mean bandit. In Proceedings of the 28th International Conference on Machine Learning (ICML), pages 241--248, 2011."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2011.12.028"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939878"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2016.2569096"},{"key":"e_1_3_2_2_38_1","first-page":"10","volume-title":"Proceedings of the 31th International conference on machine learning (ICML)","author":"Zoghi Masrour","year":"2014","unstructured":"Masrour Zoghi, Shimon Whiteson, Remi Munos, and Maarten Rijke. Relative upper confidence bound for the k-armed dueling bandit problem. In Proceedings of the 31th International conference on machine learning (ICML), pages 10--18, 2014."},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/2684822.2685290"}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"],"location":"Toronto ON Canada","acronym":"KDD '25"},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709279","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3690624.3709279","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,16]],"date-time":"2025-08-16T15:36:39Z","timestamp":1755358599000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709279"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,20]]},"references-count":39,"alternative-id":["10.1145\/3690624.3709279","10.1145\/3690624"],"URL":"https:\/\/doi.org\/10.1145\/3690624.3709279","relation":{},"subject":[],"published":{"date-parts":[[2025,7,20]]},"assertion":[{"value":"2025-07-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}