{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,17]],"date-time":"2026-02-17T03:39:12Z","timestamp":1771299552177,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,8,24]],"date-time":"2024-08-24T00:00:00Z","timestamp":1724457600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/100020457","name":"C3.ai Digital Transformation Institute","doi-asserted-by":"publisher","id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/100020457","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["DMS-1953686, IIS-2107304, CMMI-1653435"],"award-info":[{"award-number":["DMS-1953686, IIS-2107304, CMMI-1653435"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"ONR","award":["1006977"],"award-info":[{"award-number":["1006977"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,8,25]]},"DOI":"10.1145\/3637528.3671892","type":"proceedings-article","created":{"date-parts":[[2024,8,25]],"date-time":"2024-08-25T04:55:12Z","timestamp":1724561712000},"page":"3806-3817","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Conversational Dueling Bandits in Generalized Linear Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-9921-3484","authenticated-orcid":false,"given":"Shuhua","family":"Yang","sequence":"first","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0466-6332","authenticated-orcid":false,"given":"Hui","family":"Yuan","sequence":"additional","affiliation":[{"name":"Princeton University, Princeton, NJ, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3321-5940","authenticated-orcid":false,"given":"Xiaoying","family":"Zhang","sequence":"additional","affiliation":[{"name":"ByteDance, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2101-9507","authenticated-orcid":false,"given":"Mengdi","family":"Wang","sequence":"additional","affiliation":[{"name":"Princeton University, Princeton, NJ, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6787-2549","authenticated-orcid":false,"given":"Hong","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3918-6925","authenticated-orcid":false,"given":"Huazheng","family":"Wang","sequence":"additional","affiliation":[{"name":"Oregon State University, Corvallis, OR, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,8,24]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.5555\/2986459.2986717"},{"key":"e_1_3_2_2_2_1","volume-title":"COLT - 23th Conference on Learning Theory -","author":"Audibert Jean-Yves","year":"2010","unstructured":"Jean-Yves Audibert and S\u00e9bastien Bubeck. 2010. Best Arm Identification in Multi-Armed Bandits. In COLT - 23th Conference on Learning Theory - 2010. Haifa, Israel, 13 p. https:\/\/enpc.hal.science\/hal-00654404"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/1007352.1007367"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.5555\/3546258.3546265"},{"key":"e_1_3_2_2_5_1","volume-title":"Proceedings of the 39th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"1786","author":"Bengs Viktor","year":"2022","unstructured":"Viktor Bengs, Aadirupa Saha, and Eyke H\u00fcllermeier. 2022. Stochastic Contextual Dueling Bandits under Linear Stochastic Transitivity Models. In Proceedings of the 39th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 162), Kamalika Chaudhuri, Stefanie Jegelka, Le Song, Csaba Szepesvari, Gang Niu, and Sivan Sabato (Eds.). PMLR, 1764--1786. https:\/\/proceedings.mlr.press\/v162\/bengs22a.html"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2043932.2044016"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939746"},{"key":"e_1_3_2_2_8_1","volume-title":"Proceedings of The 28th Conference on Learning Theory (Proceedings of Machine Learning Research","volume":"587","author":"Dud\u00edk Miroslav","year":"2015","unstructured":"Miroslav Dud\u00edk, Katja Hofmann, Robert E. Schapire, Aleksandrs Slivkins, and Masrour Zoghi. 2015. Contextual Dueling Bandits. In Proceedings of The 28th Conference on Learning Theory (Proceedings of Machine Learning Research, Vol. 40), Peter Gr\u00fcnwald, Elad Hazan, and Satyen Kale (Eds.). PMLR, Paris, France, 563--587. https:\/\/proceedings.mlr.press\/v40\/Dudik15.html"},{"key":"e_1_3_2_2_9_1","volume-title":"Proceedings of the 23rd International Conference on Neural Information Processing Systems -","volume":"1","author":"Filippi Sarah","year":"2010","unstructured":"Sarah Filippi, Olivier Capp\u00e9, Aur\u00e9lien Garivier, and Csaba Szepesv\u00e1ri. 2010. Parametric bandits: the Generalized Linear case. In Proceedings of the 23rd International Conference on Neural Information Processing Systems - Volume 1 (Vancouver, British Columbia, Canada) (NIPS'10). Curran Associates Inc., Red Hook, NY, USA, 586--594."},{"key":"e_1_3_2_2_10_1","volume-title":"Provable Benefits of Policy Learning from Human Preferences in Contextual Bandit Problems. arXiv preprint arXiv:2307.12975","author":"Ji Xiang","year":"2023","unstructured":"Xiang Ji, Huazheng Wang, Minshuo Chen, Tuo Zhao, and Mengdi Wang. 2023. Provable Benefits of Policy Learning from Human Preferences in Contextual Bandit Problems. arXiv preprint arXiv:2307.12975 (2023)."},{"key":"e_1_3_2_2_11_1","volume-title":"Proceedings of the Twenty Third International Conference on Artificial Intelligence and Statistics (Proceedings of Machine Learning Research","volume":"2076","author":"Kveton Branislav","year":"2020","unstructured":"Branislav Kveton, Manzil Zaheer, Csaba Szepesvari, Lihong Li, Mohammad Ghavamzadeh, and Craig Boutilier. 2020. Randomized Exploration in Generalized Linear Bandits. In Proceedings of the Twenty Third International Conference on Artificial Intelligence and Statistics (Proceedings of Machine Learning Research, Vol. 108), Silvia Chiappa and Roberto Calandra (Eds.). PMLR, 2066--2076. https:\/\/proceedings.mlr.press\/v108\/kveton20a.html"},{"key":"e_1_3_2_2_12_1","volume-title":"Proceedings of the Workshop on On-line Trading of Exploration and Exploitation 2 (Proceedings of Machine Learning Research","volume":"36","author":"Li Lihong","year":"2012","unstructured":"Lihong Li, Wei Chu, John Langford, Taesup Moon, and Xuanhui Wang. 2012. An Unbiased Offline Evaluation of Contextual Bandit Algorithms with Generalized Linear Models. In Proceedings of the Workshop on On-line Trading of Exploration and Exploitation 2 (Proceedings of Machine Learning Research, Vol. 26), Dorota Glowacka, Louis Dorard, and John Shawe-Taylor (Eds.). PMLR, Bellevue, Washington, USA, 19--36. https:\/\/proceedings.mlr.press\/v26\/li12a.html"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772758"},{"key":"e_1_3_2_2_14_1","volume-title":"Proceedings of the 34th International Conference on Machine Learning -","volume":"70","author":"Li Lihong","year":"2017","unstructured":"Lihong Li, Yu Lu, and Dengyong Zhou. 2017. Provably optimal algorithms for generalized linear contextual bandits. In Proceedings of the 34th International Conference on Machine Learning - Volume 70 (Sydney, NSW, Australia) (ICML'17). JMLR.org, 2071--2080."},{"key":"e_1_3_2_2_15_1","volume-title":"Proceedings of the AAAI conference on artificial intelligence","volume":"35","author":"Garud Iyengar Oh","year":"2021","unstructured":"Min-hwan Oh and Garud Iyengar. 2021. Multinomial logit contextual bandits: Provable optimality and practicality. In Proceedings of the AAAI conference on artificial intelligence, Vol. 35. 9205--9213."},{"key":"e_1_3_2_2_16_1","volume-title":"Multinomial logit bandit with linear utility functions. arXiv preprint arXiv:1805.02971","author":"Ou Mingdong","year":"2018","unstructured":"Mingdong Ou, Nan Li, Shenghuo Zhu, and Rong Jin. 2018. Multinomial logit bandit with linear utility functions. arXiv preprint arXiv:1805.02971 (2018)."},{"key":"e_1_3_2_2_17_1","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang Long","year":"2022","unstructured":"Long Ouyang, Jeffrey Wu, Xu Jiang, Diogo Almeida, Carroll Wainwright, Pamela Mishkin, Chong Zhang, Sandhini Agarwal, Katarina Slama, Alex Ray, et al. 2022. Training language models to follow instructions with human feedback. Advances in Neural Information Processing Systems, Vol. 35 (2022), 27730--27744.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_18_1","first-page":"30050","article-title":"Optimal Algorithms for Stochastic Contextual Preference Bandits","volume":"34","author":"Saha Aadirupa","year":"2021","unstructured":"Aadirupa Saha. 2021. Optimal Algorithms for Stochastic Contextual Preference Bandits. Advances in Neural Information Processing Systems, Vol. 34 (2021), 30050--30062.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_19_1","volume-title":"Proceedings of The 33rd International Conference on Algorithmic Learning Theory (Proceedings of Machine Learning Research","volume":"994","author":"Saha Aadirupa","year":"2022","unstructured":"Aadirupa Saha and Akshay Krishnamurthy. 2022. Efficient and Optimal Algorithms for Contextual Dueling Bandits under Realizability. In Proceedings of The 33rd International Conference on Algorithmic Learning Theory (Proceedings of Machine Learning Research, Vol. 167), Sanjoy Dasgupta and Nika Haghtalab (Eds.). PMLR, 968--994. https:\/\/proceedings.mlr.press\/v167\/saha22a.html"},{"key":"e_1_3_2_2_20_1","volume-title":"Proceedings of The 26th International Conference on Artificial Intelligence and Statistics (Proceedings of Machine Learning Research","volume":"6289","author":"Saha Aadirupa","year":"2023","unstructured":"Aadirupa Saha, Aldo Pacchiano, and Jonathan Lee. 2023. Dueling RL: Reinforcement Learning with Trajectory Preferences. In Proceedings of The 26th International Conference on Artificial Intelligence and Statistics (Proceedings of Machine Learning Research, Vol. 206), Francisco Ruiz, Jennifer Dy, and Jan-Willem van de Meent (Eds.). PMLR, 6263--6289. https:\/\/proceedings.mlr.press\/v206\/saha23a.html"},{"key":"e_1_3_2_2_21_1","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"Sekhari Ayush","year":"2024","unstructured":"Ayush Sekhari, Karthik Sridharan, Wen Sun, and Runzhe Wu. 2024. Contextual bandits and imitation learning with preference-based active queries. In Proceedings of the 37th International Conference on Neural Information Processing Systems (New Orleans, LA, USA) (NIPS '23). Curran Associates Inc., Red Hook, NY, USA, Article 499, 35 pages."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i8.26225"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482328"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462920"},{"key":"e_1_3_2_2_25_1","first-page":"18784","article-title":"Preference-based reinforcement learning with finite-time guarantees","volume":"33","author":"Xu Yichong","year":"2020","unstructured":"Yichong Xu, Ruosong Wang, Lin Yang, Aarti Singh, and Artur Dubrawski. 2020. Preference-based reinforcement learning with finite-time guarantees. Advances in Neural Information Processing Systems, Vol. 33 (2020), 18784--18794.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2011.12.028"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.5555\/3104482.3104513"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380148"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512152"},{"key":"e_1_3_2_2_30_1","volume-title":"Proceedings of the 40th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"43067","author":"Zhu Banghua","year":"2023","unstructured":"Banghua Zhu, Michael Jordan, and Jiantao Jiao. 2023. Principled Reinforcement Learning with Human Feedback from Pairwise or K-wise Comparisons. In Proceedings of the 40th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 202), Andreas Krause, Emma Brunskill, Kyunghyun Cho, Barbara Engelhardt, Sivan Sabato, and Jonathan Scarlett (Eds.). PMLR, 43037--43067. https:\/\/proceedings.mlr.press\/v202\/zhu23f.html"},{"key":"e_1_3_2_2_31_1","volume-title":"Proceedings of the 31st International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"18","author":"Zoghi Masrour","year":"2014","unstructured":"Masrour Zoghi, Shimon Whiteson, Remi Munos, and Maarten Rijke. 2014. Relative Upper Confidence Bound for the K-Armed Dueling Bandit Problem. In Proceedings of the 31st International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 32), Eric P. Xing and Tony Jebara (Eds.). PMLR, Bejing, China, 10--18. https:\/\/proceedings.mlr.press\/v32\/zoghi14.html"}],"event":{"name":"KDD '24: The 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Barcelona Spain","acronym":"KDD '24","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3671892","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3637528.3671892","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3637528.3671892","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:04:15Z","timestamp":1750291455000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3671892"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,24]]},"references-count":31,"alternative-id":["10.1145\/3637528.3671892","10.1145\/3637528"],"URL":"https:\/\/doi.org\/10.1145\/3637528.3671892","relation":{},"subject":[],"published":{"date-parts":[[2024,8,24]]},"assertion":[{"value":"2024-08-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}