{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:20:35Z","timestamp":1750220435700,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,7,11]],"date-time":"2021-07-11T00:00:00Z","timestamp":1625961600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["IIS-1553568, IIS-2007492, and IIS-1618948"],"award-info":[{"award-number":["IIS-1553568, IIS-2007492, and IIS-1618948"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,7,11]]},"DOI":"10.1145\/3404835.3462810","type":"proceedings-article","created":{"date-parts":[[2021,7,12]],"date-time":"2021-07-12T03:08:25Z","timestamp":1626059305000},"page":"2658-2661","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Interactive Information Retrieval with Bandit Feedback"],"prefix":"10.1145","author":[{"given":"Huazheng","family":"Wang","sequence":"first","affiliation":[{"name":"University of Virginia, Charlottesville, VA, USA"}]},{"given":"Yiling","family":"Jia","sequence":"additional","affiliation":[{"name":"University of Virginia, Charlottesville, VA, USA"}]},{"given":"Hongning","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Virginia, Charlottesville, VA, USA"}]}],"member":"320","published-online":{"date-parts":[[2021,7,11]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"2312","volume-title":"NIPS","author":"Y.","year":"2011","unstructured":"Y. Abbasi-yadkori, D. P\u00e1l, and C. Szepesv\u00e1ri. Improved algorithms for linear stochastic bandits. In NIPS, pages 2312--2320. 2011."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313697"},{"key":"e_1_3_2_1_3_1","first-page":"127","volume-title":"International Conference on Machine Learning","author":"Agrawal S.","year":"2013","unstructured":"S. Agrawal and N. Goyal. Thompson sampling for contextual bandits with linear payoffs. In International Conference on Machine Learning, pages 127--135, 2013."},{"key":"e_1_3_2_1_4_1","volume-title":"Unbiased learning to rank: Online or offline? ACM Transactions on Information Systems (TOIS), 39(2):1--29","author":"Ai Q.","year":"2021","unstructured":"Q. Ai, T. Yang, H. Wang, and J. Mao. Unbiased learning to rank: Online or offline? ACM Transactions on Information Systems (TOIS), 39(2):1--29, 2021."},{"key":"e_1_3_2_1_5_1","first-page":"41","volume-title":"COLT","author":"Audibert J.-Y.","year":"2010","unstructured":"J.-Y. Audibert, S. Bubeck, and R. Munos. Best arm identification in multi-armed bandits. In COLT, pages 41--53, 2010."},{"key":"e_1_3_2_1_6_1","volume-title":"Using confidence bounds for exploitation-exploration trade-offs. Journal of Machine Learning Research, 3(Nov):397--422","author":"Auer P.","year":"2002","unstructured":"P. Auer. Using confidence bounds for exploitation-exploration trade-offs. Journal of Machine Learning Research, 3(Nov):397--422, 2002."},{"key":"e_1_3_2_1_7_1","volume-title":"May","author":"Auer P.","year":"2002","unstructured":"P. Auer, N. Cesa-Bianchi, and P. Fischer. Finite-time analysis of the multiarmed bandit problem. Mach. Learn., 47(2--3):235--256, May 2002."},{"key":"e_1_3_2_1_8_1","volume-title":"Pro. NIPS","author":"Cesa-Bianchi N.","year":"2013","unstructured":"N. Cesa-Bianchi, C. Gentile, and G. Zappella. A gang of bandits. In Pro. NIPS, 2013."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.5555\/3398761.3398990"},{"key":"e_1_3_2_1_10_1","volume-title":"On upper-confidence bound policies for non-stationary bandit problems. In arXiv preprint arXiv:0805.3415","author":"Garivier A.","year":"2008","unstructured":"A. Garivier and E. Moulines. On upper-confidence bound policies for non-stationary bandit problems. In arXiv preprint arXiv:0805.3415 (2008)."},{"key":"e_1_3_2_1_11_1","first-page":"757","volume-title":"Pro. of the 31st International Conference on Machine Learning (ICML-14)","author":"Gentile C.","year":"2014","unstructured":"C. Gentile, S. Li, and G. Zappella. Online clustering of bandits. In Pro. of the 31st International Conference on Machine Learning (ICML-14), pages 757--765, 2014."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/2911451.2914798"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449972"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3130332.3130334"},{"key":"e_1_3_2_1_15_1","first-page":"325","volume-title":"Advances in Neural Information Processing Systems","author":"Joseph M.","year":"2016","unstructured":"M. Joseph, M. Kearns, J. H. Morgenstern, and A. Roth. Fairness in learning: Classic and contextual bandits. In Advances in Neural Information Processing Systems, pages 325--333, 2016."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-34106-9_18"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2009.263"},{"key":"e_1_3_2_1_18_1","volume-title":"Perturbed-history exploration in stochastic linear bandits. arXiv preprint arXiv:1903.09132","author":"Kveton B.","year":"2019","unstructured":"B. Kveton, C. Szepesvari, M. Ghavamzadeh, and C. Boutilier. Perturbed-history exploration in stochastic linear bandits. arXiv preprint arXiv:1903.09132, 2019."},{"key":"e_1_3_2_1_19_1","first-page":"3601","volume-title":"International Conference on Machine Learning","author":"Kveton B.","year":"2019","unstructured":"B. Kveton, C. Szepesvari, S. Vaswani, Z. Wen, T. Lattimore, and M. Ghavamzadeh. Garbage in, reward out: Bootstrapping exploration in multi-armed bandits. In International Conference on Machine Learning, pages 3601--3610, 2019."},{"key":"e_1_3_2_1_20_1","volume-title":"Cascading bandits. arXiv preprint arXiv:1502.02763","author":"Kveton B.","year":"2015","unstructured":"B. Kveton, C. Szepesvari, Z. Wen, and A. Ashkan. Cascading bandits. arXiv preprint arXiv:1502.02763, 2015."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.5555\/2981562.2981665"},{"key":"e_1_3_2_1_22_1","first-page":"3945","volume-title":"Advances in Neural Information Processing Systems","author":"Lattimore T.","year":"2018","unstructured":"T. Lattimore, B. Kveton, S. Li, and C. Szepesvari. Toprank: A practical algorithm for online stochastic ranking. In Advances in Neural Information Processing Systems, pages 3945--3954, 2018."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772758"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2911451.2911548"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-14267-3"},{"key":"e_1_3_2_1_26_1","first-page":"1739","volume-title":"Conference On Learning Theory","author":"Luo H.","year":"2018","unstructured":"H. Luo, C.-Y. Wei, A. Agarwal, and J. Langford. Efficient contextual bandits in non-stationary worlds. In Conference On Learning Theory, pages 1739--1776, 2018."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1561\/9781680835335"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401100"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3271686"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441794"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366424.3383107"},{"key":"e_1_3_2_1_32_1","first-page":"7276","volume-title":"Advances in Neural Information Processing Systems","author":"Qi Y.","year":"2018","unstructured":"Y. Qi, Q. Wu, H. Wang, J. Tang, and M. Sun. Bandit learning with implicit feedback. In Advances in Neural Information Processing Systems, pages 7276--7286, 2018."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390255"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/2835776.2835804"},{"key":"e_1_3_2_1_35_1","first-page":"4296","volume-title":"Advances in Neural Information Processing Systems","author":"Shariff R.","year":"2018","unstructured":"R. Shariff and O. Sheffet. Differentially private contextual linear bandits. In Advances in Neural Information Processing Systems, pages 4296--4306, 2018."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331264"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2983323.2983847"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.5555\/3298483.3298627"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383313.3412254"},{"key":"e_1_3_2_1_40_1","volume-title":"SIGIR","author":"Wu Q.","year":"2018","unstructured":"Q. Wu, N. Iyer, and H. Wang. Learning contextual bandits in a collaborative environment. In SIGIR 2018."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330874"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/2911451.2911528"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313727"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3406484"},{"key":"e_1_3_2_1_45_1","volume-title":"Maximizing marginal fairness for dynamic learning to rank. arXiv preprint arXiv:2102.09670","author":"Yang T.","year":"2021","unstructured":"T. Yang and Q. Ai. Maximizing marginal fairness for dynamic learning to rank. arXiv preprint arXiv:2102.09670, 2021."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2011.12.028"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553527"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401424"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462811"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/186"}],"event":{"name":"SIGIR '21: The 44th International ACM SIGIR Conference on Research and Development in Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Virtual Event Canada","acronym":"SIGIR '21"},"container-title":["Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3404835.3462810","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3404835.3462810","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3404835.3462810","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:47:16Z","timestamp":1750193236000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3404835.3462810"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,11]]},"references-count":50,"alternative-id":["10.1145\/3404835.3462810","10.1145\/3404835"],"URL":"https:\/\/doi.org\/10.1145\/3404835.3462810","relation":{},"subject":[],"published":{"date-parts":[[2021,7,11]]},"assertion":[{"value":"2021-07-11","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}