{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T19:23:45Z","timestamp":1740165825912,"version":"3.37.3"},"reference-count":48,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Science and Technology Innovation 2030\u2014\u201cBrain Science and Brain-like Research\u201d Key Project","award":["2021ZD0201405"],"award-info":[{"award-number":["2021ZD0201405"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1109\/tnnls.2023.3306801","type":"journal-article","created":{"date-parts":[[2023,11,24]],"date-time":"2023-11-24T19:00:30Z","timestamp":1700852430000},"page":"17608-17619","source":"Crossref","is-referenced-by-count":0,"title":["Master\u2013Slave Deep Architecture for Top-<i>K<\/i> Multiarmed Bandits With Nonlinear Bandit Feedback and Diversity Constraints"],"prefix":"10.1109","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5500-3311","authenticated-orcid":false,"given":"Hanchi","family":"Huang","sequence":"first","affiliation":[{"name":"School of Computer Science and Engineering, Nanyang Technological University, Jurong West, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5659-3464","authenticated-orcid":false,"given":"Li","family":"Shen","sequence":"additional","affiliation":[{"name":"JD Explore Academy, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1754-1837","authenticated-orcid":false,"given":"Deheng","family":"Ye","sequence":"additional","affiliation":[{"name":"Tencent, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3865-8145","authenticated-orcid":false,"given":"Wei","family":"Liu","sequence":"additional","affiliation":[{"name":"Tencent, Shenzhen, China"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Regret bounds for stochastic combinatorial multi-armed bandits with linear space complexity","author":"Agarwal","year":"2018","journal-title":"arXiv:1811.11925"},{"key":"ref2","first-page":"353","article-title":"Learning to branch","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","volume":"80","author":"Balcan"},{"key":"ref3","first-page":"1","article-title":"Neural combinatorial optimization with reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Bello"},{"key":"ref4","first-page":"1","article-title":"Towards minimax policies for online linear optimization with bandit feedback","volume-title":"Proc. JMLR","author":"Bubeck"},{"key":"ref5","first-page":"1","article-title":"On top-k selection in multi-armed bandits and hidden bipartite graphs","volume-title":"Proc. NIPS","volume":"8","author":"Cao"},{"key":"ref6","first-page":"737","article-title":"A gang of bandits","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Cesa-Bianchi"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290999"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"key":"ref9","first-page":"208","article-title":"Contextual bandits with linear payoff functions","volume-title":"Proc. 14th Int. Conf. Artif. Intell. Statist.","author":"Chu"},{"issue":"5","key":"ref10","first-page":"1678","article-title":"A teaching-learning-based optimization algorithm for the weighted set-covering problem","volume":"27","author":"Crawford","year":"2020","journal-title":"Tehni\u010dki vjesnik"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2022.3156066"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2017.2697407"},{"key":"ref13","article-title":"Deep reinforcement learning in large discrete action spaces","author":"Dulac-Arnold","year":"2015","journal-title":"arXiv:1512.07679"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58942-4_12"},{"key":"ref15","first-page":"9971","article-title":"Post: Device placement with cross-entropy minimization and proximal policy optimization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Gao"},{"key":"ref16","first-page":"15580","article-title":"Exact combinatorial optimization with graph convolutional neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Gasse"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330832"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/239"},{"key":"ref19","first-page":"722","article-title":"Cascading linear submodular bandits: Accounting for position bias and diversity in online learning to rank","volume-title":"Proc. PMLR","author":"Hiranandani"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3054912"},{"key":"ref21","first-page":"3499","article-title":"Stochastic beams and where to find them: The gumbel-top-k trick for sampling sequences without replacement","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","volume":"97","author":"Kool"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1162\/neco_a_01299"},{"key":"ref23","first-page":"767","article-title":"Cascading bandits: Learning to rank in the cascade model","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kveton"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1017\/9781108571401"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3383313.3412245"},{"key":"ref26","first-page":"1","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. 4th Int. Conf. Learn. Represent.","author":"Lillicrap"},{"key":"ref27","first-page":"901","article-title":"Combinatorial partial monitoring game with linear feedback and its applications","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Lin"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6211"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2015.2415257"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220007"},{"key":"ref31","first-page":"1","article-title":"DPPNET: Approximating determinantal point processes with deep networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Mariet"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/34.232078"},{"volume-title":"Gurobi Optimizer Reference Manual","year":"2014","key":"ref33"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/2037661.2037665"},{"key":"ref35","first-page":"752","article-title":"Top-k combinatorial bandits with full-bandit feedback","volume-title":"Proc. PMLR","author":"Rejwan"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2022.3190509"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939704"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3481948"},{"key":"ref39","first-page":"1191","article-title":"Co-training for policy learning","volume-title":"Proc. PMLR","author":"Song"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/3383313.3412236"},{"key":"ref41","first-page":"1","article-title":"Reward constrained policy optimization","volume-title":"Proc. 7th Int. Conf. Learn. Represent.","author":"Tessler"},{"key":"ref42","first-page":"14499","article-title":"Censored semi-bandits: A framework for resource allocation with censored feedback","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Verma"},{"key":"ref43","first-page":"1","article-title":"Towards one-shot neural combinatorial solvers: Theoretical and empirical notes on the cardinality-constrained case","volume-title":"Proc. 11th Int. Conf. Learn. Represent.","author":"Wang"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3071392"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2875144"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3185994"},{"key":"ref47","first-page":"11492","article-title":"Neural contextual bandits with ucb-based exploration","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","volume":"119","author":"Zhou"},{"key":"ref48","first-page":"7683","article-title":"Beating stochastic and adversarial semi-bandits optimally and simultaneously","volume-title":"Proc. PMLR","author":"Zimmert"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/10772360\/10328656.pdf?arnumber=10328656","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T19:10:14Z","timestamp":1733253014000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10328656\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12]]},"references-count":48,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2023.3306801","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"type":"print","value":"2162-237X"},{"type":"electronic","value":"2162-2388"}],"subject":[],"published":{"date-parts":[[2024,12]]}}}