{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T13:39:01Z","timestamp":1771681141688,"version":"3.50.1"},"reference-count":38,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2023,4,1]],"date-time":"2023-04-01T00:00:00Z","timestamp":1680307200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,4,1]],"date-time":"2023-04-01T00:00:00Z","timestamp":1680307200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,4,1]],"date-time":"2023-04-01T00:00:00Z","timestamp":1680307200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"New Generation of AI 2030","award":["2018AAA0100900"],"award-info":[{"award-number":["2018AAA0100900"]}]},{"name":"Shanghai Municipal Science and Technology Major Project","award":["2021SHZDZX0102"],"award-info":[{"award-number":["2021SHZDZX0102"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62076161"],"award-info":[{"award-number":["62076161"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62177033"],"award-info":[{"award-number":["62177033"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Huawei Innovation Research Program"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Knowl. Data Eng."],"published-print":{"date-parts":[[2023,4,1]]},"DOI":"10.1109\/tkde.2021.3137310","type":"journal-article","created":{"date-parts":[[2021,12,23]],"date-time":"2021-12-23T23:01:30Z","timestamp":1640300490000},"page":"4018-4032","source":"Crossref","is-referenced-by-count":11,"title":["Large-Scale Interactive Recommendation With Tree-Structured Reinforcement Learning"],"prefix":"10.1109","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5485-2984","authenticated-orcid":false,"given":"Haokun","family":"Chen","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8320-6845","authenticated-orcid":false,"given":"Chenxu","family":"Zhu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"given":"Ruiming","family":"Tang","sequence":"additional","affiliation":[{"name":"Huawei Noah&#x0027;s Ark Lab, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0127-2425","authenticated-orcid":false,"given":"Weinan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"given":"Xiuqiang","family":"He","sequence":"additional","affiliation":[{"name":"Huawei Noah&#x0027;s Ark Lab, Shenzhen, China"}]},{"given":"Yong","family":"Yu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/2505515.2505690"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/1148170.1148257"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2009.263"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v28i1.8703"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772758"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/2983323.2983847"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3018661.3018702"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3185994"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219886"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3240323.3240374"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013312"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219846"},{"key":"ref15","article-title":"Deep reinforcement learning in large discrete action spaces","author":"Dulac-Arnold","year":"2015"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/239"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33015941"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401063"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380164"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401042"},{"key":"ref21","first-page":"2249","article-title":"An empirical evaluation of thompson sampling","volume-title":"Proc. 24th Int. Conf. Neural Inf. Process. Syst.","author":"Chapelle"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939878"},{"key":"ref23","first-page":"1297","article-title":"Efficient thompson sampling for online matrix-factorization recommendation","volume-title":"Proc. 28th Int. Conf. Neural Inf. Process. Syst.","author":"Kawale"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10936"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080677"},{"key":"ref26","article-title":"Model-based reinforcement learning for whole-chain recommendations","author":"Zhao","year":"2019"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/2911451.2914798"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.32657\/10356\/90191"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2014.2321376"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11798"},{"key":"ref31","first-page":"941","article-title":"Learning action representations for reinforcement learning","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","author":"Chandak"},{"key":"ref32","article-title":"Auto-encoding variational bayes","author":"Kingma","year":"2013"},{"key":"ref33","article-title":"Training RNNs as fast as CNNs","author":"Lei","year":"2017"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-3618-5_2"},{"key":"ref35","first-page":"1257","article-title":"Probabilistic matrix factorization","volume-title":"Proc. 20th Int. Conf. Neural Inf. Process. Syst.","author":"Mnih"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10153"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10804"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/963770.963772"}],"container-title":["IEEE Transactions on Knowledge and Data Engineering"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/69\/10063074\/09661309.pdf?arnumber=9661309","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,10]],"date-time":"2024-01-10T00:06:34Z","timestamp":1704845194000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9661309\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,1]]},"references-count":38,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tkde.2021.3137310","relation":{},"ISSN":["1041-4347","1558-2191","2326-3865"],"issn-type":[{"value":"1041-4347","type":"print"},{"value":"1558-2191","type":"electronic"},{"value":"2326-3865","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,4,1]]}}}