{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,30]],"date-time":"2025-12-30T08:51:17Z","timestamp":1767084677174,"version":"3.37.3"},"reference-count":66,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2023,10,1]],"date-time":"2023-10-01T00:00:00Z","timestamp":1696118400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,10,1]],"date-time":"2023-10-01T00:00:00Z","timestamp":1696118400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,10,1]],"date-time":"2023-10-01T00:00:00Z","timestamp":1696118400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Knowl. Data Eng."],"published-print":{"date-parts":[[2023,10,1]]},"DOI":"10.1109\/tkde.2022.3186920","type":"journal-article","created":{"date-parts":[[2022,8,3]],"date-time":"2022-08-03T19:33:00Z","timestamp":1659555180000},"page":"9878-9889","source":"Crossref","is-referenced-by-count":7,"title":["Generative Adversarial Reward Learning for Generalized Behavior Tendency Inference"],"prefix":"10.1109","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8849-4943","authenticated-orcid":false,"given":"Xiaocong","family":"Chen","sequence":"first","affiliation":[{"name":"School of Computer Science and Engineering, University of New South Wales, Sydney, NSW, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4149-839X","authenticated-orcid":false,"given":"Lina","family":"Yao","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, University of New South Wales, Sydney, NSW, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9582-3445","authenticated-orcid":false,"given":"Xianzhi","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Computer Science, University of Technology Sydney, Sydney, NSW, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0764-4258","authenticated-orcid":false,"given":"Aixin","family":"Sun","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3326-4147","authenticated-orcid":false,"given":"Quan Z.","family":"Sheng","sequence":"additional","affiliation":[{"name":"Department of Computing, Macquarie University, Sydney, NSW, Australia"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00027"},{"article-title":"Framework for selecting and delivering advertisements over a network based on combined short-term and long-term user behavioral interests","year":"2007","author":"liu","key":"ref57"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/2556270"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2019.00111"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8462837"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1145\/2911451.2914726"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-008-9062-9"},{"article-title":"Incremental update of long-term and short-term user profile scores in a behavioral targeting system","year":"2011","author":"chung","key":"ref58"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.312"},{"key":"ref52","first-page":"1052","article-title":"Generative adversarial user model for reinforcement learning based recommendation system","author":"chen","year":"2019","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/2043932.2043953"},{"article-title":"Addressing function approximation error in actor-critic methods","year":"2018","author":"fujimoto","key":"ref55"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794254"},{"article-title":"Delving deeper into convolutional networks for learning video representations","year":"2015","author":"ballas","key":"ref54"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2743240"},{"article-title":"A survey of deep reinforcement learning in recommender systems: A systematic review and future directions","year":"2021","author":"chen","key":"ref16"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013312"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220122"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313616"},{"key":"ref50","first-page":"10 735","article-title":"A model-based reinforcement learning with adversarial training for online recommendation","author":"bai","year":"2019","journal-title":"Proc Adv Neural Inform Process Syst"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2018.8569938"},{"key":"ref45","first-page":"8026","article-title":"Pytorch: An imperative style, high-performance deep learning library","author":"paszke","year":"2019","journal-title":"Proc Adv Neural Inform Process Syst"},{"article-title":"Continuous control with deep reinforcement learning","year":"2015","author":"lillicrap","key":"ref48"},{"article-title":"Reinforcement learning from imperfect demonstrations","year":"2018","author":"gao","key":"ref47"},{"key":"ref42","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref41","first-page":"700","article-title":"Are gans created equal? A large-scale study","author":"lucic","year":"2018","journal-title":"Proc Adv Neural Inform Process Syst"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"schulman","key":"ref44"},{"article-title":"High-dimensional continuous control using generalized advantage estimation","year":"2015","author":"schulman","key":"ref43"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014902"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1177\/0018720816644364"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/604045.604064"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460766"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/258"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3158369"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/2623372"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6125"},{"key":"ref40","volume":"338","author":"villani","year":"2008","journal-title":"Optimal Transport Old and New"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN48605.2020.9207010"},{"key":"ref34","first-page":"1008","article-title":"Actor-critic algorithms","author":"konda","year":"2000","journal-title":"Proc Adv Neural Inform Process Syst"},{"article-title":"Adam: A method for stochastic optimization","year":"2014","author":"kingma","key":"ref37"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/3336191.3371858"},{"key":"ref31","article-title":"Learning robust rewards with adverserial inverse reinforcement learning","author":"fu","year":"2018","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1214\/08-AOS595"},{"key":"ref33","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref32","article-title":"Discriminator-actor-critic: Addressing sample inefficiency and reward bias in adversarial imitation learning","author":"kostrikov","year":"2019","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3185994"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2008.09.002"},{"article-title":"Wasserstein gan","year":"2017","author":"arjovsky","key":"ref39"},{"article-title":"Sample efficient actor-critic with experience replay","year":"2016","author":"wang","key":"ref38"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref23","first-page":"5767","article-title":"Improved training of wasserstein GANs","author":"gulrajani","year":"2017","journal-title":"Proc Adv Neural Inform Process Syst"},{"key":"ref26","first-page":"1449","article-title":"A game-theoretic approach to apprenticeship learning","author":"syed","year":"2008","journal-title":"Proc Adv Neural Inform Process Syst"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390286"},{"key":"ref20","article-title":"Algorithms for inverse reinforcement learning","author":"ng","year":"2000","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330933"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401134"},{"article-title":"Generative inverse deep reinforcement learning for online recommendation","year":"2020","author":"chen","key":"ref22"},{"article-title":"Prioritized experience replay","year":"2015","author":"schaul","key":"ref66"},{"key":"ref21","first-page":"4565","article-title":"Generative adversarial imitation learning","author":"ho","year":"2016","journal-title":"Proc Adv Neural Inform Process Syst"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1145\/1833349.1778859"},{"key":"ref28","first-page":"1255","article-title":"Modeling interaction via the principle of maximum causal entropy","author":"ziebart","year":"2010","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2014.7040156"},{"key":"ref29","first-page":"2672","article-title":"Generative adversarial nets","author":"goodfellow","year":"2014","journal-title":"Proc Adv Neural Inform Process Syst"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330666"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401171"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1145\/3336191.3371801"}],"container-title":["IEEE Transactions on Knowledge and Data Engineering"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/69\/10251471\/09849003.pdf?arnumber=9849003","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,2]],"date-time":"2023-10-02T18:12:25Z","timestamp":1696270345000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9849003\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,1]]},"references-count":66,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tkde.2022.3186920","relation":{},"ISSN":["1041-4347","1558-2191","2326-3865"],"issn-type":[{"type":"print","value":"1041-4347"},{"type":"electronic","value":"1558-2191"},{"type":"electronic","value":"2326-3865"}],"subject":[],"published":{"date-parts":[[2023,10,1]]}}}