{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T13:49:48Z","timestamp":1769003388534,"version":"3.49.0"},"reference-count":88,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Major Science and Technology Innovation 2030"},{"name":"New Generation Artificial Intelligence","award":["2021ZD0112904"],"award-info":[{"award-number":["2021ZD0112904"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Artif. Intell."],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.1109\/tai.2023.3323628","type":"journal-article","created":{"date-parts":[[2023,10,11]],"date-time":"2023-10-11T13:57:55Z","timestamp":1697032675000},"page":"2410-2421","source":"Crossref","is-referenced-by-count":8,"title":["Nuclear Norm Maximization-Based Curiosity-Driven Reinforcement Learning"],"prefix":"10.1109","volume":"5","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4649-4061","authenticated-orcid":false,"given":"Chao","family":"Chen","sequence":"first","affiliation":[{"name":"School of Computer, National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1385-0074","authenticated-orcid":false,"given":"Yuanzhao","family":"Zhai","sequence":"additional","affiliation":[{"name":"School of Computer, National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5151-3381","authenticated-orcid":false,"given":"Zijian","family":"Gao","sequence":"additional","affiliation":[{"name":"School of Computer, National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5997-5169","authenticated-orcid":false,"given":"Kele","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Computer, National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3222-2268","authenticated-orcid":false,"given":"Sen","family":"Yang","sequence":"additional","affiliation":[{"name":"School of Computer, National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2632-5175","authenticated-orcid":false,"given":"Yiying","family":"Li","sequence":"additional","affiliation":[{"name":"Artificial Intelligence Research Center, DII, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1236-8318","authenticated-orcid":false,"given":"Bo","family":"Ding","sequence":"additional","affiliation":[{"name":"School of Computer, National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7587-8905","authenticated-orcid":false,"given":"Dawei","family":"Feng","sequence":"additional","affiliation":[{"name":"School of Computer, National University of Defense Technology, Changsha, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3245-1901","authenticated-orcid":false,"given":"Huaimin","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Computer, National University of Defense Technology, Changsha, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/tnn.1998.712192"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2743240"},{"key":"ref3","first-page":"4344","article-title":"Learning by playing solving sparse reward tasks from scratch","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Riedmiller","year":"2018"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2805379"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-019-13239-6"},{"key":"ref9","article-title":"Exploration by random network distillation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Burda","year":"2018"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3145971"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/733"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.03.003"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2891792"},{"key":"ref14","article-title":"Hierarchical deep reinforcement learning: Integrating temporal abstraction and intrinsic motivation","volume":"29","author":"Kulkarni","year":"2016","journal-title":"Proc. Adv. Neural Inf. Process. Syst."},{"key":"ref15","first-page":"1331","article-title":"Curious: Intrinsically motivated modular multi-goal reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Colas","year":"2019"},{"key":"ref16","first-page":"8622","article-title":"Adversarial intrinsic motivation for reinforcement learning","volume":"34","author":"Durugkar","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref17","article-title":"A survey on intrinsic motivation in reinforcement learning","author":"Aubret","year":"2019"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2018.03.014"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref20","article-title":"Large-scale study of curiosity-driven learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Burda","year":"2018"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1006\/ceps.1999.1020"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1162\/1064546053278973"},{"key":"ref23","first-page":"18459","article-title":"Behavior from the void: Unsupervised active pre-training","volume":"34","author":"Liu","year":"2021","journal-title":"Proc. Adv. Neural Inf. Process. Syst."},{"key":"ref24","first-page":"5062","article-title":"Self-supervised exploration via disagreement","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Pathak","year":"2019"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.330110009"},{"key":"ref26","article-title":"LIIR: Learning individual intrinsic reward in multi-agent reinforcement learning","volume":"32","author":"Du","year":"2019","journal-title":"Proc. Adv. Neural Inf. Process. 
Syst."},{"key":"ref27","first-page":"213","article-title":"R-max-a general polynomial time algorithm for near-optimal reinforcement learning","volume":"3","author":"Brafman","year":"2002","journal-title":"J. Mach. Learn. Res."},{"key":"ref28","first-page":"3379","article-title":"Curiosity-bottleneck: Exploration by distilling task-specific novelty","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kim","year":"2019"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017984413808"},{"key":"ref30","article-title":"Scheduled intrinsic drive: A hierarchical take on intrinsically motivated exploration","author":"Zhang","year":"2019"},{"key":"ref31","first-page":"16","article-title":"Variational state encoding as intrinsic motivation in reinforcement learning","volume-title":"Proc. Task-Agnost. Reinforcement Learn. Workshop Int. Conf. Learn. Representations","author":"Klissarov","year":"2019"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i7.20721"},{"key":"ref33","first-page":"3757","article-title":"Episodic multi-agent reinforcement learning with curiosity-driven exploration","volume":"34","author":"Zheng","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/3115.003.0030"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.21236\/ADA440280"},{"key":"ref36","article-title":"Incentivizing exploration in reinforcement learning with deep predictive models","author":"Stadie","year":"2015"},{"key":"ref37","first-page":"8114","article-title":"Novelty search in representational space for sample efficient exploration","volume":"33","author":"Tao","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref38","first-page":"11920","article-title":"Reinforcement learning with prototypical representations","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","author":"Yarats","year":"2021"},{"key":"ref39","article-title":"Never give up: Learning directed exploration strategies","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Badia","year":"2019"},{"key":"ref40","first-page":"15220","article-title":"How to stay curious while avoiding noisy TVs using aleatoric uncertainty estimation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Mavor-Parker","year":"2022"},{"key":"ref41","article-title":"Sample efficient deep reinforcement learning via uncertainty estimation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Mai","year":"2021"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1609\/aiide.v18i1.21959"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/j.simpa.2020.100022"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3912"},{"key":"ref45","article-title":"Exploration in deep reinforcement learning: A comprehensive survey","author":"Yang","year":"2021"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/0196-8858(85)90002-8"},{"key":"ref47","first-page":"592","article-title":"On Bayesian upper confidence bounds for bandit problems","volume-title":"Proc. Artif. Intell. Statist.","author":"Kaufmann","year":"2012"},{"key":"ref48","article-title":"Gaussian process optimization in the bandit setting: No regret and experimental design","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Srinivas","year":"2010"},{"key":"ref49","first-page":"397","article-title":"Using confidence bounds for exploitation-exploration trade-offs","volume":"3","author":"Auer","year":"2002","journal-title":"J. Mach. Learn. Res."},{"issue":"6","key":"ref50","article-title":"Entropy search for information-efficient global optimization","volume":"13","author":"Hennig","year":"2012","journal-title":"J. Mach. Learn. Res."},{"key":"ref51","first-page":"1699","article-title":"Predictive entropy search for Bayesian optimization with unknown constraints","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Hernndez-Lobato","year":"2015"},{"key":"ref52","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja","year":"2018"},{"key":"ref53","article-title":"Efficient exploration via state marginal matching","author":"Lee","year":"2019"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2006.890271"},{"key":"ref55","first-page":"1","article-title":"Large-scale study of curiosity-driven learning","volume-title":"Proc. 7th Int. Conf. Learn. Representations","author":"Burda","year":"2019"},{"key":"ref56","first-page":"1471","article-title":"Unifying count-based exploration and intrinsic motivation","volume":"29","author":"Bellemare","year":"2016","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref57","first-page":"2721","article-title":"Count-based exploration with neural density models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ostrovski","year":"2017"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/344"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CoG47356.2020.9231562"},{"key":"ref60","article-title":"Episodic curiosity through reachability","author":"Savinov","year":"2018","journal-title":"Proc. Int. Conf. Learn. Representations"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/78.942635"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3078329"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2021.3065700"},{"key":"ref64","first-page":"2887","article-title":"Low-rank constraints for fast inference in structured models","volume":"34","author":"Chiu","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/TCNS.2014.2309712"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1145\/2339530.2339612"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1137\/18m1183480"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1145\/2674559"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/bty013"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btz139"},{"key":"ref71","first-page":"720","article-title":"Weighted low-rank approximations","author":"Srebro","year":"2003","journal-title":"Proc. 20th Int. Conf. Mach. Learn."},{"key":"ref72","article-title":"Factor group-sparse regularization for efficient low-rank matrix recovery","volume":"32","author":"Fan","year":"2019","journal-title":"Proc. Adv. Neural Inf. Process. 
Syst."},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2020.09.021"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00400"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2021.3121765"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2018.2879185"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1080\/01630563.2021.1922438"},{"key":"ref78","first-page":"21453","article-title":"A novel variational form of the schatten-$ p$ quasi-norm","volume":"33","author":"Giampouras","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref79","article-title":"Matrix rank minimization with applications","author":"Fazel","year":"2002"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.382"},{"key":"ref81","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.32657\/10356\/90191"},{"key":"ref83","article-title":"URLB: Unsupervised reinforcement learning benchmark","volume-title":"Proc. Deep RL Workshop NeurIPS","author":"Laskin","year":"2021"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"ref85","article-title":"Image augmentation is all you need: Regularizing deep reinforcement learning from pixels","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Yarats","year":"2020"},{"key":"ref86","article-title":"Data-efficient reinforcement learning with self-predictive representations","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Schwarzer","year":"2020"},{"key":"ref87","first-page":"14961","article-title":"See, hear, explore: Curiosity via audio-visual association","volume":"33","author":"Dean","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref88","first-page":"6736","article-title":"APS: Active pretraining with successor features","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Liu","year":"2021"}],"container-title":["IEEE Transactions on Artificial Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9078688\/10532210\/10278488.pdf?arnumber=10278488","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T01:08:55Z","timestamp":1755911335000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10278488\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5]]},"references-count":88,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tai.2023.3323628","relation":{},"ISSN":["2691-4581"],"issn-type":[{"value":"2691-4581","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,5]]}}}