{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T15:12:29Z","timestamp":1775142749073,"version":"3.50.1"},"reference-count":49,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2023,4,1]],"date-time":"2023-04-01T00:00:00Z","timestamp":1680307200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,4,1]],"date-time":"2023-04-01T00:00:00Z","timestamp":1680307200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,4,1]],"date-time":"2023-04-01T00:00:00Z","timestamp":1680307200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["71701205"],"award-info":[{"award-number":["71701205"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62001495"],"award-info":[{"award-number":["62001495"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004735","name":"Natural Science Foundation of Hunan Province","doi-asserted-by":"publisher","award":["2020JJ5675"],"award-info":[{"award-number":["2020JJ5675"]}],"id":[{"id":"10.13039\/501100004735","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Cybern."],"published-print":{"date-parts":[[2023,4]]},"DOI":"10.1109\/tcyb.2021.3116762","type":"journal-article","created":{"date-parts":[[2021,10,20]],"date-time":"2021-10-20T18:20:10Z","timestamp":1634754010000},"page":"2311-2324","source":"Crossref","is-referenced-by-count":8,"title":["Graph-Attention-Based Casual Discovery With Trust Region-Navigated Clipping Policy Optimization"],"prefix":"10.1109","volume":"53","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4780-3890","authenticated-orcid":false,"given":"Shixuan","family":"Liu","sequence":"first","affiliation":[{"name":"College of Systems Engineering, National University of Defense Technology, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1608-8695","authenticated-orcid":false,"given":"Yanghe","family":"Feng","sequence":"additional","affiliation":[{"name":"College of Systems Engineering, National University of Defense Technology, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Keyu","family":"Wu","sequence":"additional","affiliation":[{"name":"College of Systems Engineering, National University of Defense Technology, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guangquan","family":"Cheng","sequence":"additional","affiliation":[{"name":"College of Systems Engineering, National University of Defense Technology, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jincai","family":"Huang","sequence":"additional","affiliation":[{"name":"College of Systems Engineering, National University of Defense Technology, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhong","family":"Liu","sequence":"additional","affiliation":[{"name":"College of Systems Engineering, National University of Defense Technology, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1162\/jocn.2009.21387"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3501714.3501755"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1186\/1752-0509-1-37"},{"key":"ref4","volume-title":"Causation, Prediction, and Search, Second Edition","author":"Spirtes","year":"2000"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-2404-4_12"},{"key":"ref6","first-page":"1","article-title":"Causal discovery with reinforcement learning","volume-title":"Proc. 8th Int. Conf. Learn. Represent.","author":"Zhu"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1214\/aos\/1176344136"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/bf00992696"},{"key":"ref9","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume-title":"Advances in Neural Information Processing Systems","volume":"12","author":"Sutton","year":"1999"},{"key":"ref10","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. 32nd Int. Conf. Mach. Learn.","volume":"37","author":"Schulman"},{"key":"ref11","volume-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"},{"key":"ref12","volume-title":"Graph attention networks","author":"Velickovic","year":"2017"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1968.1054142"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-006-6889-7"},{"key":"ref15","first-page":"9492","article-title":"Dags with NO TEARS: continuous optimization for structure learning","volume-title":"Proc. NeurIPS","author":"Zheng"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220104"},{"issue":"1","key":"ref17","first-page":"2009","article-title":"Causal discovery with continuous additive noise models","volume":"15","author":"Peters","year":"2014","journal-title":"J. Mach. Learn. Res."},{"issue":"2","key":"ref18","first-page":"154","article-title":"A machine learning approach to classify pedestrians\u2019 event based on IMU and GPS","volume":"17","author":"Ahmed","year":"2019","journal-title":"Int. J. Artif. Intell."},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.3029338"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.3182\/20070709-3-RO-4910.00004"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TCST.2015.2488597"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-98131-4_3"},{"key":"ref23","volume-title":"Structural agnostic modeling: Adversarial learning of causal graphs","author":"Kalainathan","year":"2018"},{"key":"ref24","first-page":"1","article-title":"DAG-GNN: DAG structure learning with graph neural networks","volume-title":"Proc. ICML","author":"Yu"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2005.1555942"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2008.2005605"},{"key":"ref27","volume-title":"ChebNet: Efficient and stable constructions of deep neural networks with rectified power units using chebyshev approximations","author":"Tang","year":"2019"},{"key":"ref28","volume-title":"Inductive representation learning on large graphs","author":"Hamilton","year":"2017"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.2977374"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-017-0468-y"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2019.2890974"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2018.2885813"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2019.2950262"},{"key":"ref34","volume-title":"Neural architecture search with reinforcement learning","author":"Zoph","year":"2016"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1002\/9781118619179"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/1754.001.0001"},{"key":"ref37","first-page":"2003","article-title":"A linear non-Gaussian acyclic model for causal discovery","volume":"7","author":"Shimizu","year":"2006","journal-title":"J. Mach. Learn. Res."},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-4012"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/K16-1028"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1080\/09540099108946587"},{"key":"ref41","volume-title":"Prioritized experience replay","author":"Schaul","year":"2015"},{"key":"ref42","volume-title":"Are deep policy gradient algorithms truly policy gradient algorithms?","author":"Ilyas","year":"2018"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/s41060-016-0032-z"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1214\/14-aos1260"},{"key":"ref45","volume-title":"Gradient-based neural DAG learning","author":"Lachapelle","year":"2019"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1126\/science.1105809"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1186\/1471-2105-7-43"},{"key":"ref48","first-page":"113","article-title":"Truly proximal policy optimization","volume-title":"Proc. UAI Mach. Learn. Res.","volume":"115","author":"Wang"},{"key":"ref49","article-title":"A hybrid method for nonlinear equations","volume-title":"Numerical Methods for Nonlinear Algebraic Equations","author":"Powell","year":"1970"}],"container-title":["IEEE Transactions on Cybernetics"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6221036\/10073969\/09580556.pdf?arnumber=9580556","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,11]],"date-time":"2024-01-11T23:38:04Z","timestamp":1705016284000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9580556\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4]]},"references-count":49,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tcyb.2021.3116762","relation":{},"ISSN":["2168-2267","2168-2275"],"issn-type":[{"value":"2168-2267","type":"print"},{"value":"2168-2275","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,4]]}}}