{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T17:52:50Z","timestamp":1775065970474,"version":"3.50.1"},"reference-count":30,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Science and Technology Council (NSTC) of the Republic of China","award":["113-2221-E-001-009-MY3"],"award-info":[{"award-number":["113-2221-E-001-009-MY3"]}]},{"name":"National Science and Technology Council (NSTC) of the Republic of China","award":["113-2221-E-A49-127"],"award-info":[{"award-number":["113-2221-E-A49-127"]}]},{"name":"National Science and Technology Council (NSTC) of the Republic of China","award":["113-2634-F-A49-004"],"award-info":[{"award-number":["113-2634-F-A49-004"]}]},{"name":"National Science and Technology Council (NSTC) of the Republic of China","award":["114-2221-E-A49-005"],"award-info":[{"award-number":["114-2221-E-A49-005"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Artif. Intell."],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1109\/tai.2025.3591082","type":"journal-article","created":{"date-parts":[[2025,7,22]],"date-time":"2025-07-22T18:09:18Z","timestamp":1753207758000},"page":"1025-1036","source":"Crossref","is-referenced-by-count":1,"title":["Demystifying MuZero Planning: Interpreting the Learned Model"],"prefix":"10.1109","volume":"7","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5590-7529","authenticated-orcid":false,"given":"Hung","family":"Guei","sequence":"first","affiliation":[{"name":"Institute of Information Science, Academia Sinica, Taipei, Taiwan"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0816-6933","authenticated-orcid":false,"given":"Yan-Ru","family":"Ju","sequence":"additional","affiliation":[{"name":"Institute of Information Science, Academia Sinica, Taipei, Taiwan"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5984-6493","authenticated-orcid":false,"given":"Wei-Yu","family":"Chen","sequence":"additional","affiliation":[{"name":"Institute of Information Science, Academia Sinica, Taipei, Taiwan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7532-3176","authenticated-orcid":false,"given":"Ti-Rong","family":"Wu","sequence":"additional","affiliation":[{"name":"Institute of Information Science, Academia Sinica, Taipei, Taiwan"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref3","article-title":"Dota 2 with large scale deep reinforcement learning","year":"2019"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03051-4"},{"key":"ref6","article-title":"Policy improvement by planning with Gumbel","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Danihelka","year":"2022"},{"key":"ref7","first-page":"4476","article-title":"Learning and planning in complex action spaces","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","author":"Hubert","year":"2021"},{"key":"ref8","article-title":"Planning in stochastic environments with a learned model","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Antonoglou","year":"2021"},{"key":"ref9","article-title":"MuZero with self-competition for rate control in VP9 video compression","author":"Mandhane","year":"2022"},{"key":"ref10","article-title":"Optimizing memory mapping using deep reinforcement learning","author":"Wang","year":"2023"},{"key":"ref11","article-title":"On the role of planning in model-based deep reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hamrick","year":"2020"},{"key":"ref12","article-title":"Visualizing MuZero models","volume-title":"Proc. ICML Workshop Unsupervised Reinforcement Learn.","author":"Vries","year":"2021"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.3233\/faia240666"},{"key":"ref14","first-page":"25476","article-title":"Mastering Atari games with limited data","volume-title":"Advances in Neural Inf. Process. Syst.","volume":"34","author":"Ye","year":"2021"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN52387.2021.9534023"},{"key":"ref16","article-title":"Epistemic Monte Carlo tree search","volume-title":"Int. Conf. Learn. Represent.","author":"Oren","year":"2025"},{"key":"ref17","article-title":"LightZero: A unified benchmark for Monte Carlo tree search in general sequential decision scenarios","volume-title":"Proc. 37th Conf. Neural Inf. Process. Syst. Datasets Benchmarks Track","author":"Niu","year":"2023"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/11871842_29"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-75538-8_7"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TCIAIG.2012.2186810"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01549"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TG.2024.3394900"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.5555\/3454287.3455008"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1023\/A:1013689704352"},{"key":"ref26","first-page":"11229","article-title":"Are AlphaZero-like agents robust to adversarial perturbations?","volume":"35","author":"Lan","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref27","first-page":"35655","article-title":"Adversarial policies beat superhuman Go AIs","volume-title":"Proc. 40th Int. Conf. Mach. Learn.","author":"Wang","year":"2023"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-31635-3_22"},{"key":"ref29","article-title":"Mastering diverse domains through world models","author":"Hafner","year":"2023"},{"key":"ref30","article-title":"ChatGPT: Language model by OpenAI,\u201d 2024. [Online]. Available:"}],"container-title":["IEEE Transactions on Artificial Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/9078688\/11370309\/11089932.pdf?arnumber=11089932","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T06:00:18Z","timestamp":1770098418000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11089932\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2]]},"references-count":30,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tai.2025.3591082","relation":{},"ISSN":["2691-4581"],"issn-type":[{"value":"2691-4581","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2]]}}}