{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T02:55:25Z","timestamp":1769741725856,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":22,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,2,17]],"date-time":"2023-02-17T00:00:00Z","timestamp":1676592000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Science and Technology Innovation 2030 Major Project","award":["2020AAA0104802"],"award-info":[{"award-number":["2020AAA0104802"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61906212, 62102441, 9194830"],"award-info":[{"award-number":["61906212, 62102441, 9194830"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,2,17]]},"DOI":"10.1145\/3587716.3587798","type":"proceedings-article","created":{"date-parts":[[2023,9,7]],"date-time":"2023-09-07T23:27:30Z","timestamp":1694129250000},"page":"492-499","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Interpretable Reinforcement Learning of Behavior Trees"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8026-9848","authenticated-orcid":false,"given":"Chenjing","family":"Zhao","sequence":"first","affiliation":[{"name":"Defense Innovation Institute, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-8930-2183","authenticated-orcid":false,"given":"Chuanshuai","family":"Deng","sequence":"additional","affiliation":[{"name":"Defense Innovation Institute, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3691-9074","authenticated-orcid":false,"given":"Zhenghui","family":"Liu","sequence":"additional","affiliation":[{"name":"Defense Innovation Institute, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5488-9386","authenticated-orcid":false,"given":"Jiexin","family":"Zhang","sequence":"additional","affiliation":[{"name":"Defense Innovation Institute, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6911-954X","authenticated-orcid":false,"given":"Yunlong","family":"Wu","sequence":"additional","affiliation":[{"name":"Defense Innovation Institute, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3104-4845","authenticated-orcid":false,"given":"Yanzhen","family":"Wang","sequence":"additional","affiliation":[{"name":"Defense Innovation Institute, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2279-5417","authenticated-orcid":false,"given":"Xiaodong","family":"Yi","sequence":"additional","affiliation":[{"name":"Defense Innovation Institute, China"}]}],"member":"320","published-online":{"date-parts":[[2023,9,7]]},"reference":[{"issue":"3","key":"e_1_3_2_1_1_1","first-page":"54","article-title":"The dark secret at the heart of ai","volume":"120","author":"Will Knight","year":"2017","unstructured":"[1] Will Knight. The dark secret at the heart of ai. Technology review, 120(3):54\u201363, 2017.","journal-title":"Technology review"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1201\/9780429489105"},{"key":"e_1_3_2_1_3_1","volume-title":"Proceedings of Gdc","author":"Isla","year":"2005","unstructured":"[3] D.\u00a0Isla. Handling complexity in the halo 2 ai. Proceedings of Gdc, 2005."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/MIS.2002.1024751"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6385888"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2014.6942752"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2016.2633567"},{"key":"e_1_3_2_1_8_1","volume-title":"Behavior modeling in commercial games","author":"Diller W.","year":"2004","unstructured":"[8] D.\u00a0E. Diller, W.\u00a0Ferguson, A.\u00a0M. Leung, B.\u00a0Benyo, and D.\u00a0Foley. Behavior modeling in commercial games. 2004."},{"key":"e_1_3_2_1_9_1","volume-title":"Costar: Instructing collaborative robots with behavior trees and vision","author":"Paxton A.","year":"2016","unstructured":"[9] C.\u00a0Paxton, A.\u00a0Hundt, F.\u00a0Jonathan, K.\u00a0Guerin, and G.\u00a0D. Hager. Costar: Instructing collaborative robots with behavior trees and vision. 2016."},{"key":"e_1_3_2_1_10_1","volume-title":"Genetic programming : on the programming of computers by means of natural selection. Genetic programming : on the programming of computers by means of natural selection","author":"Koza R.","year":"1992","unstructured":"[10] Koza and JohnR. Genetic programming : on the programming of computers by means of natural selection. Genetic programming : on the programming of computers by means of natural selection, 1992."},{"key":"e_1_3_2_1_11_1","volume-title":"Grammatical Evolution: Evolutionary Automatic Programming in an Arbitrary Language. Grammatical Evolution: Evolutionary Automatic Programming in an Arbitrary Language","author":"O\u2019Neill C.","year":"2003","unstructured":"[11] M.\u00a0O\u2019Neill and C.\u00a0Ryan. Grammatical Evolution: Evolutionary Automatic Programming in an Arbitrary Language. Grammatical Evolution: Evolutionary Automatic Programming in an Arbitrary Language, 2003."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.5555\/551283"},{"key":"e_1_3_2_1_13_1","volume-title":"Conservative q-improvement: Reinforcement learning for an interpretable decision-tree policy","author":"Roth N.","year":"2019","unstructured":"[13] A.\u00a0M. Roth, N.\u00a0Topin, P.\u00a0Jamshidi, and M.\u00a0Veloso. Conservative q-improvement: Reinforcement learning for an interpretable decision-tree policy. 2019."},{"issue":"1","key":"e_1_3_2_1_14_1","first-page":"77","article-title":"A framework for constrained and adaptive behavior-based agents","volume":"6","author":"Rdp Pereira P.","year":"2015","unstructured":"[14] Rdp Pereira and P.\u00a0M. Engel. A framework for constrained and adaptive behavior-based agents. Computer ence, 6(1):77\u2013107, 2015.","journal-title":"Computer ence"},{"key":"e_1_3_2_1_15_1","first-page":"6","article-title":"A markov decision process","author":"Bellman","year":"1957","unstructured":"[15] R.\u00a0E. Bellman. A markov decision process. Journal of Mathematical Fluid Mechanics, 6, 1957.","journal-title":"Journal of Mathematical Fluid Mechanics"},{"key":"e_1_3_2_1_16_1","volume-title":"Learning from delayed rewards. Ph.d.thesis Kings College University of Cambridge","author":"Christopher John Cornish\u00a0Hellaby","year":"1989","unstructured":"[16] Christopher John Cornish\u00a0Hellaby. Watkins. Learning from delayed rewards. Ph.d.thesis Kings College University of Cambridge, 1989."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-12239-2_11"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-20525-5_13"},{"key":"e_1_3_2_1_19_1","volume-title":"2018 IEEE 7th Data Driven Control and Learning Systems Conference (DDCLS)","author":"Qi X.","year":"2018","unstructured":"[19] Z.\u00a0Qi, X.\u00a0Kai, J.\u00a0Peng, and Q.\u00a0Yin. Behavior modeling for autonomous agents based on modified evolving behavior trees. In 2018 IEEE 7th Data Driven Control and Learning Systems Conference (DDCLS), 2018."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2013.6633623"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.2991\/essaeme-16.2016.120"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8594083"}],"event":{"name":"ICMLC 2023: 2023 15th International Conference on Machine Learning and Computing","location":"Zhuhai China","acronym":"ICMLC 2023"},"container-title":["Proceedings of the 2023 15th International Conference on Machine Learning and Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3587716.3587798","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3587716.3587798","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:08:00Z","timestamp":1750183680000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3587716.3587798"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2,17]]},"references-count":22,"alternative-id":["10.1145\/3587716.3587798","10.1145\/3587716"],"URL":"https:\/\/doi.org\/10.1145\/3587716.3587798","relation":{},"subject":[],"published":{"date-parts":[[2023,2,17]]},"assertion":[{"value":"2023-09-07","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}