{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T16:11:15Z","timestamp":1781021475860,"version":"3.54.1"},"reference-count":24,"publisher":"Wiley","license":[{"start":{"date-parts":[[2020,5,11]],"date-time":"2020-05-11T00:00:00Z","timestamp":1589155200000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61873291"],"award-info":[{"award-number":["61873291"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61773416"],"award-info":[{"award-number":["61773416"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61873291"],"award-info":[{"award-number":["61873291"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61773416"],"award-info":[{"award-number":["61773416"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"MUC 111 Project","award":["61873291"],"award-info":[{"award-number":["61873291"]}]},{"name":"MUC 111 Project","award":["61773416"],"award-info":[{"award-number":["61773416"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Complexity"],"published-print":{"date-parts":[[2020,5,11]]},"abstract":"<jats:p>In this study, hybrid state-action-reward-state-action (SARSA<mml:math xmlns:mml=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" id=\"M1\"><mml:mrow><mml:mfenced open=\"(\" close=\")\" separators=\"|\"><mml:mrow><mml:mi>\u03bb<\/mml:mi><\/mml:mrow><\/mml:mfenced><\/mml:mrow><\/mml:math>) and Q-learning algorithms are applied to different stages of an upper confidence bound applied to tree search for Tibetan Jiu chess. Q-learning is also used to update all the nodes on the search path when each game ends. A learning strategy that uses SARSA<mml:math xmlns:mml=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" id=\"M2\"><mml:mrow><mml:mfenced open=\"(\" close=\")\" separators=\"|\"><mml:mrow><mml:mi>\u03bb<\/mml:mi><\/mml:mrow><\/mml:mfenced><\/mml:mrow><\/mml:math> and Q-learning algorithms combining domain knowledge for a feedback function for layout and battle stages is proposed. An improved deep neural network based on ResNet18 is used for self-play training. Experimental results show that hybrid online and offline reinforcement learning with a deep neural network can improve the game program\u2019s learning efficiency and understanding ability for Tibetan Jiu chess.<\/jats:p>","DOI":"10.1155\/2020\/4708075","type":"journal-article","created":{"date-parts":[[2020,5,11]],"date-time":"2020-05-11T23:47:56Z","timestamp":1589240876000},"page":"1-11","source":"Crossref","is-referenced-by-count":9,"title":["Hybrid Online and Offline Reinforcement Learning for Tibetan Jiu Chess"],"prefix":"10.1155","volume":"2020","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7950-6204","authenticated-orcid":true,"given":"Xiali","family":"Li","sequence":"first","affiliation":[{"name":"School of Information and Engineering, Minzu University of China, Beijing 100081, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0718-305X","authenticated-orcid":true,"given":"Zhengyu","family":"Lv","sequence":"additional","affiliation":[{"name":"School of Information and Engineering, Minzu University of China, Beijing 100081, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Licheng","family":"Wu","sequence":"additional","affiliation":[{"name":"School of Information and Engineering, Minzu University of China, Beijing 100081, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yue","family":"Zhao","sequence":"additional","affiliation":[{"name":"School of Information and Engineering, Minzu University of China, Beijing 100081, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8715-1618","authenticated-orcid":true,"given":"Xiaona","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Information and Engineering, Minzu University of China, Beijing 100081, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"311","reference":[{"key":"1","doi-asserted-by":"publisher","DOI":"10.3233\/icg-180058"},{"key":"2","doi-asserted-by":"publisher","DOI":"10.1109\/access.2019.2938240"},{"issue":"2","key":"4","first-page":"5","volume":"9","year":"1994","journal-title":"Journal of Tibet University"},{"key":"20","doi-asserted-by":"publisher","DOI":"10.1109\/access.2019.2937943"},{"key":"22","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"23","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"24","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"key":"25","doi-asserted-by":"publisher","DOI":"10.1007\/bf00115009"},{"key":"27","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1994.6.2.215"},{"key":"29","first-page":"97","year":"1997","journal-title":"Games in AI Research"},{"key":"30","doi-asserted-by":"publisher","DOI":"10.3233\/icg-2010-33203"},{"key":"32","doi-asserted-by":"publisher","DOI":"10.1093\/comjnl\/21.2.149"},{"key":"34","doi-asserted-by":"publisher","DOI":"10.1016\/s0004-3702(01)00129-1"},{"key":"36","doi-asserted-by":"publisher","DOI":"10.1126\/science.aao1733"},{"key":"40","doi-asserted-by":"publisher","DOI":"10.1109\/jsac.2015.2393496"},{"key":"41","doi-asserted-by":"publisher","DOI":"10.1109\/tciaig.2013.2291577"},{"issue":"3-4","key":"43","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","year":"1992","journal-title":"Machine Learning"},{"issue":"1","key":"44","first-page":"77","volume":"34","year":"2013","journal-title":"Journal of China Institute of Communications"},{"key":"45","doi-asserted-by":"publisher","DOI":"10.1016\/j.laa.2018.06.026"},{"key":"46","doi-asserted-by":"publisher","DOI":"10.3389\/fncom.2012.00087"},{"key":"47","doi-asserted-by":"publisher","DOI":"10.1109\/jsyst.2016.2550530"},{"key":"48","doi-asserted-by":"publisher","DOI":"10.1109\/comst.2018.2812301"},{"key":"49","doi-asserted-by":"publisher","DOI":"10.1155\/2019\/9487574"},{"key":"50","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2019.2891259"}],"container-title":["Complexity"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/downloads.hindawi.com\/journals\/complexity\/2020\/4708075.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/downloads.hindawi.com\/journals\/complexity\/2020\/4708075.xml","content-type":"application\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/downloads.hindawi.com\/journals\/complexity\/2020\/4708075.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,5,11]],"date-time":"2020-05-11T23:48:02Z","timestamp":1589240882000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.hindawi.com\/journals\/complexity\/2020\/4708075\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,5,11]]},"references-count":24,"alternative-id":["4708075","4708075"],"URL":"https:\/\/doi.org\/10.1155\/2020\/4708075","relation":{},"ISSN":["1076-2787","1099-0526"],"issn-type":[{"value":"1076-2787","type":"print"},{"value":"1099-0526","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,5,11]]}}}