{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T04:45:45Z","timestamp":1750308345770,"version":"3.41.0"},"reference-count":44,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["72464022","62366019","52160009"],"award-info":[{"award-number":["72464022","62366019","52160009"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Training Program for Academic and Technical Leaders of Major Disciplines in Jiangxi Province","award":["20212BCJL23054"],"award-info":[{"award-number":["20212BCJL23054"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Games"],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1109\/tg.2024.3507107","type":"journal-article","created":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T19:23:38Z","timestamp":1733340218000},"page":"485-497","source":"Crossref","is-referenced-by-count":0,"title":["Research on the Imperfect Information Game of Four-Player <i>Mahjong<\/i> Based on Mix-PPO"],"prefix":"10.1109","volume":"17","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5551-8611","authenticated-orcid":false,"given":"Jia-Yang","family":"Wang","sequence":"first","affiliation":[{"name":"Software College, Jiangxi Agricultural University, Nanchang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-5713-8032","authenticated-orcid":false,"given":"Ming-Yan","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Information Engineering, Gongqing Institute of Science and Technology, Gongqing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wang","family":"Zeng","sequence":"additional","affiliation":[{"name":"School of Information Engineering, Nanchang University, Nanchang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-9529-7317","authenticated-orcid":false,"given":"Zi-An","family":"Zhong","sequence":"additional","affiliation":[{"name":"School of Information Engineering, Nanchang University, Nanchang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"ref3","article-title":"Simple random search provides a competitive approach to reinforcement learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Mania","year":"2018"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/s11431-022-2063-8"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.23919\/CCC55666.2022.9902417"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-16-6554-7_35"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CCDC52312.2021.9602755"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-16-5940-9_19"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1002\/ps.5703"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1002\/ps.6850"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.3390\/sym14010161"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.3389\/fchem.2021.809850"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.3390\/agriculture11080760"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1017\/wet.2020.86"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1017\/wsc.2020.42"},{"article-title":"Playing atari with deep reinforcement learning","year":"2013","author":"Mnih","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"ref19","first-page":"1995","article-title":"Dueling network architectures for deep reinforcement learning","volume-title":"Proc. 33rd Int. Conf. Int. Conf. Mach. Learn.","author":"Wang","year":"2016"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.3390\/buildings12020131"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.micpro.2022.104450"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-30217-9_98"},{"issue":"2","key":"ref23","first-page":"24","article-title":"Orbit-tracking evasion game method based on multi-agent reinforcement learning","volume":"39","author":"Xu","year":"2022","journal-title":"Shanghai Astronaut. Chin. English"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1088\/1361-6501\/ac50e7"},{"issue":"3","key":"ref25","first-page":"223","article-title":"Strategy gradient recommendation algorithm with fusion of sequential pattern scoring","volume":"39","author":"Guan","year":"2022","journal-title":"Comput. Appl. Softw."},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.3390\/app12063181"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2022.3150802"},{"key":"ref28","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schulman","year":"2015"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.12794\/metadc1505267"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.3390\/app12063078"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.3233\/JIFS-211935"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.3390\/su14095177"},{"article-title":"Emergence of locomotion behaviours in rich environments","year":"2017","author":"Heess","key":"ref33"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TPWRS.2021.3057523"},{"key":"ref35","first-page":"618","article-title":"Asynchronous distributed proximal policy optimization training framework based on GPU","volume-title":"Proc. Chin. Intell. Autom. Conf.","author":"Chen","year":"2022"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.32657\/10356\/90191"},{"key":"ref37","first-page":"1","article-title":"Thousandwind: Design and implementation of a computer Mahjong program","volume-title":"Proc. Taiwan Annu. Conf. Artif. Intell.","author":"Chen","year":"2013"},{"article-title":"Artificial intelligence research on Mahjong","year":"2008","author":"Lin","key":"ref38"},{"issue":"11","key":"ref39","first-page":"2410","article-title":"Four-person Mahjong program by supervised learning dividing plurality","volume":"55","author":"Mizukami","year":"2014","journal-title":"J. Inf. Process. Soc. Jpn."},{"key":"ref40","first-page":"163","article-title":"CNN MahjongEffectiveness of CNN architecture for Mahjong","volume-title":"Proc. Game Program. Workshop","author":"Tsukigi","year":"2017"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-019-7682-5"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.3233\/ICG-210179"},{"article-title":"Suphx: Mastering Mahjong with deep reinforcement learning","year":"2020","author":"Li","key":"ref43"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"}],"container-title":["IEEE Transactions on Games"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7782673\/11038929\/10776753.pdf?arnumber=10776753","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T17:41:41Z","timestamp":1750268501000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10776753\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6]]},"references-count":44,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tg.2024.3507107","relation":{},"ISSN":["2475-1502","2475-1510"],"issn-type":[{"type":"print","value":"2475-1502"},{"type":"electronic","value":"2475-1510"}],"subject":[],"published":{"date-parts":[[2025,6]]}}}