{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T13:46:10Z","timestamp":1774964770432,"version":"3.50.1"},"reference-count":211,"publisher":"Zhejiang University Press","issue":"6","license":[{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Front Inform Technol Electron Eng"],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1631\/fitee.2300548","type":"journal-article","created":{"date-parts":[[2024,7,5]],"date-time":"2024-07-05T10:33:01Z","timestamp":1720175581000},"page":"763-790","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":34,"title":["Transformer in reinforcement learning for decision-making: a survey","\u57fa\u4e8eTransformer\u7684\u5f3a\u5316\u5b66\u4e60\u65b9\u6cd5\u5728\u667a\u80fd\u51b3\u7b56\u9886\u57df\u7684\u5e94\u7528: 
\u7efc\u8ff0"],"prefix":"10.1631","volume":"25","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9894-5253","authenticated-orcid":false,"given":"Weilin","family":"Yuan","sequence":"first","affiliation":[]},{"given":"Jiaxing","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Shaofei","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Dawei","family":"Feng","sequence":"additional","affiliation":[]},{"given":"Zhenzhen","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Peng","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6989-8536","authenticated-orcid":false,"given":"Weiwei","family":"Zhao","sequence":"additional","affiliation":[]}],"member":"635","published-online":{"date-parts":[[2024,7,5]]},"reference":[{"key":"ref1","article-title":"CausalWorld: a robotic manipulation benchmark for causal structure and transfer learning","author":"Ahmed","year":"2021","journal-title":"Proc 9th Int Conf on Learning Representations"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.3390\/rs15071860"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.3390\/e22111272"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-6219"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-05035-5_2"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46454-1_24"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2018.00387"},{"key":"ref8","volume-title":"Layer normalization","author":"Ba","year":"2016"},{"key":"ref9","first-page":"507","article-title":"Agent57: outperforming the Atari human benchmark","author":"Badia","year":"2020","journal-title":"Proc 37th Int Conf on Machine Learning"},{"key":"ref10","article-title":"Adaptive input representations for neural language modeling","author":"Baevski","year":"2018","journal-title":"Proc 7th Int Conf on Learning 
Representations"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1146\/annurev.neuro.26.041002.131047"},{"key":"ref12","first-page":"65","article-title":"METEOR: an automatic metric for MT evaluation with improved correlation with human judgments","author":"Banerjee","year":"2005","journal-title":"Proc ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/tg.2022.3189426"},{"key":"ref14","first-page":"1887","article-title":"Human-timescale adaptation in an open-ended task space","author":"Bauer","year":"2023","journal-title":"Proc 40th Int Conf on Machine Learning"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3912"},{"key":"ref16","article-title":"Neural combinatorial optimization with reinforcement learning","author":"Bello","year":"2017","journal-title":"Proc 5th Int Conf on Learning Representations"},{"key":"ref17","volume-title":"Dota 2 with large scale deep reinforcement learning","author":"Berner","year":"2019"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1287\/moor.27.4.819.297"},{"key":"ref19","volume-title":"On the opportunities and risks of foundation models","author":"Bommasani","year":"2021"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/icra.2015.7139457"},{"key":"ref21","volume-title":"The Transformer network for the traveling salesman problem","author":"Bresson","year":"2021"},{"key":"ref22","volume-title":"OpenAI Gym","author":"Brockman","year":"2016"},{"key":"ref23","article-title":"Language models are few-shot learners","author":"Brown","year":"2020","journal-title":"Proc 34th Int Conf on Neural Information Processing Systems"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr46437.2021.01212"},{"key":"ref26","first-page":"15084","article-title":"Decision Transformer: reinforcement 
learning via sequence modeling","author":"Chen","year":"2021","journal-title":"Proc 34th Int Conf on Neural Information Processing Systems"},{"key":"ref27","first-page":"1691","article-title":"Generative pretraining from pixels","author":"Chen","year":"2020","journal-title":"Proc 37th Int Conf on Machine Learning"},{"key":"ref28","volume-title":"A survey of model compression and acceleration for deep neural networks","author":"Cheng","year":"2020"},{"key":"ref30","volume-title":"Assistive Tele-op: leveraging Transformers to collect robotic task demonstrations","author":"Clever","year":"2022"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.747"},{"key":"ref32","volume-title":"Hierarchical Decision Transformer","author":"Correia","year":"2022"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-75538-8_7"},{"key":"ref34","article-title":"Real world games look like spinning tops","author":"Czarnecki","year":"2020","journal-title":"Proc 34th Int Conf on Neural Information Processing Systems"},{"key":"ref35","first-page":"2489","article-title":"Catformer: designing stable Transformers via sensitivity analysis","author":"Davis","year":"2021","journal-title":"Proc 38th Int Conf on Machine Learning"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-93031-2_12"},{"key":"ref37","first-page":"4171","article-title":"BERT: pre-training of deep bidirectional Transformers for language understanding","volume-title":"Proc Conf on North American Chapter of the Association for Computational Linguistics: Human Language Technologies","author":"Devlin","year":"2019"},{"key":"ref39","first-page":"2793","article-title":"Attention is not all you need: pure attention loses rank doubly exponentially with depth","author":"Dong","year":"2021","journal-title":"Proc 38th Int Conf on Machine Learning"},{"key":"ref40","article-title":"An image is worth 16 \u00d7 16 words: Transformers for image recognition at 
scale","author":"Dosovitskiy","year":"2021","journal-title":"Proc 9th Int Conf on Learning Representations"},{"key":"ref41","first-page":"5547","article-title":"GLaM: efficient scaling of language models with mixture-of-experts","author":"Du","year":"2022","journal-title":"Proc 39th Int Conf on Machine Learning"},{"key":"ref43","first-page":"373","volume-title":"Inferring maps and behaviors from natural language instructions","author":"Duvallet","year":"2016"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr46437.2021.00447"},{"key":"ref45","volume-title":"Convexifying Transformers: improving optimization and understanding of Transformer networks","author":"Ergen","year":"2022"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr46437.2021.01268"},{"key":"ref47","article-title":"Deep Transformer Q-networks for partially observable reinforcement learning","author":"Esslinger","year":"2022"},{"key":"ref48","first-page":"18343","article-title":"MineDojo: building open-ended embodied agents with internet-scale knowledge","author":"Fan","year":"2022","journal-title":"Proc 36th Int Conf on Neural Information Processing Systems"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"ref50","article-title":"Generalized decision Transformer for offline hindsight information matching","author":"Furuta","year":"2022","journal-title":"Proc 10th Int Conf on Learning Representations"},{"key":"ref51","first-page":"1243","article-title":"Convolutional sequence to sequence learning","author":"Gehring","year":"2017","journal-title":"Proc 34th Int Conf on Machine Learning"},{"key":"ref52","volume-title":"Combining reinforcement learning and optimal transport for the traveling salesman problem","author":"Goh","year":"2022"},{"issue":"3","key":"ref53","first-page":"307","article-title":"The orienteering problem","volume-title":"Nav Res 
Log","volume":"34","author":"Golden","year":"1987"},{"key":"ref54","article-title":"Generating sequences with recurrent neural networks","author":"Graves","year":"2013"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-09996-w"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.524"},{"key":"ref57","first-page":"175","article-title":"Instruction-driven history-aware policies for robotic manipulations","author":"Guhur","year":"2022","journal-title":"Proc 6th Conf on Robot Learning"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33016489"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/339"},{"key":"ref60","first-page":"1856","article-title":"Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"Haarnoja","year":"2018","journal-title":"Proc 35th Int Conf on Machine Learning"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2022.3152247"},{"key":"ref62","volume-title":"Learning generalizable vision-tactile robotic grasping strategy for deformable objects via Transformer","author":"Han","year":"2021"},{"key":"ref63","first-page":"3680","article-title":"Stabilizing deep Q-learning with ConvNets and vision Transformers under data augmentation","author":"Hansen","year":"2021","journal-title":"Proc 34th Int Conf on Neural Information Processing Systems"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1137\/1024022"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2016.90"},{"key":"ref66","volume-title":"Grounded language learning in a simulated 3D world","author":"Hermann","year":"2017"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref68","article-title":"Structure-aware Transformer policy for inhomogeneous multi-task reinforcement learning","author":"Hong","year":"2022","journal-title":"Proc 10th Int Conf on Learning 
Representations"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2021.3079209"},{"key":"ref70","volume-title":"UPDeT: universal multi-agent reinforcement learning via policy decoupling with Transformers","author":"Hu","year":"2021"},{"key":"ref71","volume-title":"A Review of the Decision Transformer Archi-tecture: Framing Reinforcement Learning as a Sequence Modeling Problem","author":"Imhof","year":"2022"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1126\/science.aau6249"},{"key":"ref73","first-page":"1011","article-title":"Transformers are adaptable task planners","author":"Jain","year":"2023","journal-title":"Proc 6th Conf on Robot Learning"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/lra.2020.2974707"},{"key":"ref75","first-page":"1273","article-title":"Offline reinforcement learning as one big sequence modeling problem","author":"Janner","year":"2021","journal-title":"Proc 34th Int Conf on Neural Information Processing Systems"},{"key":"ref76","first-page":"14745","article-title":"TransGAN: two pure Transformers can make one strong GAN, and that can scale up","author":"Jiang","year":"2021","journal-title":"Proc 34th Int Conf on Neural Information Processing Systems"},{"key":"ref77","volume-title":"Scaling laws for neural language models","author":"Kaplan","year":"2020"},{"key":"ref78","article-title":"Human-level Atari 200 \u00d7 faster","author":"Kapturowski","year":"2023","journal-title":"Proc 11th Int Conf on Learning Representations"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2019.2929141"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1609\/aiide.v18i1.21954"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/d14-1181"},{"key":"ref82","volume-title":"Algorithms for Decision Making","author":"Kochenderfer","year":"2022"},{"key":"ref83","author":"Kool","year":"2019","journal-title":"Attention, learn to solve routing problems! 
Proc 7th Int Conf on Learning Representations"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref85","first-page":"13458","article-title":"Settling the variance of multi-agent policy gradients","author":"Kuba","year":"2021","journal-title":"Proc 34th Int Conf on Neural Information Processing Systems"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/d18-2012"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5878"},{"key":"ref88","article-title":"ALBERT: a lite BERT for self-supervised learning of language representations","author":"Lan","year":"2020","journal-title":"Proc 8th Int Conf on Learning Representations"},{"key":"ref89","first-page":"27921","article-title":"Multi-game decision Transformers","author":"Lee","year":"2022","journal-title":"Proc 36th Int Conf on Neural Information Processing Systems"},{"key":"ref90","volume-title":"Offline reinforcement learning: tutorial, review, and perspectives on open problems","author":"Levine","year":"2020"},{"key":"ref91","article-title":"Learning multi-level hierarchies with hindsight","author":"Levy","year":"2019","journal-title":"Proc 7th Int Conf on Learning Representations"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.eacl-main.86"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52688.2022.01216"},{"key":"ref95","volume-title":"Suphx: mastering Mahjong with deep reinforcement learning","author":"Li","year":"2020"},{"key":"ref96","article-title":"Learning to navigate in interactive environments with the Transformer-based memory","author":"Li","year":"2022"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.3390\/app12147053"},{"key":"ref98","article-title":"A survey of decision making in adversarial games","volume-title":"Sci China Inform Sci, early 
access","author":"Li","year":"2023"},{"key":"ref99","first-page":"74","article-title":"ROUGE: a package for automatic evaluation of summaries","author":"Lin","year":"2004","journal-title":"Proc Text Summarization Branches Out"},{"key":"ref100","volume-title":"Switch Trajectory Transformer with distributional value approximation for multi-task reinforcement learning","author":"Lin","year":"2022"},{"key":"ref101","volume-title":"Contextual Transformer for offline meta reinforcement learning","author":"Lin","year":"2022"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1016\/j.aiopen.2022.10.001"},{"key":"ref103","article-title":"Analyzing attention mechanisms through lens of sample complexity and loss landscape","author":"Liu","year":"2021","journal-title":"Proc Int Conf on Learning Representations"},{"key":"ref104","volume-title":"Augmenting re-inforcement learning with Transformer-based scene representation learning for decision-making of autonomous driving","author":"Liu","year":"2022"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.463"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1137\/18m1177615"},{"key":"ref107","volume-title":"RoBERTa: a robustly optimized BERT pretraining approach","author":"Liu","year":"2019"},{"key":"ref108","first-page":"6382","article-title":"Multi-agent actorcritic for mixed cooperative-competitive environments","author":"Lowe","year":"2017","journal-title":"Proc 31st Int Conf on Neural Information Processing Systems"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i7.20729"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.3390\/a15080282"},{"key":"ref111","volume-title":"TorchScale: Transformers at 
scale","author":"Ma","year":"2022"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1016\/j.cor.2021.105400"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1109\/lra.2022.3180108"},{"key":"ref114","first-page":"15340","article-title":"Transformers are meta-reinforcement learners","author":"Melo","year":"2022","journal-title":"Proc 39th Int Conf on Machine Learning"},{"key":"ref115","article-title":"Offline pretrained multi-agent decision Transformer: one big sequence model tackles all SMAC tasks","author":"Meng","year":"2021"},{"key":"ref116","first-page":"7654","article-title":"Counterfactual credit assignment in model-free reinforcement learning","author":"Mesnard","year":"2021","journal-title":"Proc 38th Int Conf on Machine Learning"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.14778\/3570690.3570697"},{"key":"ref118","first-page":"7780","article-title":"Offline meta-reinforcement learning with advantage weighting","author":"Mitchell","year":"2021","journal-title":"Proc 38th Int Conf on Machine Learning"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.3390\/electronics10232918"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1126\/science.aam6960"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/ijcnn55064.2022.9892253"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.03.091"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v29i1.9383"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1613\/jair.2447"},{"issue":"1","key":"ref125","first-page":"5603","article-title":"Navigating the landscape of multiplayer games","volume-title":"Nat Commun","volume":"11","author":"Omidshafiei","year":"2020"},{"key":"ref126","volume-title":"Open-ended learning leads to generally capable agents","author":"Ended","year":"2021"},{"key":"ref127","volume-title":"Meta-learning of sequential 
strategies","author":"Ortega","year":"2019"},{"key":"ref128","first-page":"8302","article-title":"Vector quantized models for planning","author":"Ozair","year":"2021","journal-title":"Proc 38th Int Conf on Machine Learning"},{"key":"ref129","first-page":"1783","article-title":"TAX-pose: task-specific cross-pose estimation for robot manipulation","author":"Pan","year":"2023","journal-title":"Proc 6th Conf on Robot Learning"},{"key":"ref130","article-title":"Silver-bullet-3D at ManiSkill 2021: learning-from-demonstrations and heuristic rule-based methods for object manipulation","author":"Pan","year":"2022","journal-title":"Proc Int Conf on Learning Representations"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"ref132","article-title":"Efficient Transformers in reinforcement learning using actor-learner distillation","author":"Parisotto","year":"2021","journal-title":"Proc 9th Int Conf on Learning Representations"},{"key":"ref133","first-page":"7487","article-title":"Stabilizing Transformers for reinforcement learning","author":"Parisotto","year":"2020","journal-title":"Proc 37th Int Conf on Machine Learning"},{"key":"ref134","first-page":"1043","article-title":"Reinforcement learning with hierarchies of machines","author":"Parr","year":"1997","journal-title":"Proc 10th Int Conf on Neural Information Processing Systems"},{"key":"ref135","article-title":"Planning from pixels using inverse dynamics models","author":"Paster","year":"2021","journal-title":"Proc 9th Int Conf on Learning Representations"},{"key":"ref136","first-page":"38966","article-title":"You can\u2019t count on luck: why decision Transformers and RvS fail in stochastic environments","author":"Paster","year":"2022","journal-title":"Proc 36th Int Conf on Neural Information Processing 
Systems"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1145\/3453160"},{"key":"ref138","doi-asserted-by":"publisher","DOI":"10.1142\/s0218213012400052"},{"key":"ref139","volume-title":"Formal algorithms for Transformers","author":"Phuong","year":"2022"},{"key":"ref140","volume-title":"A model-based approach to meta-reinforcement learning: Transformers and tree search","author":"Pinon","year":"2022"},{"key":"ref141","article-title":"Improving language understanding by generative pre-training","author":"Radford","year":"2018"},{"issue":"8","key":"ref142","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI Blog"},{"key":"ref143","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021","journal-title":"Proc 38th Int Conf on Machine Learning"},{"issue":"1","key":"ref144","first-page":"140","article-title":"Exploring the limits of transfer learning with a unified text-to-text Transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"J Mach Learn Res"},{"issue":"1","key":"ref145","first-page":"178","article-title":"Monotonic value function factorisation for deep multi-agent reinforcement learning","volume":"21","author":"Rashid","year":"2020","journal-title":"J Mach Learn Res"},{"key":"ref146","first-page":"2835","article-title":"A generalist agent","author":"Reed","year":"2022","journal-title":"Trans Mach Learn Res"},{"key":"ref147","volume-title":"Can Wikipedia help offline reinforcement learning?","author":"Reid","year":"2022"},{"key":"ref148","doi-asserted-by":"publisher","DOI":"10.65109\/LVZZ5205"},{"key":"ref149","doi-asserted-by":"publisher","DOI":"10.1109\/irc55401.2022.00058"},{"key":"ref150","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03051-4"},{"key":"ref151","volume-title":"Proximal policy optimization 
algorithms","author":"Schulman","year":"2017"},{"key":"ref152","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2023.102802"},{"key":"ref153","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19842-7_27"},{"key":"ref154","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/n18-2074"},{"key":"ref155","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511811654"},{"key":"ref156","first-page":"785","article-title":"Perceiver-actor: a multi-task Transformer for robotic manipulation","author":"Shridhar","year":"2023","journal-title":"Proc 6th Conf on Robot Learning"},{"key":"ref157","volume-title":"How crucial is Transformer in Decision Transformer?","author":"Siebenborn","year":"2022"},{"key":"ref158","volume-title":"Mastering Chess and Shogi by self-play with a general reinforcement learning algorithm","author":"Silver","year":"2017a"},{"key":"ref159","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-09997-9"},{"key":"ref161","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2020.101813"},{"key":"ref162","volume-title":"Training agents using upside-down reinforcement learning","author":"Srivastava","year":"2019"},{"key":"ref163","article-title":"Some considerations on learning to explore via meta-reinforcement learning","author":"Stadie","year":"2018"},{"key":"ref164","volume-title":"Reinforcement Learning: an Introduction","author":"Sutton","year":"2018"},{"key":"ref165","volume-title":"On layer nor-malizations and residual connections in Transformers","author":"Takase","year":"2022"},{"key":"ref166","doi-asserted-by":"publisher","DOI":"10.1145\/3530811"},{"key":"ref167","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611973594"},{"key":"ref168","doi-asserted-by":"publisher","DOI":"10.1016\/j.simpa.2020.100022"},{"key":"ref169","volume-title":"Transformer based reinforcement learning for 
games","author":"Upadhyay","year":"2019"},{"key":"ref170","volume-title":"Attention interpretability across NLP tasks","author":"Vashishth","year":"2019"},{"key":"ref171","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref172","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2015.7299087"},{"key":"ref173","doi-asserted-by":"publisher","DOI":"10.1109\/access.2020.3004964"},{"key":"ref174","first-page":"22270","article-title":"Addressing optimism bias in sequence modeling for reinforcement learning","author":"Villaflor","year":"2022","journal-title":"Proc 39th Int Conf on Machine Learning"},{"key":"ref175","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref176","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/p19-1580"},{"key":"ref177","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-022-03924-3"},{"key":"ref178","volume-title":"DeepNet: scaling Transformers to 1,000 layers","author":"Wang","year":"2022"},{"key":"ref179","article-title":"Alchemy: a benchmark and analysis toolkit for meta-reinforcement learning agents","author":"Wang","year":"2021","journal-title":"Proc 1st Neural Information Processing Systems Track on Datasets and Benchmarks"},{"key":"ref180","first-page":"34748","article-title":"Bootstrapped Transformer for offline reinforcement learning","author":"Wang","year":"2022","journal-title":"Proc 36th Int Conf on Neural Information Processing Systems"},{"key":"ref181","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539480"},{"key":"ref182","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2021.107526"},{"key":"ref183","first-page":"16509","article-title":"Multi-agent reinforcement learning is a sequence modeling problem","author":"Wen","year":"2022","journal-title":"Proc 36th Int Conf on Neural Information Processing 
Systems"},{"key":"ref184","doi-asserted-by":"publisher","DOI":"10.1002\/9781119606475"},{"key":"ref185","doi-asserted-by":"publisher","DOI":"10.3390\/electronics10243050"},{"key":"ref186","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2021.3068828"},{"key":"ref187","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr42600.2020.01111"},{"key":"ref188","doi-asserted-by":"publisher","DOI":"10.3390\/make3030029"},{"key":"ref189","volume-title":"Pretraining in deep reinforcement learning: a survey","author":"Xie","year":"2022"},{"key":"ref190","first-page":"10524","article-title":"On layer normalization in the Transformer architecture","author":"Xiong","year":"2020","journal-title":"Proc 37th Int Conf on Machine Learning"},{"key":"ref191","first-page":"24631","article-title":"Prompting Decision Transformer for few-shot policy generalization","author":"Xu","year":"2022","journal-title":"Proc 39th Int Conf on Machine Learning"},{"key":"ref192","article-title":"Q-learning decision Transformer: leveraging dynamic programming for conditional sequence modelling in offline RL","author":"Yamagata","year":"2023","journal-title":"Proc 40th Int Conf on Machine Learning"},{"key":"ref193","article-title":"Learning vision-guided quadrupedal locomotion end-to-end with cross-modal Transformers","author":"Yang","year":"2022","journal-title":"Proc 10th Int Conf on Learning Representations"},{"key":"ref194","volume-title":"An overview of multi-agent reinforcement learning from game theoretical perspective","author":"Yang","year":"2020"},{"key":"ref195","article-title":"Multi-agent deter-minantal Q-learning","author":"Yang","year":"2020","journal-title":"Proc 37th Int Conf on Machine Learning"},{"key":"ref196","first-page":"34874","article-title":"Transformer-based working memory for multiagent reinforcement learning with action parsing","author":"Yang","year":"2022","journal-title":"Proc 36th Int Conf on Neural Information Processing 
Systems"},{"key":"ref197","doi-asserted-by":"publisher","DOI":"10.1109\/lra.2022.3186494"},{"key":"ref198","article-title":"XLNet: generalized autoregressive pretraining for language understanding","author":"Yang","year":"2019","journal-title":"Proc 33rd Int Conf on Neural Information Processing Systems"},{"key":"ref199","volume-title":"Random-LTD: random and layerwise token dropping brings efficient training for large-scale Transformers","author":"Yao","year":"2022"},{"key":"ref200","first-page":"24611","article-title":"The surprising effectiveness of PPO in cooperative multi-agent games","author":"Yu","year":"2022","journal-title":"Proc 36th Int Conf on Neural Information Processing Systems"},{"key":"ref201","article-title":"Gradient surgery for multi-task learning","author":"Yu","year":"2020a","journal-title":"Proc 34th Int Conf on Neural Information Processing Systems"},{"key":"ref202","first-page":"1094","article-title":"Meta-World: a benchmark and evaluation for multi-task and meta reinforcement learning","author":"Yu","year":"2020b","journal-title":"Proc Conf on Robot Learning"},{"key":"ref203","first-page":"2459","article-title":"Imperfect information game in multiplayer no-limit Texas hold\u2019em based on mean approximation and deep CFVnet","volume-title":"Proc China Automation Congress","author":"Yuan","year":"2021"},{"key":"ref204","doi-asserted-by":"publisher","DOI":"10.3390\/electronics11091339"},{"key":"ref205","doi-asserted-by":"publisher","DOI":"10.1109\/access.2020.2983149"},{"key":"ref206","volume-title":"Recurrent neural network regularization","author":"Zaremba","year":"2014"},{"key":"ref207","first-page":"12333","article-title":"DouZero: mastering DouDizhu with self-play deep reinforcement learning","author":"Zha","year":"2021","journal-title":"Proc 38th Int Conf on Machine 
Learning"},{"key":"ref208","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17300"},{"key":"ref209","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i4.20394"},{"key":"ref210","doi-asserted-by":"publisher","DOI":"10.1109\/ssci47803.2020.9308468"},{"key":"ref211","doi-asserted-by":"publisher","DOI":"10.1109\/cog51982.2022.9893710"},{"key":"ref212","first-page":"27042","article-title":"Online decision Transformer","author":"Zheng","year":"2022","journal-title":"Proc 39th Int Conf on Machine Learning"},{"key":"ref213","doi-asserted-by":"publisher","DOI":"10.1631\/fitee.2300089"},{"key":"ref214","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2018.00907"}],"container-title":["Frontiers of Information Technology &amp; Electronic Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1631\/FITEE.2300548.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1631\/FITEE.2300548\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1631\/FITEE.2300548.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T06:58:55Z","timestamp":1771657135000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1631\/FITEE.2300548"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6]]},"references-count":211,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2024,6]]}},"alternative-id":["2011"],"URL":"https:\/\/doi.org\/10.1631\/fitee.2300548","relation":{},"ISSN":["2095-9184","2095-9230"],"issn-type":[{"value":"2095-9184","type":"print"},{"value":"2095-9230","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,6]]},"assertion":[{"value":"14 
August 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 November 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 July 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"All the authors declare that they have no conflict of interest.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}