{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T03:30:14Z","timestamp":1772854214849,"version":"3.50.1"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"23","license":[{"start":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T00:00:00Z","timestamp":1725926400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T00:00:00Z","timestamp":1725926400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1007\/s10489-024-05830-2","type":"journal-article","created":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T07:02:21Z","timestamp":1725951741000},"page":"12156-12176","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Solving time-delay issues in reinforcement learning via transformers"],"prefix":"10.1007","volume":"54","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-3507-9732","authenticated-orcid":false,"given":"Bo","family":"Xia","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zaihui","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Minzhi","family":"Xie","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yongzhe","family":"Chang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2169-0007","authenticated-orcid":false,"given":"Bo","family":"Yuan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiheng","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3542-0593","authenticated-orcid":false,"given":"Xueqian","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bin","family":"Liang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,10]]},"reference":[{"key":"5830_CR1","doi-asserted-by":"crossref","unstructured":"van Dis EA, Bollen J, Zuidema W et al (2023) Chatgpt: five priorities for research. Nature 614(7947):224\u2013226","DOI":"10.1038\/d41586-023-00288-7"},{"issue":"7897","key":"5830_CR2","doi-asserted-by":"publisher","first-page":"414","DOI":"10.1038\/s41586-021-04301-9","volume":"602","author":"J Degrave","year":"2022","unstructured":"Degrave J, Felici F, Buchli J et al (2022) Magnetic control of tokamak plasmas through deep reinforcement learning. Nature 602(7897):414\u2013419","journal-title":"Nature"},{"key":"5830_CR3","doi-asserted-by":"publisher","first-page":"102227","DOI":"10.1016\/j.rcim.2021.102227","volume":"73","author":"R Zhang","year":"2022","unstructured":"Zhang R, Lv Q, Li J et al (2022) A reinforcement learning method for human-robot collaboration in assembly tasks. Robot Comput Integr Manuf 73:102227","journal-title":"Robot Comput Integr Manuf"},{"key":"5830_CR4","doi-asserted-by":"publisher","first-page":"109950","DOI":"10.1016\/j.automatica.2021.109950","volume":"135","author":"W Jiang","year":"2022","unstructured":"Jiang W, Liu K, Charalambous T (2022) Multi-agent consensus with heterogeneous time-varying input and communication delays in digraphs. Automatica 135:109950","journal-title":"Automatica"},{"issue":"3","key":"5830_CR5","doi-asserted-by":"publisher","first-page":"851","DOI":"10.1109\/LCSYS.2020.3006452","volume":"5","author":"W Jiang","year":"2020","unstructured":"Jiang W, Chen Y, Charalambous T (2020) Consensus of general linear multi-agent systems with heterogeneous input and communication delays. IEEE Control Systems Letters 5(3):851\u2013856","journal-title":"IEEE Control Systems Letters"},{"issue":"4","key":"5830_CR6","doi-asserted-by":"publisher","first-page":"872","DOI":"10.2514\/1.G005714","volume":"44","author":"H Chen","year":"2021","unstructured":"Chen H, Liu Z (2021) Time-delay prediction-based smith predictive control for space teleoperation. J Guid Control Dyn 44(4):872\u2013879","journal-title":"J Guid Control Dyn"},{"key":"5830_CR7","doi-asserted-by":"publisher","first-page":"113375","DOI":"10.1016\/j.oceaneng.2022.113375","volume":"268","author":"J Guerrero","year":"2023","unstructured":"Guerrero J, Chemori A, Torres J et al (2023) Time-delay high-order sliding mode control for trajectory tracking of autonomous underwater vehicles under disturbances. Ocean Eng 268:113375","journal-title":"Ocean Eng"},{"key":"5830_CR8","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1016\/j.jmsy.2019.11.001","volume":"54","author":"H Liu","year":"2020","unstructured":"Liu H, Wang L (2020) Remote human-robot collaboration: A cyber-physical system application for hazard manufacturing environment. J Manuf Syst 54:24\u201334","journal-title":"J Manuf Syst"},{"issue":"5","key":"5830_CR9","doi-asserted-by":"publisher","first-page":"2653","DOI":"10.1109\/LRA.2023.3257708","volume":"8","author":"X Zhou","year":"2023","unstructured":"Zhou X, Yang Z, Ren Y et al (2023) Modified bilateral active estimation model: A learning-based solution to the time delay problem in robotic tele-control. IEEE Robot Autom Lett 8(5):2653\u20132660","journal-title":"IEEE Robot Autom Lett"},{"key":"5830_CR10","unstructured":"Haarnoja T, Zhou A, Abbeel P et\u00a0al (2018) Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International conference on machine learning, PMLR, pp 1861\u20131870"},{"issue":"12","key":"5830_CR11","doi-asserted-by":"publisher","first-page":"2496","DOI":"10.1080\/00207721.2022.2056654","volume":"53","author":"Y Deng","year":"2022","unstructured":"Deng Y, L\u00e9chapp\u00e9 V, Moulay E et al (2022) Predictor-based control of time-delay systems: a survey. Int J Syst Sci 53(12):2496\u20132534","journal-title":"Int J Syst Sci"},{"issue":"12","key":"5830_CR12","doi-asserted-by":"publisher","first-page":"2480","DOI":"10.1080\/00207721.2021.2006356","volume":"53","author":"XM Zhang","year":"2022","unstructured":"Zhang XM, Han QL, Ge X (2022) The construction of augmented lyapunov-krasovskii functionals and the estimation of their derivatives in stability analysis of time-delay systems: A survey. Int J Syst Sci 53(12):2480\u20132495","journal-title":"Int J Syst Sci"},{"key":"5830_CR13","doi-asserted-by":"publisher","first-page":"233","DOI":"10.1016\/j.neunet.2021.10.021","volume":"145","author":"A Ali","year":"2022","unstructured":"Ali A, Zhu Y, Zakarya M (2022) Exploiting dynamic spatio-temporal graph convolutional neural networks for citywide traffic flows prediction. Neural Netw 145:233\u2013247","journal-title":"Neural Netw"},{"key":"5830_CR14","doi-asserted-by":"publisher","first-page":"852","DOI":"10.1016\/j.ins.2021.08.042","volume":"577","author":"A Ali","year":"2021","unstructured":"Ali A, Zhu Y, Zakarya M (2021) Exploiting dynamic spatio-temporal correlations for citywide traffic flow prediction using attention based neural networks. Inf Sci 577:852\u2013870","journal-title":"Inf Sci"},{"issue":"20","key":"5830_CR15","doi-asserted-by":"publisher","first-page":"31401","DOI":"10.1007\/s11042-020-10486-4","volume":"80","author":"A Ali","year":"2021","unstructured":"Ali A, Zhu Y, Zakarya M (2021) A data aggregation based approach to exploit dynamic spatio-temporal correlations for citywide crowd flows prediction in fog computing. Multimed Tools Appl 80(20):31401\u201331433","journal-title":"Multimed Tools Appl"},{"key":"5830_CR16","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1007\/s10458-008-9056-7","volume":"18","author":"TJ Walsh","year":"2009","unstructured":"Walsh TJ, Nouri A, Li L et al (2009) Learning and planning in environments with delayed feedback. Auton Agent Multi-Agent Syst 18:83\u2013105","journal-title":"Auton Agent Multi-Agent Syst"},{"key":"5830_CR17","doi-asserted-by":"publisher","first-page":"385","DOI":"10.1007\/s10994-012-5322-7","volume":"90","author":"T Hester","year":"2013","unstructured":"Hester T, Stone P (2013) Texplore: real-time sample-efficient reinforcement learning for robots. Mach Learn 90:385\u2013429","journal-title":"Mach Learn"},{"key":"5830_CR18","unstructured":"Derman E, Dalal G, Mannor S (2020) Acting in delayed environments with non-stationary markov policies. In: International conference on learning representations"},{"issue":"4","key":"5830_CR19","doi-asserted-by":"publisher","first-page":"568","DOI":"10.1109\/TAC.2003.809799","volume":"48","author":"KV Katsikopoulos","year":"2003","unstructured":"Katsikopoulos KV, Engelbrecht SE (2003) Markov decision processes with delays and asynchronous cost collection. IEEE Trans Autom Control 48(4):568\u2013574","journal-title":"IEEE Trans Autom Control"},{"key":"5830_CR20","doi-asserted-by":"crossref","unstructured":"Nath S, Baranwal M, Khadilkar H (2021) Revisiting state augmentation methods for reinforcement learning with stochastic delays. In: Proceedings of the 30th ACM international conference on information & knowledge management, pp 1346\u20131355","DOI":"10.1145\/3459637.3482386"},{"key":"5830_CR21","unstructured":"Ramstedt S, Pal C (2019) Real-time reinforcement learning. Adv Neural Inf Process Syst 32"},{"key":"5830_CR22","unstructured":"Xiao T, Jang E, Kalashnikov D et\u00a0al (2019) Thinking while moving: Deep reinforcement learning with concurrent control. In: International conference on learning representations"},{"key":"5830_CR23","unstructured":"Bouteiller Y, Ramstedt S, Beltrame G et\u00a0al (2021) Reinforcement learning with random delays. In: International conference on learning representations"},{"key":"5830_CR24","first-page":"15084","volume":"34","author":"L Chen","year":"2021","unstructured":"Chen L, Lu K, Rajeswaran A et al (2021) Decision transformer: Reinforcement learning via sequence modeling. Adv Neural Inf Process Syst 34:15084\u201315097","journal-title":"Adv Neural Inf Process Syst"},{"key":"5830_CR25","doi-asserted-by":"crossref","unstructured":"Schuitema E, Bu\u015foniu L, Babu\u0161ka R et\u00a0al (2010) Control delay in reinforcement learning for real-time dynamic systems: A memoryless approach. In: 2010 IEEE\/RSJ International conference on intelligent robots and systems, IEEE, pp 3226\u20133231","DOI":"10.1109\/IROS.2010.5650345"},{"key":"5830_CR26","doi-asserted-by":"publisher","first-page":"176","DOI":"10.1016\/j.patrec.2021.06.022","volume":"150","author":"M Agarwal","year":"2021","unstructured":"Agarwal M, Aggarwal V (2021) Blind decision making: Reinforcement learning with delayed observations. Pattern Recogn Lett 150:176\u2013182","journal-title":"Pattern Recogn Lett"},{"key":"5830_CR27","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1016\/j.neucom.2021.04.015","volume":"450","author":"B Chen","year":"2021","unstructured":"Chen B, Xu M, Li L et al (2021) Delay-aware model-based reinforcement learning for continuous control. Neurocomputing 450:119\u2013128","journal-title":"Neurocomputing"},{"key":"5830_CR28","unstructured":"Prudencio RF, Maximo MR, Colombini EL (2023) A survey on offline reinforcement learning: Taxonomy, review, and open problems. IEEE Trans Neural Netw Learn Syst"},{"key":"5830_CR29","unstructured":"Fujimoto S, Meger D, Precup D (2019) Off-policy deep reinforcement learning without exploration. In: International conference on machine learning, PMLR, pp 2052\u20132062"},{"key":"5830_CR30","unstructured":"Kumar A, Fu J, Soh M et\u00a0al (2019) Stabilizing off-policy q-learning via bootstrapping error reduction. Adv Neural Inf Process Syst 32"},{"key":"5830_CR31","unstructured":"Fujimoto S, Gu SS (2021) A minimalist approach to offline reinforcement learning. Adv Neural Inf Process Syst 34:20132\u201320145"},{"key":"5830_CR32","first-page":"1179","volume":"33","author":"A Kumar","year":"2020","unstructured":"Kumar A, Zhou A, Tucker G et al (2020) Conservative q-learning for offline reinforcement learning. Adv Neural Inf Process Syst 33:1179\u20131191","journal-title":"Adv Neural Inf Process Syst"},{"key":"5830_CR33","first-page":"28954","volume":"34","author":"T Yu","year":"2021","unstructured":"Yu T, Kumar A, Rafailov R et al (2021) Combo: Conservative offline model-based policy optimization. Adv Neural Inf Process Syst 34:28954\u201328967","journal-title":"Adv Neural Inf Process Syst"},{"key":"5830_CR34","unstructured":"Janner M, Fu J, Zhang M et\u00a0al (2019) When to trust your model: Model-based policy optimization. Adv Neural Inf Process Syst 32"},{"key":"5830_CR35","first-page":"21810","volume":"33","author":"R Kidambi","year":"2020","unstructured":"Kidambi R, Rajeswaran A, Netrapalli P et al (2020) Morel: Model-based offline reinforcement learning. Adv Neural Inf Process Syst 33:21810\u201321823","journal-title":"Adv Neural Inf Process Syst"},{"key":"5830_CR36","unstructured":"Zhang R, Dai B, Li L et\u00a0al (2020) Gendice: Generalized offline estimation of stationary values. Int Conf Learn Representations"},{"key":"5830_CR37","first-page":"1273","volume":"34","author":"M Janner","year":"2021","unstructured":"Janner M, Li Q, Levine S (2021) Offline reinforcement learning as one big sequence modeling problem. Adv Neural Inf Process Syst 34:1273\u20131286","journal-title":"Adv Neural Inf Process Syst"},{"key":"5830_CR38","unstructured":"Li W, Luo H, Lin Z et\u00a0al (2023) A survey on transformers in reinforcement learning. Transactions on Machine Learning Research pp 2835\u20138856"},{"issue":"7782","key":"5830_CR39","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals O, Babuschkin I, Czarnecki WM et al (2019) Grandmaster level in starcraft ii using multi-agent reinforcement learning. Nature 575(7782):350\u2013354","journal-title":"Nature"},{"key":"5830_CR40","unstructured":"Parisotto E, Song F, Rae J, et\u00a0al (2020) Stabilizing transformers for reinforcement learning. In: International conference on machine learning, PMLR, pp 7487\u20137498"},{"key":"5830_CR41","unstructured":"Micheli V, Alonso E, Fleuret F (2023) Transformers are sample efficient world models"},{"key":"5830_CR42","unstructured":"Ozair S, Li Y, Razavi A et\u00a0al (2021) Vector quantized models for planning. In: international conference on machine learning, PMLR, pp 8302\u20138313"},{"key":"5830_CR43","first-page":"38966","volume":"35","author":"K Paster","year":"2022","unstructured":"Paster K, McIlraith S, Ba J (2022) You can\u2019t count on luck: Why decision transformers and rvs fail in stochastic environments. Adv Neural Inf Process Syst 35:38966\u201338979","journal-title":"Adv Neural Inf Process Syst"},{"key":"5830_CR44","unstructured":"Yang M, Schuurmans D, Abbeel P et\u00a0al (2023) Dichotomy of control: Separating what you can control from what you cannot. The Eleventh International Conference on Learning Representations"},{"key":"5830_CR45","unstructured":"Lee KH, Nachum O, Yang M et\u00a0al (2022) Multi-game decision transformers. Adv Neural Inf Process Syst"},{"key":"5830_CR46","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: An introduction. MIT press"},{"key":"5830_CR47","unstructured":"Hu K, Zheng RC, Gao Y et\u00a0al (2023) Decision transformer under random frame dropping. The Eleventh International Conference on Learning Representations"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-024-05830-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-024-05830-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-024-05830-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T13:08:19Z","timestamp":1727701699000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-024-05830-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,10]]},"references-count":47,"journal-issue":{"issue":"23","published-print":{"date-parts":[[2024,12]]}},"alternative-id":["5830"],"URL":"https:\/\/doi.org\/10.1007\/s10489-024-05830-2","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9,10]]},"assertion":[{"value":"30 August 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 September 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"All authors disclosed no relevant relationships.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interests"}}]}}