{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T12:07:23Z","timestamp":1743077243305,"version":"3.40.3"},"publisher-location":"Cham","reference-count":22,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031723407"},{"type":"electronic","value":"9783031723414"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-72341-4_26","type":"book-chapter","created":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T13:02:55Z","timestamp":1726491775000},"page":"381-396","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Dynamic Modeling for\u00a0Reinforcement Learning with\u00a0Random Delay"],"prefix":"10.1007","author":[{"given":"Yalou","family":"Yu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bo","family":"xia","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Minzhi","family":"Xie","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiheng","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xuwqian","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,17]]},"reference":[{"issue":"1","key":"26_CR1","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1145\/149439.133106","volume":"20","author":"E Altman","year":"1992","unstructured":"Altman, E., Nain, P.: Closed-loop control with delayed information. ACM Sigmetrics Perform. Eval. Rev. 20(1), 193\u2013204 (1992)","journal-title":"ACM Sigmetrics Perform. Eval. Rev."},{"key":"26_CR2","doi-asserted-by":"publisher","first-page":"299","DOI":"10.1007\/s10915-017-0628-z","volume":"76","author":"R Hannah","year":"2018","unstructured":"Hannah, R., Yin, W.: On unbounded delays in asynchronous parallel fixed-point algorithms. J. Sci. Comput. 76, 299\u2013326 (2018)","journal-title":"J. Sci. Comput."},{"issue":"3","key":"26_CR3","doi-asserted-by":"publisher","first-page":"499","DOI":"10.1109\/TRA.2004.825271","volume":"20","author":"T Imaida","year":"2004","unstructured":"Imaida, T., Yokokohji, Y., Doi, T., Oda, M., Yoshikawa, T.: Ground-space bilateral teleoperation of ETS-VII robot arm by direct bilateral coupling under 7-s time delay condition. IEEE Trans. Robot. Autom. 20(3), 499\u2013511 (2004)","journal-title":"IEEE Trans. Robot. Autom."},{"doi-asserted-by":"crossref","unstructured":"Jin, M., Kang, S.H., Chang, P.H.: Robust compliant motion control of robot with nonlinear friction using time-delay estimation. IEEE Trans. Ind. Electron. 55(1), 258\u2013269 (2008)","key":"26_CR4","DOI":"10.1109\/TIE.2007.906132"},{"doi-asserted-by":"crossref","unstructured":"Lampe, T., Fiederer, L.D.J., Voelker, M., Knorr, A., Riedmiller, M., Ball, T.: A brain-computer interface for high-level remote control of an autonomous, reinforcement-learning-based robotic system for reaching and grasping. In: Proceedings of the 19th international conference on Intelligent User Interfaces, pp. 83\u201388 (2014)","key":"26_CR5","DOI":"10.1145\/2557500.2557533"},{"doi-asserted-by":"crossref","unstructured":"Katsikopoulos, K.V., Engelbrecht, S.E.: Markov decision processes with delays and asynchronous cost collection. IEEE Trans. Autom. Control 48(4), 568\u2013574 (2003)","key":"26_CR6","DOI":"10.1109\/TAC.2003.809799"},{"doi-asserted-by":"crossref","unstructured":"Walsh, T.J., Nouri, A., Li, L., Littman, M.L.: Planning and learning in environments with delayed feedback. In: Machine Learning: ECML 2007: 18th European Conference on Machine Learning, Warsaw, Poland, September 17-21, 2007. Proceedings 18, pp. 442\u2013453. Springer (2007)","key":"26_CR7","DOI":"10.1007\/978-3-540-74958-5_41"},{"doi-asserted-by":"crossref","unstructured":"Zhu, X., Zheng, X., Zhang, Q., Chen, Z., Liu, Y., Liang, B.: Sim-to-real transfer with action mapping and state prediction for robot motion control. In: 2021 6th Asia-Pacific Conference on Intelligent Robot Systems (ACIRS), pp. 1\u20136. IEEE (2021)","key":"26_CR8","DOI":"10.1109\/ACIRS52449.2021.9519311"},{"key":"26_CR9","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1016\/j.neucom.2021.04.015","volume":"450","author":"B Chen","year":"2021","unstructured":"Chen, B., Mengdi, X., Li, L., Zhao, D.: Delay-aware model-based reinforcement learning for continuous control. Neurocomputing 450, 119\u2013128 (2021)","journal-title":"Neurocomputing"},{"unstructured":"Firoiu, V., Ju, T., Tenenbaum, J.: At human speed: deep reinforcement learning with action delay (2018). arXiv preprint arXiv:1810.07286","key":"26_CR10"},{"doi-asserted-by":"crossref","unstructured":"Liotet, P., Venneri, E., Restelli, M.: Learning a belief representation for delayed reinforcement learning. In: 2021 International Joint Conference on Neural Networks (IJCNN), pp. 1\u20138. IEEE (2021)","key":"26_CR11","DOI":"10.1109\/IJCNN52387.2021.9534358"},{"unstructured":"Ramstedt, S., Pal, C.: Real-time reinforcement learning. Adv. Neural Inf. Proc. Syst. 32 (2019)","key":"26_CR12"},{"doi-asserted-by":"crossref","unstructured":"Schuitema, E., Bu\u015foniu, L., Babu\u0161ka, R., Jonker, P.: Control delay in reinforcement learning for real-time dynamic systems: a memoryless approach. In: 2010 IEEE\/RSJ international conference on intelligent robots and systems, pp. 3226\u20133231. IEEE (2010)","key":"26_CR13","DOI":"10.1109\/IROS.2010.5650345"},{"unstructured":"Xiao, T., et al. Thinking while moving: Deep reinforcement learning with concurrent control (2020). arXiv preprint arXiv:2004.06089","key":"26_CR14"},{"doi-asserted-by":"crossref","unstructured":"Nath, S., Baranwal, M., Khadilkar, H.: Revisiting state augmentation methods for reinforcement learning with stochastic delays. In: Proceedings of the 30th ACM International Conference on Information and Knowledge Management, pp. 1346\u20131355 (2021)","key":"26_CR15","DOI":"10.1145\/3459637.3482386"},{"unstructured":"Bouteiller, Y., Ramstedt, S., Beltrame, G., Pal, C., Binas, J.: Reinforcement learning with random delays. In: International Conference on Learning Representations (2020)","key":"26_CR16"},{"unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on Machine Learning, pp. 1861\u20131870. PMLR (2018)","key":"26_CR17"},{"unstructured":"Ziebart, B.D., et\u00a0al.: Maximum entropy inverse reinforcement learning. In: AAAI, volume\u00a08, pp. 1433\u20131438. Chicago, IL, USA (2008)","key":"26_CR18"},{"doi-asserted-by":"crossref","unstructured":"Toussaint, M.: Robot trajectory optimization using approximate inference. In: Proceedings of the 26th Annual International Conference On Machine Learning, pp. 1049\u20131056 (2009)","key":"26_CR19","DOI":"10.1145\/1553374.1553508"},{"doi-asserted-by":"crossref","unstructured":"Rawlik, K., Toussaint, M., Vijayakumar, S.: On stochastic optimal control and reinforcement learning by approximate inference. In: Proceedings of the Twenty-Third International Joint Conference on Artificial Intelligence, pp. 3052\u20133056 (2013)","key":"26_CR20","DOI":"10.7551\/mitpress\/9816.003.0050"},{"unstructured":"Fox, R., Pakman, A., Tishby, N.: Taming the noise in reinforcement learning via soft updates. In: 32nd Conference on Uncertainty in Artificial Intelligence 2016, UAI 2016, pp. 202\u2013211. Association For Uncertainty in Artificial Intelligence (AUAI) (2016)","key":"26_CR21"},{"unstructured":"Haarnoja, T., Tang, H., Abbeel, P., Levine, S.: Reinforcement learning with deep energy-based policies. In: International Conference on Machine Learning, pp. 1352\u20131361. PMLR (2017)","key":"26_CR22"}],"container-title":["Lecture Notes in Computer Science","Artificial Neural Networks and Machine Learning \u2013 ICANN 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72341-4_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T13:13:39Z","timestamp":1726492419000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72341-4_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031723407","9783031723414"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72341-4_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"17 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICANN","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Neural Networks","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lugano","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Switzerland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"33","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icann2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}