{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T15:06:51Z","timestamp":1777129611061,"version":"3.51.4"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2024,4,7]],"date-time":"2024-04-07T00:00:00Z","timestamp":1712448000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,4,7]],"date-time":"2024-04-07T00:00:00Z","timestamp":1712448000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2021YFB1714800"],"award-info":[{"award-number":["2021YFB1714800"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. &amp; Cyber."],"published-print":{"date-parts":[[2024,9]]},"DOI":"10.1007\/s13042-024-02125-3","type":"journal-article","created":{"date-parts":[[2024,4,7]],"date-time":"2024-04-07T11:01:25Z","timestamp":1712487685000},"page":"3879-3891","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Improving world models for robot arm grasping with backward dynamics prediction"],"prefix":"10.1007","volume":"15","author":[{"given":"Yetian","family":"Yuan","sequence":"first","affiliation":[]},{"given":"Shuze","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Yunpeng","family":"Mei","sequence":"additional","affiliation":[]},{"given":"Weipu","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Jian","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Gang","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,4,7]]},"reference":[{"issue":"5","key":"2125_CR1","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-022-3629-1","volume":"66","author":"Y Li","year":"2023","unstructured":"Li Y, Wang X, Sun J, Wang G, Chen J (2023) Data-driven consensus control of fully distributed event-triggered multi-agent systems. Sci China Inf Sci 66(5):152202","journal-title":"Sci China Inf Sci"},{"issue":"5","key":"2125_CR2","doi-asserted-by":"publisher","first-page":"616","DOI":"10.1016\/J.ENG.2017.05.015","volume":"3","author":"RY Zhong","year":"2017","unstructured":"Zhong RY, Xu X, Klotz E, Newman ST (2017) Intelligent manufacturing in the context of industry 4.0: a review. Engineering 3(5):616\u2013630","journal-title":"Engineering"},{"issue":"6","key":"2125_CR3","doi-asserted-by":"publisher","first-page":"2505","DOI":"10.1109\/TKDE.2019.2959991","volume":"33","author":"H Peng","year":"2019","unstructured":"Peng H, Li J, Wang S, Wang L, Gong Q, Yang R, Li B, Philip SY, He L (2019) Hierarchical taxonomy-aware and attentional graph capsule RCNNs for large-scale multi-label text classification. IEEE Trans Knowl Data Eng 33(6):2505\u20132519","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"2125_CR4","doi-asserted-by":"crossref","unstructured":"Zhang R, Peng H, Dou Y, Wu J, Sun Q, Li Y, Zhang J, Yu PS (2022) Automating DBSCAN via deep reinforcement learning. In: Proceedings of the 31st ACM international conference on information and knowledge management, pp 2620\u20132630","DOI":"10.1145\/3511808.3557245"},{"key":"2125_CR5","unstructured":"Li Y (2017) Deep reinforcement learning: an overview. arXiv:1701.07274"},{"issue":"8","key":"2125_CR6","doi-asserted-by":"publisher","first-page":"4722","DOI":"10.1109\/TAC.2022.3209399","volume":"68","author":"W Liu","year":"2023","unstructured":"Liu W, Sun J, Wang G, Bullo F, Chen J (2023) Data-driven resilient predictive control under denial-of-service. IEEE Trans Autom Control 68(8):4722\u20134737","journal-title":"IEEE Trans Autom Control"},{"issue":"3","key":"2125_CR7","doi-asserted-by":"publisher","first-page":"2313","DOI":"10.1109\/TSG.2019.2951769","volume":"11","author":"Q Yang","year":"2020","unstructured":"Yang Q, Wang G, Sadeghi A, Giannakis GB, Sun J (2020) Two-timescale voltage control in distribution grids using deep reinforcement learning. IEEE Trans Smart Grid 11(3):2313\u20132323. https:\/\/doi.org\/10.1109\/TSG.2019.2951769","journal-title":"IEEE Trans Smart Grid"},{"key":"2125_CR8","unstructured":"Zhang W, Wang G, Sun J, Yuan Y, Huang G (2023) STORM: efficient stochastic transformer based world models for reinforcement learning. In: Conference on neural information processing systems"},{"issue":"3","key":"2125_CR9","doi-asserted-by":"publisher","first-page":"2379","DOI":"10.1109\/TRO.2023.3238910","volume":"39","author":"F Liu","year":"2023","unstructured":"Liu F, Sun F, Fang B, Li X, Sun S, Liu H (2023) Hybrid robotic grasping with a soft multimodal gripper and a deep multistage learning scheme. IEEE Trans Robot 39(3):2379\u20132399. https:\/\/doi.org\/10.1109\/TRO.2023.3238910","journal-title":"IEEE Trans Robot"},{"key":"2125_CR10","unstructured":"Luo F-M, Xu T, Lai H, Chen X-H, Zhang W, Yu Y (2022) A survey on model-based reinforcement learning. arXiv:2206.09328"},{"key":"2125_CR11","unstructured":"Laskin M, Srinivas A, Abbeel P (2020) CURL: contrastive unsupervised representations for reinforcement learning. In: Proceedings of the 37th international conference on machine learning, PMLR, pp 5639\u20135650"},{"issue":"4","key":"2125_CR12","doi-asserted-by":"publisher","first-page":"1024","DOI":"10.1109\/TCCN.2019.2936193","volume":"5","author":"A Sadeghi","year":"2019","unstructured":"Sadeghi A, Wang G, Giannakis GB (2019) Deep reinforcement learning for adaptive caching in hierarchical content delivery networks. IEEE Trans Cogn Commun Netw 5(4):1024\u20131033. https:\/\/doi.org\/10.1109\/TCCN.2019.2936193","journal-title":"IEEE Trans Cogn Commun Netw"},{"key":"2125_CR13","doi-asserted-by":"crossref","unstructured":"Yarats D, Zhang A, Kostrikov I, Amos B, Pineau J, Fergus R (2021). Improving sample efficiency in model-free reinforcement learning from images. In: Proceedings of the AAAI conference on artificial intelligence, vol 35, pp 10674\u201310681","DOI":"10.1609\/aaai.v35i12.17276"},{"key":"2125_CR14","unstructured":"Yarats D, Fergus R, Lazaric A, Pinto L (2021) Mastering visual continuous control: Improved data-augmented reinforcement learning. arXiv:2107.09645"},{"key":"2125_CR15","unstructured":"Hafner D, Pasukonis J, Ba J, Lillicrap T (2023) Mastering diverse domains through world models. arXiv:2301.04104"},{"key":"2125_CR16","unstructured":"Ha D, Schmidhuber J (2018) World models. arXiv:1803.10122"},{"key":"2125_CR17","unstructured":"Doerr A, Daniel C, Schiegg M, Duy N-T, Schaal S, Toussaint M, Sebastian T (2018) Probabilistic recurrent state-space models. In: Proceedings of International conference on machine learning, PMLR, pp 1280\u20131289"},{"key":"2125_CR18","unstructured":"Hafner D, Lillicrap T, Ba J, Norouzi M (2019) Dream to control: learning behaviors by latent imagination. arXiv:1912.01603"},{"key":"2125_CR19","doi-asserted-by":"crossref","unstructured":"Sarantopoulos I, Kiatos M, Doulgeri Z, Malassiotis S (2020) Split deep q-learning for robust object singulation. In: IEEE international conference on robotics and automation, IEEE, pp 6225\u20136231","DOI":"10.1109\/ICRA40945.2020.9196647"},{"issue":"8","key":"2125_CR20","doi-asserted-by":"publisher","first-page":"2357","DOI":"10.1007\/s10845-021-01800-4","volume":"33","author":"C-K Cheng","year":"2022","unstructured":"Cheng C-K, Tsai H-Y (2022) Enhanced detection of diverse defects by developing lighting strategies using multiple light sources based on reinforcement learning. J Intell Manuf 33(8):2357\u20132369","journal-title":"J Intell Manuf"},{"key":"2125_CR21","doi-asserted-by":"publisher","first-page":"75336","DOI":"10.1109\/ACCESS.2021.3081736","volume":"9","author":"L Leontaris","year":"2021","unstructured":"Leontaris L, Dimitriou N, Ioannidis D, Votis K, Tzovaras D, Papageorgiou E (2021) An autonomous illumination system for vehicle documentation based on deep reinforcement learning. IEEE Access 9:75336\u201375348","journal-title":"IEEE Access"},{"key":"2125_CR22","doi-asserted-by":"publisher","first-page":"3003","DOI":"10.1007\/s13042-023-01815-8","volume":"14","author":"A Iriondo","year":"2023","unstructured":"Iriondo A, Lazkano E, Ansuategi A, Rivera A, Lluvia I, Tub\u00edo C (2023) Learning positioning policies for mobile manipulation operations with deep reinforcement learning. Int J Mach Learn Cybern 14:3003\u20133023","journal-title":"Int J Mach Learn Cybern"},{"key":"2125_CR23","doi-asserted-by":"publisher","DOI":"10.1016\/j.apenergy.2021.118460","volume":"313","author":"H Lee","year":"2022","unstructured":"Lee H, Kim K, Kim N, Cha SW (2022) Energy efficient speed planning of electric vehicles for car-following scenario using model-based reinforcement learning. Appl Energy 313:118460","journal-title":"Appl Energy"},{"key":"2125_CR24","unstructured":"Hafner D, Lillicrap T, Fischer I, Villegas R, Ha D, Lee H, Davidson J (2019) Learning latent dynamics for planning from pixels. In: Proceedings of the 36th international conference on machine learning, PMLR, 2555\u20132565"},{"key":"2125_CR25","unstructured":"Hafner D, Lillicrap T, Norouzi M, Ba J (2020) Mastering ATARI with discrete world models. arXiv:2010.02193"},{"key":"2125_CR26","doi-asserted-by":"crossref","unstructured":"Li X, Shang W, Cong S (2020) Model-based reinforcement learning for robot control. In: International conference on advanced robotics and mechatronics, IEEE, pp 300\u2013305","DOI":"10.1109\/ICARM49381.2020.9195341"},{"key":"2125_CR27","unstructured":"Wang T, Bao X, Clavera I, Hoang J, Wen Y, Langlois E, Zhang S, Zhang G, Abbeel P, Ba J (2019) Benchmarking model-based reinforcement learning. arXiv:1907.02057"},{"key":"2125_CR28","unstructured":"Clavera I, Rothfuss J, Schulman J, Fujita Y, Asfour T, Abbeel P (2018) Model-based reinforcement learning via meta-policy optimization. In: Conference on robot learning, PMLR, pp 617\u2013629"},{"key":"2125_CR29","unstructured":"Kurutach T, Clavera I, Duan Y, Tamar A, Abbeel P (2018) Model-ensemble trust-region policy optimization. arXiv:1802.10592"},{"key":"2125_CR30","unstructured":"Deisenroth M, Rasmussen CE (2011) PILCO: a model-based and data-efficient approach to policy search. In: Proceedings of the 28th international conference on machine learning, pp 465\u2013472"},{"key":"2125_CR31","first-page":"57","volume":"7","author":"MP Deisenroth","year":"2011","unstructured":"Deisenroth MP, Rasmussen CE, Fox D (2011) Learning to control a low-cost manipulator using data-efficient reinforcement learning. Robot Sci Syst VII 7:57\u201364","journal-title":"Robot Sci Syst VII"},{"key":"2125_CR32","doi-asserted-by":"crossref","unstructured":"Nagabandi A, Kahn G, Fearing R.S, Levine S (2018) Neural network dynamics for model-based deep reinforcement learning with model-free fine-tuning. In: IEEE international conference on robotics and automation, IEEE, pp 7559\u20137566","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"2125_CR33","unstructured":"Chua K, Calandra R, McAllister R, Levine S (2018) Deep reinforcement learning in a handful of trials using probabilistic dynamics models. Adv Neural Inf Process Syst 31"},{"key":"2125_CR34","doi-asserted-by":"publisher","unstructured":"Kong L, Castillo I, Peng Y, Rendall R, Wang Z, Trahan D, Bentley D (2023) From open loop to real-time recipe optimization for complex industrial batch processes. In: American control conference, pp 750\u2013755. https:\/\/doi.org\/10.23919\/ACC55779.2023.10156069","DOI":"10.23919\/ACC55779.2023.10156069"},{"key":"2125_CR35","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1007\/s13042-020-01167-7","volume":"12","author":"A Perrusqu\u00eda","year":"2021","unstructured":"Perrusqu\u00eda A, Yu W, Li X (2021) Multi-agent reinforcement learning for redundant robot control in task-space. Int J Mach Learn Cybern 12:231\u2013241","journal-title":"Int J Mach Learn Cybern"},{"key":"2125_CR36","unstructured":"Brockman G, Cheung V, Pettersson L, Schneider J, Schulman J, Tang J, Zaremba W (2016) OpenAI Gym"},{"key":"2125_CR37","unstructured":"Stanford Artificial Intelligence Laboratory et al. Robot operating system. https:\/\/www.ros.org"},{"key":"2125_CR38","unstructured":"Wu P, Escontrela A, Hafner D, Abbeel P, Goldberg K (2023) Daydreamer: world models for physical robot learning. In: Conference on robot learning, PMLR, pp 2226\u20132240"},{"key":"2125_CR39","doi-asserted-by":"crossref","unstructured":"Hershey JR, Olsen PA (2007) Approximating the Kullback Leibler divergence between gaussian mixture models. In: 2007 IEEE international conference on acoustics, speech and signal processing-ICASSP\u201907, vol 4. IEEE. p 317.","DOI":"10.1109\/ICASSP.2007.366913"},{"key":"2125_CR40","unstructured":"Kingma DP, Welling M (2013) Auto-encoding variational Bayes. arXiv:1312.6114"},{"issue":"4","key":"2125_CR41","doi-asserted-by":"publisher","first-page":"541","DOI":"10.1162\/neco.1989.1.4.541","volume":"1","author":"Y LeCun","year":"1989","unstructured":"LeCun Y, Boser B, Denker JS, Henderson D, Howard RE, Hubbard W, Jackel LD (1989) Backpropagation applied to handwritten zip code recognition. Neural Comput 1(4):541\u2013551","journal-title":"Neural Comput"},{"key":"2125_CR42","doi-asserted-by":"crossref","unstructured":"Dey R, Salem FM (2017) Gate-variants of gated recurrent unit (GRU) neural networks. In: IEEE international midwest symposium on circuits and systems, IEEE, pp 1597\u20131600","DOI":"10.1109\/MWSCAS.2017.8053243"},{"key":"2125_CR43","unstructured":"Clevert D-A, Unterthiner T, Hochreiter S (2015) Fast and accurate deep network learning by exponential linear units. arXiv:1511.07289"},{"issue":"3","key":"2125_CR44","doi-asserted-by":"publisher","first-page":"241","DOI":"10.1080\/09540099108946587","volume":"3","author":"RJ Williams","year":"1991","unstructured":"Williams RJ, Peng J (1991) Function optimization using connectionist reinforcement learning algorithms. Connect Sci 3(3):241\u2013268","journal-title":"Connect Sci"},{"key":"2125_CR45","unstructured":"Haarnoja T, Zhou A, Abbeel P, Levine S (2018) Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on machine learning, PMLR, pp 1861\u20131870"},{"key":"2125_CR46","unstructured":"Bellemare M.G, Dabney W, Munos R (2017) A distributional perspective on reinforcement learning. In: Proceedings of international conference on machine learning, PMLR, pp 449\u2013458"},{"key":"2125_CR47","unstructured":"Imani E, White M (2018) Improving regression performance with distributional losses. In: Proceeding of international conference on machine learning, PMLR, pp 2157\u20132166"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-024-02125-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-024-02125-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-024-02125-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,17]],"date-time":"2024-08-17T08:24:59Z","timestamp":1723883099000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-024-02125-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,7]]},"references-count":47,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2024,9]]}},"alternative-id":["2125"],"URL":"https:\/\/doi.org\/10.1007\/s13042-024-02125-3","relation":{},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"value":"1868-8071","type":"print"},{"value":"1868-808X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,4,7]]},"assertion":[{"value":"21 November 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 March 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 April 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}