{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:21:23Z","timestamp":1750220483344,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":18,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,10,22]],"date-time":"2021-10-22T00:00:00Z","timestamp":1634860800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,10,22]]},"DOI":"10.1145\/3501409.3501550","type":"proceedings-article","created":{"date-parts":[[2022,1,2]],"date-time":"2022-01-02T06:18:06Z","timestamp":1641104286000},"page":"781-788","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Concatenated Dynamic Reinforcement Learning for Multi-staged Tasks (MST)"],"prefix":"10.1145","author":[{"given":"Yendo","family":"Hu","sequence":"first","affiliation":[{"name":"College of Ocean Information Engineering, Jimei University, Xiamen, China"}]},{"given":"Xue","family":"Bai","sequence":"additional","affiliation":[{"name":"College of Ocean Information Engineering, Jimei University, Xiamen, China"}]},{"given":"Weijie","family":"Yang","sequence":"additional","affiliation":[{"name":"College of Ocean Information Engineering, Jimei University, Xiamen, China"}]},{"given":"Yiliang","family":"Wu","sequence":"additional","affiliation":[{"name":"College of Ocean Information Engineering, Jimei University, Xiamen, China"}]}],"member":"320","published-online":{"date-parts":[[2021,12,31]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Sensors","author":"Ding Guanwen","year":"2020","unstructured":"Guanwen Ding, Yubin Liu, Xizhe Zang, Xuehe Zhang, Jie Zhao. A Task-Learning Strategy for Robotic Assembly Tasks from Human Demonstrations. Sensors, 2020"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2999538"},{"key":"e_1_3_2_1_3_1","unstructured":"G Brockman V Cheung L Pettersson J Schneider J Schulman J Tang W Zaremba. OpenAI Gym."},{"key":"e_1_3_2_1_4_1","volume-title":"Martin Riedmiller. Playing Atari with Deep Reinforcement Learning. NIPS Deep Learning Workshop","author":"Mnih Volodymyr","year":"2013","unstructured":"Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Alex Graves, Ioannis Antonoglou, Daan Wierstra, Martin Riedmiller. Playing Atari with Deep Reinforcement Learning. NIPS Deep Learning Workshop, 2013"},{"key":"e_1_3_2_1_5_1","volume-title":"Reinforcement Learning: An Introduction","author":"Sutton Richard S.","year":"2018","unstructured":"Richard S. Sutton, Andrew G. Barto. Reinforcement Learning: An Introduction. Bradford Books; second edition, 2018"},{"key":"e_1_3_2_1_6_1","volume-title":"Practical Reinforcement Learning","author":"Shvechikov A.","year":"2017","unstructured":"Panin, A. and Shvechikov, P., Practical Reinforcement Learning. Coursera and National Research University Higher School of Economics, 2017"},{"key":"e_1_3_2_1_7_1","first-page":"679","article-title":"A Markov decision process","volume":"6","author":"E.","year":"1957","unstructured":"Bellman, R.E., 1957. A Markov decision process. Journal of Mathematical Mechanics, 6, pp. 679--684.","journal-title":"Journal of Mathematical Mechanics"},{"key":"e_1_3_2_1_8_1","volume-title":"Abstract Dynamic Programming","author":"Bertsekas Dimitri P.","year":"2018","unstructured":"Dimitri P. Bertsekas. Abstract Dynamic Programming. Athena Scientific; 2nd edition, 2018","edition":"2"},{"key":"e_1_3_2_1_9_1","volume-title":"JW Kim. Q-learning Algorithms: A Comprehensive Classification and Applications","author":"Jang B","year":"2019","unstructured":"B Jang, M Kim, G Harerimana, JW Kim. Q-learning Algorithms: A Comprehensive Classification and Applications. IEEE, 2019"},{"key":"e_1_3_2_1_10_1","volume-title":"Nature","author":"Volodymyr","year":"2015","unstructured":"Volodymyr Mnih*, Koray Kavukcuoglu*, David Silver1*, Andrei A. Rusu, Joel Veness, Marc G. Bellemare, Alex Graves, Martin Riedmiller, Andreas K. Fidjeland, Georg Ostrovski, Stig Petersen, Charles Beattie, Amir Sadik, Ioannis Antonoglou, Helen King, Dharshan Kumaran, Daan Wierstra, Shane Legg, Demis Hassabis. Human-level Control Through Deep Reinforcement Learning. Nature, 2015"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/SoutheastCon44009.2020.9368267"},{"key":"e_1_3_2_1_12_1","volume-title":"An application of human robot interaction: development of a ping-pong playing robotic arm","author":"Modi K.P.","year":"2005","unstructured":"K.P. Modi, F. Sahin, E. Saber. An application of human robot interaction: development of a ping-pong playing robotic arm. IEEE, 2005"},{"key":"e_1_3_2_1_13_1","volume-title":"On Actor-Critic Algorithms","author":"Konda Vijay R.","year":"2003","unstructured":"Vijay R. Konda, John N. Tsitsiklis. On Actor-Critic Algorithms. Society for Industrial and Applied Mathematics, 2003"},{"key":"e_1_3_2_1_14_1","volume-title":"ICML","author":"Mnih Volodymyr","year":"2016","unstructured":"Volodymyr Mnih, Adri\u00e0 Puigdom\u00e8nech Badia, Mehdi Mirza, Alex Graves, Tim Harley, Timothy P. Lillicrap, David Silver, Koray Kavukcuoglu. Asynchronous Methods for Deep Reinforcement Learning. ICML, 2016"},{"key":"e_1_3_2_1_15_1","volume-title":"ICLR","author":"Timothy P.","year":"2016","unstructured":"Timothy P. Lillicrap*, Jonathan J. Hunt*, Alexander Pritzel, Nicolas Heess. Continuous Control with Deep Reinforcement Learning. ICLR, 2016"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.5555\/3016100.3016191"},{"key":"e_1_3_2_1_17_1","volume-title":"ICML","author":"Finn Chelsea","year":"2017","unstructured":"Chelsea Finn, Pieter Abbeel, Sergey Levine. Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks. ICML, 2017"},{"key":"e_1_3_2_1_18_1","volume-title":"ICLR","author":"Co-Reyes John D.","year":"2021","unstructured":"John D. Co-Reyes, Yingjie Miao, Daiyi Peng, Esteban Real, Sergey Levine, Quoc V. Le, Honglak Lee, Aleksandra Faust*. Evolving Reinforcement Learning Algorithms. ICLR, 2021"}],"event":{"name":"EITCE 2021: 2021 5th International Conference on Electronic Information Technology and Computer Engineering","acronym":"EITCE 2021","location":"Xiamen China"},"container-title":["Proceedings of the 2021 5th International Conference on Electronic Information Technology and Computer Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3501409.3501550","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3501409.3501550","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:49:01Z","timestamp":1750193341000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3501409.3501550"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,22]]},"references-count":18,"alternative-id":["10.1145\/3501409.3501550","10.1145\/3501409"],"URL":"https:\/\/doi.org\/10.1145\/3501409.3501550","relation":{},"subject":[],"published":{"date-parts":[[2021,10,22]]},"assertion":[{"value":"2021-12-31","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}