{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,30]],"date-time":"2025-12-30T23:47:35Z","timestamp":1767138455793,"version":"build-2238731810"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030976712","type":"print"},{"value":"9783030976729","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-030-97672-9_42","type":"book-chapter","created":{"date-parts":[[2022,3,31]],"date-time":"2022-03-31T00:03:10Z","timestamp":1648684990000},"page":"462-473","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A Novel Reinforcement Learning Sampling Method Without Additional Environment Feedback in\u00a0Hindsight Experience 
Replay"],"prefix":"10.1007","author":[{"given":"Chenxing","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yinlong","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhenshan","family":"Bing","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jan","family":"Seyler","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shahram","family":"Eivazi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,4,1]]},"reference":[{"key":"42_CR1","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction, vol. 1. MIT Press Cambridge (1998)"},{"key":"42_CR2","doi-asserted-by":"publisher","unstructured":"Wiering, M., Van Otterlo, M. (eds.): Reinforcement learning. In: Adaptation, Learning, and Optimization, vol. 12. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-27645-3","DOI":"10.1007\/978-3-642-27645-3"},{"key":"42_CR3","unstructured":"Goodfellow, I., Bengio, Y., Courville, A., Bengio, Y.: Deep Learning, vol. 1. MIT Press Cambridge (2016)"},{"key":"42_CR4","doi-asserted-by":"crossref","unstructured":"Arulkumaran, K., Cully, A., Togelius, J.: AlphaStar: an evolutionary computation perspective. In: Proceedings of the Genetic and Evolutionary Computation Conference Companion, pp. 314\u2013315 (2019)","DOI":"10.1145\/3319619.3321894"},{"key":"42_CR5","unstructured":"Wang, X., et al.: SCC: an efficient deep reinforcement learning agent mastering the game of StarCraft II. In: International Conference on Machine Learning, PMLR 2021, pp. 10905\u201310915 (2021)"},{"key":"42_CR6","unstructured":"Andrychowicz, F. et al.: Hindsight experience replay. In: Advances in Neural Information Processing Systems, p. 
5048\u20135058 (2017)"},{"key":"42_CR7","unstructured":"Zhao, R., Tresp, V.: Energy-based hindsight experience prioritization. In: Conference on Robot Learning, PMLR 2018, pp. 113\u2013122 (2018)"},{"key":"42_CR8","doi-asserted-by":"crossref","unstructured":"Nguyen, H., La, H.: Review of deep reinforcement learning for robot manipulation. In: 2019 3rd IEEE International Conference on Robotic Computing (IRC), pp. 590\u2013595. IEEE (2019)","DOI":"10.1109\/IRC.2019.00120"},{"key":"42_CR9","unstructured":"Gallou\u00e9dec, Q., Cazin, N., Dellandr\u00e9a, E., Chen, L.: Multi-goal reinforcement learning environments for simulated Franka Emika Panda robot. arXiv arXiv:2106.13687 [cs.LG] (2021)"},{"key":"42_CR10","unstructured":"Brockman, G., et al.: OpenAI Gym. arXiv preprint arXiv:1606.01540 (2016)"},{"issue":"3\u20134","key":"42_CR11","first-page":"293","volume":"8","author":"L-J Lin","year":"1992","unstructured":"Lin, L.-J.: Self-improving reactive agents based on reinforcement learning, planning and teaching. Mach. Learn. 8(3\u20134), 293\u2013321 (1992)","journal-title":"Mach. Learn."},{"issue":"7540","key":"42_CR12","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529 (2015)","journal-title":"Nature"},{"key":"42_CR13","unstructured":"Schaul, T., Quan, J., Antonoglou, I., Silver, D.: Prioritized experience replay. In: International Conference on Learning Representations (2016)"},{"issue":"1","key":"42_CR14","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1016\/0010-0277(93)90058-4","volume":"48","author":"JL Elman","year":"1993","unstructured":"Elman, J.L.: Learning and development in neural networks: the importance of starting small. 
Cognition 48(1), 71\u201399 (1993)","journal-title":"Cognition"},{"key":"42_CR15","doi-asserted-by":"crossref","unstructured":"Bengio, Y., Louradour, J., Collobert, R., Weston, J.: Curriculum learning. In: Proceedings of the 26th Annual International Conference on Machine Learning, pp. 41\u201348. ACM (2009)","DOI":"10.1145\/1553374.1553380"},{"key":"42_CR16","unstructured":"Zaremba, W., Sutskever, I.: Learning to execute. arXiv preprint arXiv:1410.4615 (2014)"},{"key":"42_CR17","unstructured":"Graves, A., Bellemare, M.G., Menick, J., Munos, R., Kavukcuoglu, K.: Automated curriculum learning for neural networks. arXiv preprint arXiv:1704.03003 (2017)"},{"key":"42_CR18","unstructured":"Sukhbaatar, S., Lin, Z., Kostrikov, I., Synnaeve, G., Szlam, A., Fergus, R.: Intrinsic motivation and automatic curricula via asymmetric self-play. arXiv preprint arXiv:1703.05407 (2017)"},{"key":"42_CR19","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1016\/j.neunet.2013.01.022","volume":"41","author":"RK Srivastava","year":"2013","unstructured":"Srivastava, R.K., Steunebrink, B.R., Schmidhuber, J.: First experiments with powerplay. Neural Netw. 41, 130\u2013136 (2013)","journal-title":"Neural Netw."},{"issue":"3","key":"42_CR20","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1023\/B:MACH.0000015880.99707.b2","volume":"54","author":"J Schmidhuber","year":"2004","unstructured":"Schmidhuber, J.: Optimal ordered problem solver. Mach. Learn. 54(3), 211\u2013254 (2004)","journal-title":"Mach. Learn."},{"key":"42_CR21","unstructured":"Florensa, C., Held, D., Wulfmeier, M., Abbeel, P.: Reverse curriculum generation for reinforcement learning. arXiv preprint arXiv:1707.05300 (2017)"},{"key":"42_CR22","unstructured":"Thrun, S.B.: Efficient exploration in reinforcement learning (1992)"},{"key":"42_CR23","doi-asserted-by":"crossref","unstructured":"Puterman, M.L.: Markov decision processes. In: Handbooks in Operations Research and Management Science, vol. 2, pp. 
331\u2013434 (1990)","DOI":"10.1016\/S0927-0507(05)80172-0"},{"key":"42_CR24","doi-asserted-by":"crossref","unstructured":"Garcia, F., Rachelson, E.: Markov decision processes. In: Markov Decision Processes in Artificial Intelligence, pp. 1\u201338 (2013)","DOI":"10.1002\/9781118557426.ch1"},{"issue":"1","key":"42_CR25","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1016\/j.artmed.2012.12.003","volume":"57","author":"CC Bennett","year":"2013","unstructured":"Bennett, C.C., Hauser, K.: Artificial intelligence framework for simulating clinical decision-making: a Markov decision process approach. Artif. Intel. Med. 57(1), 9\u201319 (2013)","journal-title":"Artif. Intel. Med."},{"key":"42_CR26","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning. In: International Conference on Learning Representations (2016)"},{"issue":"4","key":"42_CR27","doi-asserted-by":"publisher","first-page":"838","DOI":"10.1137\/0330046","volume":"30","author":"BT Polyak","year":"1992","unstructured":"Polyak, B.T., Juditsky, A.B.: Acceleration of stochastic approximation by averaging. SIAM J. Control. Optim. 30(4), 838\u2013855 (1992)","journal-title":"SIAM J. Control. Optim."},{"key":"42_CR28","unstructured":"Dearden, R., Friedman, N., Russell, S.: Bayesian Q-learning. In: AAAI\/IAAI 1998, pp. 761\u2013768 (1998)"},{"key":"42_CR29","unstructured":"Haarnoja, T., et al.: Soft actor-critic algorithms and applications. arXiv preprint arXiv:1812.05905 (2018)"},{"key":"42_CR30","doi-asserted-by":"crossref","unstructured":"Zhao, D., Wang, H., Shao, K., Zhu, Y.: Deep reinforcement learning with experience replay based on SARSA. In: 2016 IEEE Symposium Series on Computational Intelligence (SSCI), pp. 1\u20136. 
IEEE (2016)","DOI":"10.1109\/SSCI.2016.7849837"}],"updated-by":[{"DOI":"10.1007\/978-3-030-97672-9_54","type":"correction","label":"Correction","source":"publisher","updated":{"date-parts":[[2022,9,15]],"date-time":"2022-09-15T00:00:00Z","timestamp":1663200000000}}],"container-title":["Lecture Notes in Networks and Systems","Robot Intelligence Technology and Applications 6"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-97672-9_42","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,14]],"date-time":"2022-09-14T02:12:38Z","timestamp":1663121558000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-97672-9_42"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783030976712","9783030976729"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-97672-9_42","relation":{},"ISSN":["2367-3370","2367-3389"],"issn-type":[{"value":"2367-3370","type":"print"},{"value":"2367-3389","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"1 April 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"15 September 2022","order":2,"name":"change_date","label":"Change Date","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Correction","order":3,"name":"change_type","label":"Change Type","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The original version of this chapter was inadvertently published with incorrect authors\u2019 last names in Ref. [9], which have now been corrected from \u201cGalloup\u00e9dec, Q., Cazin, N., Dellandrp\u00e9a, E., Chen, L\u201d to \u201cGallou\u00e9dec, Q., Cazin, N., Dellandr\u00e9a, E., Chen, L\u201d. 
The chapter has been updated with the changes.","order":4,"name":"change_details","label":"Change Details","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"RiTA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Robot Intelligence Technology and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Daejeon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 December 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 December 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"rita2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}