{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T15:33:04Z","timestamp":1742916784661,"version":"3.40.3"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030958916"},{"type":"electronic","value":"9783030958923"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-030-95892-3_32","type":"book-chapter","created":{"date-parts":[[2022,4,7]],"date-time":"2022-04-07T07:02:54Z","timestamp":1649314974000},"page":"417-428","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Adaptive Eligibility Traces for Online Deep Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Taisuke","family":"Kobayashi","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,4,8]]},"reference":[{"key":"32_CR1","unstructured":"Ba, J.L., Kiros, J.R., Hinton, G.E.: Layer normalization. arXiv preprint arXiv:1607.06450 (2016)"},{"key":"32_CR2","unstructured":"Brockman, G., et al.: Openai gym. arXiv preprint arXiv:1606.01540 (2016)"},{"key":"32_CR3","unstructured":"Coumans, E., Bai, Y.: Pybullet, a python module for physics simulation for games, robotics and machine learning. GitHub repository (2016)"},{"key":"32_CR4","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/j.neunet.2017.12.012","volume":"107","author":"S Elfwing","year":"2018","unstructured":"Elfwing, S., Uchibe, E., Doya, K.: Sigmoid-weighted linear units for neural network function approximation in reinforcement learning. Neural Netw. 107, 3\u201311 (2018)","journal-title":"Neural Netw."},{"key":"32_CR5","doi-asserted-by":"crossref","unstructured":"Hayes, T.L., Cahill, N.D., Kanan, C.: Memory efficient experience replay for streaming learning. In: International Conference on Robotics and Automation, pp. 9769\u20139776. IEEE (2019)","DOI":"10.1109\/ICRA.2019.8793982"},{"key":"32_CR6","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"issue":"12","key":"32_CR7","doi-asserted-by":"publisher","first-page":"4335","DOI":"10.1007\/s10489-019-01510-8","volume":"49","author":"T Kobayashi","year":"2019","unstructured":"Kobayashi, T.: Student-t policy in reinforcement learning to acquire global optimum of robot control. Appl. Intell. 49(12), 4335\u20134347 (2019). https:\/\/doi.org\/10.1007\/s10489-019-01510-8","journal-title":"Appl. Intell."},{"key":"32_CR8","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: Advances in Neural Information Processing Systems, pp. 1097\u20131105 (2012)"},{"issue":"4\u20135","key":"32_CR9","doi-asserted-by":"publisher","first-page":"421","DOI":"10.1177\/0278364917710318","volume":"37","author":"S Levine","year":"2018","unstructured":"Levine, S., Pastor, P., Krizhevsky, A., Ibarz, J., Quillen, D.: Learning hand-eye coordination for robotic grasping with deep learning and large-scale data collection. Int. J. Robot. Res. 37(4\u20135), 421\u2013436 (2018)","journal-title":"Int. J. Robot. Res."},{"issue":"3\u20134","key":"32_CR10","first-page":"293","volume":"8","author":"LJ Lin","year":"1992","unstructured":"Lin, L.J.: Self-improving reactive agents based on reinforcement learning, planning and teaching. Mach. Learn. 8(3\u20134), 293\u2013321 (1992)","journal-title":"Mach. Learn."},{"issue":"3","key":"32_CR11","doi-asserted-by":"publisher","first-page":"655","DOI":"10.1109\/TCYB.2015.2412554","volume":"46","author":"H Modares","year":"2015","unstructured":"Modares, H., Ranatunga, I., Lewis, F.L., Popa, D.O.: Optimized assistive human-robot interaction using reinforcement learning. IEEE Trans. Cybern. 46(3), 655\u2013667 (2015)","journal-title":"IEEE Trans. Cybern."},{"key":"32_CR12","doi-asserted-by":"crossref","unstructured":"Parisi, S., Tangkaratt, V., Peters, J., Khan, M.E.: Td-regularized actor-critic methods. Machine Learning, pp. 1\u201335 (2019)","DOI":"10.1007\/s10994-019-05788-0"},{"key":"32_CR13","unstructured":"Paszke, A., et al.: Automatic differentiation in pytorch. In: Advances in Neural Information Processing Systems Workshop (2017)"},{"key":"32_CR14","unstructured":"Ramachandran, P., Zoph, B., Le, Q.V.: Swish: a self-gated activation function. arXiv preprint arXiv:1710.05941 7 (2017)"},{"key":"32_CR15","unstructured":"Schulman, J., Moritz, P., Levine, S., Jordan, M., Abbeel, P.: High-dimensional continuous control using generalized advantage estimation. In: International Conference on Learning Representations (2016)"},{"key":"32_CR16","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"key":"32_CR17","unstructured":"van Seijen, H.: Effective multi-step temporal-difference learning for non-linear function approximation. arXiv preprint arXiv:1608.05151 (2016)"},{"key":"32_CR18","doi-asserted-by":"crossref","unstructured":"Silver, D., et\u00a0al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484 (2016)","DOI":"10.1038\/nature16961"},{"issue":"1\u20133","key":"32_CR19","first-page":"123","volume":"22","author":"SP Singh","year":"1996","unstructured":"Singh, S.P., Sutton, R.S.: Reinforcement learning with replacing eligibility traces. Mach. Learn. 22(1\u20133), 123\u2013158 (1996)","journal-title":"Mach. Learn."},{"key":"32_CR20","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press (2018)"},{"issue":"1","key":"32_CR21","doi-asserted-by":"publisher","first-page":"54","DOI":"10.1002\/wics.56","volume":"2","author":"ST Tokdar","year":"2010","unstructured":"Tokdar, S.T., Kass, R.E.: Importance sampling: a review. Wiley Interdisciplinary Rev. Comput. Stat. 2(1), 54\u201360 (2010)","journal-title":"Wiley Interdisciplinary Rev. Comput. Stat."},{"key":"32_CR22","doi-asserted-by":"publisher","first-page":"72","DOI":"10.1016\/j.robot.2018.11.004","volume":"112","author":"Y Tsurumine","year":"2019","unstructured":"Tsurumine, Y., Cui, Y., Uchibe, E., Matsubara, T.: Deep reinforcement learning with smooth policy update: application to robotic cloth manipulation. Robot. Auton. Syst. 112, 72\u201383 (2019)","journal-title":"Robot. Auton. Syst."},{"issue":"1","key":"32_CR23","first-page":"5057","volume":"17","author":"H Van Seijen","year":"2016","unstructured":"Van Seijen, H., Mahmood, A.R., Pilarski, P.M., Machado, M.C., Sutton, R.S.: True online temporal-difference learning. J. Mach. Learn. Res. 17(1), 5057\u20135096 (2016)","journal-title":"J. Mach. Learn. Res."}],"container-title":["Lecture Notes in Networks and Systems","Intelligent Autonomous Systems 16"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-95892-3_32","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,7]],"date-time":"2022-04-07T07:15:03Z","timestamp":1649315703000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-95892-3_32"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783030958916","9783030958923"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-95892-3_32","relation":{},"ISSN":["2367-3370","2367-3389"],"issn-type":[{"type":"print","value":"2367-3370"},{"type":"electronic","value":"2367-3389"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"8 April 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IAS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Autonomous Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Singapore","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Singapore","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 June 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 June 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ias2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ias-16.com","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}