{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:42:25Z","timestamp":1742913745387,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":40,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819947607"},{"type":"electronic","value":"9789819947614"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-981-99-4761-4_51","type":"book-chapter","created":{"date-parts":[[2023,7,30]],"date-time":"2023-07-30T16:02:10Z","timestamp":1690732930000},"page":"600-613","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Off-Policy Reinforcement Learning with Loss Function Weighted by Temporal Difference Error"],"prefix":"10.1007","author":[{"given":"Bumgeun","family":"Park","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Taeyoung","family":"Kim","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Woohyeon","family":"Moon","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sarvar Hussain","family":"Nengroo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dongsoo","family":"Har","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,7,31]]},"reference":[{"key":"51_CR1","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (2018)"},{"key":"51_CR2","unstructured":"Mnih, V., et al.: Playing Atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)"},{"key":"51_CR3","doi-asserted-by":"crossref","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529, 484\u2013489 (2016)","DOI":"10.1038\/nature16961"},{"key":"51_CR4","unstructured":"Li, Y.: Deep reinforcement learning: an overview. arXiv preprint arXiv:1701.07274 (2017)"},{"key":"51_CR5","doi-asserted-by":"crossref","unstructured":"Silver, D., et al.: Mastering the game of go without human knowledge. Nature 550, 354\u2013359 (2017)","DOI":"10.1038\/nature24270"},{"key":"51_CR6","doi-asserted-by":"publisher","first-page":"118776","DOI":"10.1109\/ACCESS.2019.2936863","volume":"7","author":"M Seo","year":"2019","unstructured":"Seo, M., Vecchietti, L.F., Lee, S., Har, D.: Rewards prediction-based credit assignment for reinforcement learning with sparse binary rewards. IEEE Access 7, 118776\u2013118791 (2019)","journal-title":"IEEE Access"},{"key":"51_CR7","doi-asserted-by":"publisher","first-page":"137449","DOI":"10.1109\/ACCESS.2020.3012204","volume":"8","author":"LF Vecchietti","year":"2020","unstructured":"Vecchietti, L.F., Kim, T., Choi, K., Hong, J., Har, D.: Batch prioritization in multigoal reinforcement learning. IEEE Access 8, 137449\u2013137461 (2020)","journal-title":"IEEE Access"},{"key":"51_CR8","doi-asserted-by":"publisher","first-page":"1515","DOI":"10.1109\/TCYB.2020.2990722","volume":"52","author":"LF Vecchietti","year":"2020","unstructured":"Vecchietti, L.F., Seo, M., Har, D.: Sampling rate decay in hindsight experience replay for robot control. IEEE Trans. Cybern. 52, 1515\u20131526 (2020)","journal-title":"IEEE Trans. Cybern."},{"key":"51_CR9","doi-asserted-by":"crossref","unstructured":"Kim, I., Nengroo, S.H., Har, D.: Reinforcement learning for navigation of mobile robot with LiDAR. In: 2021 5th International Conference on Electronics, Communication and Aerospace Technology (ICECA), pp. 148\u2013154. IEEE (2021)","DOI":"10.1109\/ICECA52323.2021.9675949"},{"key":"51_CR10","doi-asserted-by":"crossref","unstructured":"Moon, W., Park, B., Nengroo, S.H., Kim, T., Har, D.: Path planning of cleaning robot with reinforcement learning. In: 2022 IEEE International Symposium on Robotic and Sensors Environments (ROSE), pp. 1\u20137. IEEE (2022)","DOI":"10.1109\/ROSE56499.2022.9977430"},{"key":"51_CR11","doi-asserted-by":"crossref","unstructured":"Cho, I., Rajendran, P.K., Kim, T., Har, D.: Reinforcement learning for predicting traffic accidents. In: 2023 International Conference on Artificial Intelligence in Information and Communication (ICAIIC), pp. 684\u2013688. IEEE (2023)","DOI":"10.1109\/ICAIIC57133.2023.10067034"},{"key":"51_CR12","doi-asserted-by":"publisher","first-page":"330","DOI":"10.1109\/TG.2021.3065410","volume":"13","author":"C Hong","year":"2021","unstructured":"Hong, C., Jeong, I., Vecchietti, L.F., Har, D., Kim, J.-H.: AI world cup: robot-soccer-based competitions. IEEE Trans. Games 13, 330\u2013341 (2021)","journal-title":"IEEE Trans. Games"},{"key":"51_CR13","doi-asserted-by":"publisher","DOI":"10.7717\/peerj-cs.718","volume":"7","author":"T Kim","year":"2021","unstructured":"Kim, T., Vecchietti, L.F., Choi, K., Sariel, S., Har, D.: Two-stage training algorithm for AI robot soccer. PeerJ Comput. Sci. 7, e718 (2021)","journal-title":"PeerJ Comput. Sci."},{"key":"51_CR14","doi-asserted-by":"crossref","unstructured":"Park, B., Lee, J., Kim, T., Har, D.: Kick-motion training with DQN in AI soccer environment. In: 2023 International Conference on Artificial Intelligence in Information and Communication (ICAIIC), pp. 689\u2013692. IEEE (2023)","DOI":"10.1109\/ICAIIC57133.2023.10067011"},{"key":"51_CR15","doi-asserted-by":"crossref","unstructured":"Yu, Y.: Towards sample efficient reinforcement learning. In: IJCAI, pp. 5739\u20135743 (2018)","DOI":"10.24963\/ijcai.2018\/820"},{"key":"51_CR16","unstructured":"Zhang, S., Sutton, R.S.: A deeper look at experience replay. arXiv preprint arXiv:1712.01275 (2017)"},{"key":"51_CR17","unstructured":"Novati, G., Koumoutsakos, P.: Remember and forget for experience replay. In: International Conference on Machine Learning, pp. 4851\u20134860. PMLR (2019)"},{"key":"51_CR18","unstructured":"Schaul, T., Quan, J., Antonoglou, I., Silver, D.: Prioritized experience replay. arXiv preprint arXiv:1511.05952 (2015)"},{"key":"51_CR19","unstructured":"Jeong, H.: Off-policy temporal difference learning for robotics and autonomous systems. University of Pennsylvania (2020)"},{"key":"51_CR20","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)"},{"key":"51_CR21","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on Machine Learning, pp. 1861\u20131870. PMLR (2018)"},{"key":"51_CR22","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1007\/BF00993104","volume":"13","author":"AW Moore","year":"1993","unstructured":"Moore, A.W., Atkeson, C.G.: Prioritized sweeping: reinforcement learning with less data and less time. Mach. Learn. 13, 103\u2013130 (1993)","journal-title":"Mach. Learn."},{"key":"51_CR23","doi-asserted-by":"crossref","unstructured":"Ramicic, M., Bonarini, A.: Entropy-based prioritized sampling in deep Q-learning. In: 2017 2nd International Conference on Image, Vision and Computing (ICIVC), pp. 1068\u20131072. IEEE (2017)","DOI":"10.1109\/ICIVC.2017.7984718"},{"key":"51_CR24","unstructured":"Zhao, R., Sun, X., Tresp, V.: Maximum entropy-regularized multi-goal reinforcement learning. In: International Conference on Machine Learning, pp. 7553\u20137562. PMLR (2019)"},{"key":"51_CR25","unstructured":"Menon, A., Narasimhan, H., Agarwal, S., Chawla, S.: On the statistical consistency of algorithms for binary classification under class imbalance. In: International Conference on Machine Learning, pp. 603\u2013611. PMLR (2013)"},{"key":"51_CR26","doi-asserted-by":"crossref","unstructured":"Cui, Y., Jia, M., Lin, T.-Y., Song, Y., Belongie, S.: Class-balanced loss based on effective number of samples. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9268\u20139277 (2019)","DOI":"10.1109\/CVPR.2019.00949"},{"key":"51_CR27","doi-asserted-by":"crossref","unstructured":"Zhang, K., et al.: Re-weighted interval loss for handling data imbalance problem of end-to-end keyword spotting. In: INTERSPEECH, pp. 2567\u20132571 (2020)","DOI":"10.21437\/Interspeech.2020-1644"},{"key":"51_CR28","unstructured":"Li, M., Zhang, X., Thrampoulidis, C., Chen, J., Oymak, S.: Autobalance: optimized loss functions for imbalanced data. In: Advances in Neural Information Processing Systems, vol. 34, pp. 3163\u20133177 (2021)"},{"key":"51_CR29","unstructured":"Guo, D., Li, Z., Zhao, H., Zhou, M., Zha, H.: Learning to re-weight examples with optimal transport for imbalanced classification. In: Advances in Neural Information Processing Systems, vol. 35, pp. 25517\u201325530 (2022)"},{"key":"51_CR30","unstructured":"Sinha, S., Song, J., Garg, A., Ermon, S.: Experience replay with likelihood-free importance weights. In: Learning for Dynamics and Control Conference, pp. 110\u2013123. PMLR (2022)"},{"key":"51_CR31","unstructured":"Zhang, L., Tang, K., Yao, X.: Log-normality and skewness of estimated state\/action values in reinforcement learning. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"51_CR32","unstructured":"Chan, S.C., Lampinen, A.K., Richemond, P.H., Hill, F.: Zipfian environments for reinforcement learning. In: Conference on Lifelong Learning Agents, pp. 406\u2013429. PMLR (2022)"},{"key":"51_CR33","doi-asserted-by":"crossref","unstructured":"Rasmussen, C.E., Williams, C.K.: Gaussian Processes for Machine Learning. Springer, Heidelberg (2006)","DOI":"10.7551\/mitpress\/3206.001.0001"},{"key":"51_CR34","doi-asserted-by":"crossref","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature 521, 436\u2013444 (2015)","DOI":"10.1038\/nature14539"},{"key":"51_CR35","unstructured":"Brockman, G., et al.: Openai gym. arXiv preprint arXiv:1606.01540 (2016)"},{"key":"51_CR36","unstructured":"Ng, A.Y., Harada, D., Russell, S.: Policy invariance under reward transformations: theory and application to reward shaping. In: Icml, pp. 278\u2013287. Citeseer (1999)"},{"key":"51_CR37","doi-asserted-by":"crossref","unstructured":"Todorov, E., Erez, T., Tassa, Y.: MuJoCo: a physics engine for model-based control. In: 2012 IEEE\/RSJ International Conference on Intelligent Robots and Systems, pp. 5026\u20135033. IEEE (2012)","DOI":"10.1109\/IROS.2012.6386109"},{"key":"51_CR38","unstructured":"Plappert, M., et al.: Multi-goal reinforcement learning: challenging robotics environments and request for research. arXiv preprint arXiv:1802.09464 (2018)"},{"key":"51_CR39","unstructured":"Chan, S.C., Fishman, S., Canny, J., Korattikara, A., Guadarrama, S.: Measuring the reliability of reinforcement learning algorithms. arXiv preprint arXiv:1912.05663 (2019)"},{"key":"51_CR40","unstructured":"Andrychowicz, M., et al.: Hindsight experience replay. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"}],"container-title":["Lecture Notes in Computer Science","Advanced Intelligent Computing Technology and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-4761-4_51","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T23:22:01Z","timestamp":1690932121000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-4761-4_51"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9789819947607","9789819947614"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-4761-4_51","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"31 July 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Zhengzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 August 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 August 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2023a","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/2023\/index.htm","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}