{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T04:48:57Z","timestamp":1780548537747,"version":"3.54.1"},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2023,8,25]],"date-time":"2023-08-25T00:00:00Z","timestamp":1692921600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,8,25]],"date-time":"2023-08-25T00:00:00Z","timestamp":1692921600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SN COMPUT. SCI."],"DOI":"10.1007\/s42979-023-02114-3","type":"journal-article","created":{"date-parts":[[2023,8,25]],"date-time":"2023-08-25T16:02:19Z","timestamp":1692979339000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Coupling Effect of Exploration Rate and Learning Rate for Optimized Scaled Reinforcement Learning"],"prefix":"10.1007","volume":"4","author":[{"given":"Smriti","family":"Gupta","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sabita","family":"Pal","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kundan","family":"Kumar","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2670-5095","authenticated-orcid":false,"given":"Kuntal","family":"Ghosh","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2023,8,25]]},"reference":[{"key":"2114_CR1","unstructured":"Amit R, Meir R, Ciosek K. Discount factor as a regularizer in reinforcement learning. In: International conference on machine learning. PMLR, 2020."},{"key":"2114_CR2","doi-asserted-by":"crossref","unstructured":"Arana-Daniel N, Rosales-Ochoa R, Lo\u00b4pez-Franco C. Reinforced-slam for path planning and mapping in dynamic environments. In: 2011 8th international conference on electrical engineering, computing science and automatic control. IEEE; 2011. pp. 1\u20136.","DOI":"10.1109\/ICEEE.2011.6106563"},{"key":"2114_CR3","doi-asserted-by":"crossref","unstructured":"Arribas T, Go\u00b4mez M, Sa\u00b4nchez S. Optimal motion planning based on CACM-RL using slam. In: 2012 IEEE intelligent vehicles symposium. IEEE; 2012. pp. 75\u201380.","DOI":"10.1109\/IVS.2012.6232204"},{"key":"2114_CR4","doi-asserted-by":"publisher","first-page":"329","DOI":"10.5194\/isprs-archives-XLIII-B4-2020-329-2020","volume":"43","author":"N Botteghi","year":"2020","unstructured":"Botteghi N, Sirmacek B, Schulte R, Poel M, Brune C. Reinforcement learning helps slam: learning to build maps. Int Arch Photogramm Remote Sens Spat Inf Sci. 2020;43:329\u201335.","journal-title":"Int Arch Photogramm Remote Sens Spat Inf Sci"},{"key":"2114_CR5","unstructured":"Botteghi N, Sirmacek B, Mustafa KAA, Poel M, Stramigioli S. On reward shaping for mobile robot navigation: a reinforcement learning and SLAM based approach. arXiv preprint arXiv:2002.04109. 2020."},{"issue":"2","key":"2114_CR6","doi-asserted-by":"publisher","first-page":"156","DOI":"10.1109\/TSMCC.2007.913919","volume":"38","author":"L Busoniu","year":"2008","unstructured":"Busoniu L, Babuska R, De Schutter B. A comprehensive survey of multiagent reinforcement learning. IEEE Trans Syst Man Cybern Part C Appl Rev. 2008;38(2):156\u201372.","journal-title":"IEEE Trans Syst Man Cybern Part C Appl Rev"},{"key":"2114_CR7","unstructured":"Chaplot DS, Gandhi D, Gupta S, Gupta A, Salakhutdinov R. Learning to explore using active neural slam. arXiv preprint arXiv:2004.05155. 2020."},{"key":"2114_CR8","doi-asserted-by":"crossref","unstructured":"Chen Y, Schomaker L, Wiering MA. An investigation into the effect of the learning rate on overestimation bias of connectionist Q-learning. In: ICAART (2), 2021. pp. 107\u2013118.","DOI":"10.5220\/0010227301070118"},{"key":"2114_CR9","unstructured":"Even-Dar, E, Mansour Y, Bartlett, P. Learning rates for Q-learning. J. Mach. Lear. Res. 5(1), 2003."},{"key":"2114_CR10","doi-asserted-by":"crossref","unstructured":"Gerke M, Hoyer, H. Planning of optimal paths for autonomous agents moving in inhomogeneous environments. In: 1997 th International conference on advanced robotics. Proceedings. ICAR\u201997, IEEE; 1997. pp. 347\u2013352.","DOI":"10.1109\/ICAR.1997.620205"},{"key":"2114_CR11","doi-asserted-by":"crossref","unstructured":"He Y-S, Tang, Y-Y. Path planning of virtual human by using reinforcement learning. In: 2008 International conference on machine learning and cybernetics, Vol. 2. IEEE; 2008. pp. 987\u2013992.","DOI":"10.1109\/ICMLC.2008.4620548"},{"issue":"3","key":"2114_CR12","doi-asserted-by":"publisher","first-page":"463","DOI":"10.1017\/S0263574714000289","volume":"33","author":"M Hoy","year":"2015","unstructured":"Hoy M, Matveev AS, Savkin AV. Algorithms for collision-free navigation of mobile robots in complex cluttered environments: a survey. Robotica. 2015;33(3):463\u201397.","journal-title":"Robotica"},{"key":"2114_CR13","doi-asserted-by":"crossref","unstructured":"Bradley Knox W, Stone P. (2012) Reinforcement learning from human reward: discounting in episodic tasks. In: 2012 IEEE RO-MAN: The 21st IEEE international symposium on robot and human interactive communication, IEEE; 2012. pp. 878\u2013885.","DOI":"10.1109\/ROMAN.2012.6343862"},{"issue":"5","key":"2114_CR14","doi-asserted-by":"publisher","first-page":"1141","DOI":"10.1109\/TSMCA.2012.2227719","volume":"43","author":"A Konar","year":"2013","unstructured":"Konar A, Chakraborty IG, Singh SJ, Jain LC, Nagar AK. A deterministic improved q-learning for path planning of a mobile robot. IEEE Trans Syst Man Cybern Syst. 2013;43(5):1141\u201353.","journal-title":"IEEE Trans Syst Man Cybern Syst"},{"issue":"9","key":"2114_CR15","doi-asserted-by":"publisher","first-page":"3826","DOI":"10.1109\/TCYB.2020.2977374","volume":"50","author":"TT Nguyen","year":"2020","unstructured":"Nguyen TT, Nguyen ND, Nahavandi S. Deep reinforcement learning for multiagent systems: a review of challenges, solutions, and applications. IEEE Trans Cybern. 2020;50(9):3826\u201339.","journal-title":"IEEE Trans Cybern"},{"key":"2114_CR16","doi-asserted-by":"crossref","unstructured":"Pandey D, Pandey P. Approximate Q-learning: an introduction. In: 2010 Second international conference on machine learning and computing. IEEE; 2010. pp. 317\u2013320.","DOI":"10.1109\/ICMLC.2010.38"},{"key":"2114_CR17","unstructured":"Pham HX, La HM, Feil-Seifer D, Nguyen LV. Autonomous UAV navigation using reinforcement learning. arXiv preprint arXiv:1801.05086. 2018."},{"key":"2114_CR18","doi-asserted-by":"crossref","unstructured":"Pham HX, La HM, Feil-Seifer D, Nguyen LV. Reinforcement learning for autonomous UAV navigation using function approximation. In: 2018 IEEE International symposium on safety, security, and rescue robotics (SSRR). IEEE; 2018; pp. 1\u20136.","DOI":"10.1109\/SSRR.2018.8468611"},{"issue":"10","key":"2114_CR19","doi-asserted-by":"publisher","first-page":"85","DOI":"10.3182\/20130626-3-AU-2035.00042","volume":"46","author":"S Ragi","year":"2013","unstructured":"Ragi S, Tan CS, Edwin K, Chong P. Feasibility study of POMDP in autonomous amphibious vehicle guidance. IFAC Proc Vol. 2013;46(10):85\u201390.","journal-title":"IFAC Proc Vol"},{"issue":"1","key":"2114_CR20","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s40638-018-0084-8","volume":"5","author":"MD Muhaimin Rahman","year":"2018","unstructured":"Muhaimin Rahman MD, Hasanur Rashid SM, Hossain MM. Implementation of Q learning and deep Q network for controlling a self balancing robot model. Robot Biomimet. 2018;5(1):1\u20136.","journal-title":"Robot Biomimet"},{"key":"2114_CR21","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1016\/j.robot.2017.03.003","volume":"92","author":"AK Sadhu","year":"2017","unstructured":"Sadhu AK, Konar A. Improving the speed of convergence of multi-agent q-learning for cooperative task-planning by a robot-team. Robot Auton Syst. 2017;92:66\u201380.","journal-title":"Robot Auton Syst"},{"issue":"8","key":"2114_CR22","first-page":"2779","volume":"50","author":"AK Sadhu","year":"2018","unstructured":"Sadhu AK, Konar A. An efficient computing of correlated equilibrium for cooperative Q-learning-based multi-robot planning. IEEE Trans Syst Man Cybern: Syst. 2018;50(8):2779\u201394.","journal-title":"IEEE Trans Syst Man Cybern: Syst"},{"issue":"1","key":"2114_CR23","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1016\/S0893-6080(02)00228-9","volume":"16","author":"N Schweighofer","year":"2003","unstructured":"Schweighofer N, Doya K. Meta-learning in reinforcement learning. Neural Netw. 2003;16(1):5\u20139.","journal-title":"Neural Netw"},{"key":"2114_CR24","unstructured":"Spiros K, Daniel K. Reinforcement learning of coordination in cooperative mas. In: The 18th national conference on AI, Alberta, Canada: ACM Press; 2002. pp. 326\u2013331"},{"key":"2114_CR25","doi-asserted-by":"publisher","first-page":"18382","DOI":"10.1109\/ACCESS.2017.2746752","volume":"5","author":"J Sun","year":"2017","unstructured":"Sun J, Tang J, Lao S. Collision avoidance for cooperative UAVS with optimized artificial potential field algorithm. IEEE Access. 2017;5:18382\u201390.","journal-title":"IEEE Access"},{"key":"2114_CR26","doi-asserted-by":"crossref","unstructured":"Tijsma AD, Drugan MM, Wiering MA. Comparing exploration strategies for Q-learning in random stochastic mazes. In: 2016 IEEE symposium series on computational intelligence (SSCI), pp. 1\u20138. IEEE; 2016.","DOI":"10.1109\/SSCI.2016.7849366"},{"issue":"3","key":"2114_CR27","doi-asserted-by":"publisher","first-page":"2124","DOI":"10.1109\/TVT.2018.2890773","volume":"68","author":"C Wang","year":"2019","unstructured":"Wang C, Wang J, Shen Y, Zhang X. Autonomous navigation of uavs in large-scale complex environments: a deep reinforcement learning approach. IEEE Trans Veh Technol. 2019;68(3):2124\u201336.","journal-title":"IEEE Trans Veh Technol"},{"key":"2114_CR28","doi-asserted-by":"crossref","unstructured":"Wang Z, Shi Z, Li Y, Tu J. The optimization of path planning for multi-robot system using Boltzmann policy based Q-learning algorithm. In: 2013 IEEE international conference on robotics and biomimetics (ROBIO). IEEE; 2013. pp. 1199\u20131204.","DOI":"10.1109\/ROBIO.2013.6739627"},{"issue":"2","key":"2114_CR29","doi-asserted-by":"publisher","first-page":"621","DOI":"10.1109\/TMECH.2019.2899365","volume":"24","author":"Z Wang","year":"2019","unstructured":"Wang Z, Chen C, Li H-X, Dong D, Tarn T-J. Incremental reinforcement learning with prioritized sweeping for dynamic environments. IEEE\/ASME Trans Mechatron. 2019;24(2):621\u201332.","journal-title":"IEEE\/ASME Trans Mechatron"},{"key":"2114_CR30","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1016\/j.robot.2015.04.003","volume":"72","author":"S Wen","year":"2015","unstructured":"Wen S, Chen X, Ma C, Lam H-K, Hua S. The Q-learning obstacle avoidance algorithm based on EKF-SLAM for NAO autonomous walking under unknown environments. Robot Auton Syst. 2015;72:29\u201336.","journal-title":"Robot Auton Syst"},{"key":"2114_CR31","doi-asserted-by":"crossref","unstructured":"Yoshida N, Uchibe E, Doya K. Reinforcement learning with state-dependent discount factor. In: 2013 IEEE third joint international conference on development and learning and epigenetic robotics (ICDL). IEEE; 2013. pp 1\u20136.","DOI":"10.1109\/DevLrn.2013.6652533"},{"issue":"1","key":"2114_CR32","volume":"1","author":"Yu Xinglin","year":"2021","unstructured":"Xinglin Yu, Yuhu Wu, Sun X-M, Zhou W. A memory-greedy policy with guaranteed convergence for accelerating reinforcement learning. J Auto Veh Sys. 2021;1(1): 011005.","journal-title":"J Auto Veh Sys"},{"issue":"6","key":"2114_CR33","doi-asserted-by":"publisher","first-page":"1367","DOI":"10.1109\/TCYB.2016.2544866","volume":"47","author":"Z Zhang","year":"2016","unstructured":"Zhang Z, Zhao D, Gao J, Wang D, Dai Y. Fmrq\u2014a multiagent reinforcement learning algorithm for fully cooperative tasks. IEEE Trans Cybern. 2016;47(6):1367\u201379.","journal-title":"IEEE Trans Cybern"}],"container-title":["SN Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-023-02114-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42979-023-02114-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-023-02114-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T21:54:24Z","timestamp":1729979664000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42979-023-02114-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,25]]},"references-count":33,"journal-issue":{"issue":"5","published-online":{"date-parts":[[2023,9]]}},"alternative-id":["2114"],"URL":"https:\/\/doi.org\/10.1007\/s42979-023-02114-3","relation":{},"ISSN":["2661-8907"],"issn-type":[{"value":"2661-8907","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,8,25]]},"assertion":[{"value":"14 April 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 June 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 August 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}},{"value":"All the authors mentioned in the manuscript have agreed for authorship, read and approved the manuscript, and given consent for submission and subsequent publication of the manuscript.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics Approval"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to Participate"}},{"value":"All the authors mentioned in the manuscript have agreed to the publication of the manuscript.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to Publication"}}],"article-number":"638"}}