{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,8]],"date-time":"2025-05-08T13:40:03Z","timestamp":1746711603593,"version":"3.40.5"},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2024,1,18]],"date-time":"2024-01-18T00:00:00Z","timestamp":1705536000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,18]],"date-time":"2024-01-18T00:00:00Z","timestamp":1705536000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Ann Oper Res"],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1007\/s10479-023-05798-1","type":"journal-article","created":{"date-parts":[[2024,1,18]],"date-time":"2024-01-18T07:02:22Z","timestamp":1705561342000},"page":"1059-1091","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Population-based exploration in reinforcement learning through repulsive reward shaping using eligibility traces"],"prefix":"10.1007","volume":"347","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6755-6677","authenticated-orcid":false,"given":"Melis Ilayda","family":"Bal","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2845-224X","authenticated-orcid":false,"given":"Cem","family":"Iyigun","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0509-9153","authenticated-orcid":false,"given":"Faruk","family":"Polat","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5746-9702","authenticated-orcid":false,"given":"Huseyin","family":"Aydin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,1,18]]},"reference":[{"key":"5798_CR1","unstructured":"Amin, S., Gomrokchi, M., Satija, H., van Hoof, H., & Precup, D. (2021). A survey of exploration methods in reinforcement learning. arXiv preprint arXiv:2109.00157."},{"key":"5798_CR2","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1016\/j.future.2022.03.016","volume":"133","author":"H Ayd\u0131n","year":"2022","unstructured":"Ayd\u0131n, H., \u00c7ilden, E., & Polat, F. (2022). Using chains of bottleneck transitions to decompose and solve reinforcement learning tasks with hidden states. Future Generation Computer Systems, 133, 153\u2013168. https:\/\/doi.org\/10.1016\/j.future.2022.03.016","journal-title":"Future Generation Computer Systems"},{"key":"5798_CR3","unstructured":"Barto, A. G., Bradtke, S. J., Singh, S. P. (1991). Real-time learning and control using asynchronous dynamic programming. Tech. Rep., University of Massachusetts at Amherst, Department of Computer and Information Science."},{"key":"5798_CR4","unstructured":"Bellemare, M. G., Srinivasan, S., Ostrovski, G., Schaul, T., Saxton, D., & Munos, R. (2016). Unifying count-based exploration and intrinsic motivation (pp. 1479\u20131487). Curran Associates Inc."},{"key":"5798_CR5","first-page":"953","volume":"3","author":"R Brafman","year":"2001","unstructured":"Brafman, R., & Tennenholtz, M. (2001). R-max\u2013A general polynomial time algorithm for near-optimal reinforcement learning. 
The Journal of Machine Learning Research, 3, 953\u2013958.","journal-title":"The Journal of Machine Learning Research"},{"key":"5798_CR6","doi-asserted-by":"crossref","unstructured":"Bridle, J. S. (1990). Probabilistic interpretation of feedforward classification network outputs, with relationships to statistical pattern recognition. In F. F. Souli\u00e9 & J. H\u00e9rault (Eds.), Neurocomputing (pp. 227\u2013236). Springer.","DOI":"10.1007\/978-3-642-76153-9_28"},{"key":"5798_CR7","doi-asserted-by":"publisher","first-page":"695","DOI":"10.1002\/(SICI)1098-111X(199710)12:10<695::AID-INT1>3.0.CO;2-T","volume":"12","author":"PVC Caironi","year":"1997","unstructured":"Caironi, P. V. C., & Dorigo, M. (1997). Training and delayed reinforcements in q-learning agents. International Journal of Intelligent Systems, 12, 695\u2013724.","journal-title":"International Journal of Intelligent Systems"},{"key":"5798_CR8","unstructured":"Dabney, W. , Ostrovski, G., & Barreto, A. (2020). Temporally-extended $$\\epsilon $$-greedy exploration. arXiv:2006.01782"},{"key":"5798_CR9","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1007\/BF00115298","volume":"25","author":"P Dayan","year":"1996","unstructured":"Dayan, P., & Sejnowski, T. (1996). Exploration bonuses and dual control. Machine learning, 25, 5\u201322.","journal-title":"Machine learning"},{"key":"5798_CR10","doi-asserted-by":"publisher","DOI":"10.1017\/S026988891900002X","volume":"34","author":"A Demir","year":"2019","unstructured":"Demir, A., \u00c7ilden, E., & Polat, F. (2019). Automatic landmark discovery for learning agents under partial observability. The Knowledge Engineering Review, 34, e11.","journal-title":"The Knowledge Engineering Review"},{"issue":"4","key":"5798_CR11","doi-asserted-by":"publisher","first-page":"1543","DOI":"10.1007\/s13042-022-01713-5","volume":"14","author":"A Demir","year":"2023","unstructured":"Demir, A., \u00c7ilden, E., & Polat, F. (2023). Landmark based guidance for reinforcement learning agents under partial observability. International Journal of Machine Learning and Cybernetics, 14(4), 1543\u20131563.","journal-title":"International Journal of Machine Learning and Cybernetics"},{"key":"5798_CR12","unstructured":"Grzes, M. (2010). Improving exploration in reinforcement learning through domain knowledge and parameter analysis. University of York. https:\/\/etheses.whiterose.ac.uk\/936\/"},{"issue":"2","key":"5798_CR13","first-page":"289","volume":"35","author":"AM Hinz","year":"1989","unstructured":"Hinz, A. M. (1989). The tower of hanoi. Enseignement des Math\u00e9matiques, 35(2), 289\u2013321.","journal-title":"Enseignement des Math\u00e9matiques"},{"key":"5798_CR14","unstructured":"Iqbal, S., & Sha, F. (2021). Coordinated exploration via intrinsic rewards for multi-agent reinforcement learning. arXiv:1905.12127"},{"key":"5798_CR15","unstructured":"Khadka, S., Majumdar, S., Nassar, T., Dwiel, Z., Tumer, E., Miret, S., & Tumer, K. (2019). Collaborative evolutionary reinforcement learning. arXiv:1905.00976"},{"key":"5798_CR16","doi-asserted-by":"crossref","unstructured":"Kolter, J. Z. , & Ng, A. Y. (2009). Near-bayesian exploration in polynomial time. In Proceedings of the 26th annual international conference on machine learning (pp. 513\u2013520). Association for Computing Machinery.","DOI":"10.1145\/1553374.1553441"},{"key":"5798_CR17","unstructured":"Laud, A. D. (2004). Theory and application of reward shaping in reinforcement learning. Unpublished Doctoral dissertation. 
University of Illinois at Urbana-Champaign."},{"key":"5798_CR18","unstructured":"Mahajan, A. , Rashid, T. , Samvelyan, M., & Whiteson, S. (2019). Maven: Multi-agent variational exploration. In Advances in neural information processing systems, 32."},{"key":"5798_CR19","doi-asserted-by":"crossref","unstructured":"Menache, I. , Mannor, S., & Shimkin, N. (2002). Q-cut - dynamic discovery of sub-goals in reinforcement learning. In Proceedings of the 13th European conference on machine learning (pp. 295\u2013306). Springer.","DOI":"10.1007\/3-540-36755-1_25"},{"key":"5798_CR20","unstructured":"Ng, A. Y., Harada, D., & Russell, S. (1999). Policy invariance under reward transformations: Theory and application to reward shaping. In Proceedings of the 16th international conference on machine learning (pp. 278\u2013287)."},{"key":"5798_CR21","doi-asserted-by":"crossref","unstructured":"Pathak, D., Agrawal, P., Efros, A. A., & Darrell, T. (2017). Curiosity-driven exploration by self-supervised prediction. In Proceedings of the 34th international conference on machine learning (Vol. 70, pp. 2778\u20132787).","DOI":"10.1109\/CVPRW.2017.70"},{"key":"5798_CR22","unstructured":"Rummery, G., & Niranjan, M. (1994). On-line q-learning using connectionist systems. Technical Report CUED\/F-INFENG\/TR 166."},{"key":"5798_CR23","doi-asserted-by":"crossref","unstructured":"Schmidhuber, J. (1991a). Curious model-building control systems. In Proceedings of international joint conference on neural networks (pp. 1458\u20131463).","DOI":"10.1109\/IJCNN.1991.170605"},{"key":"5798_CR24","doi-asserted-by":"crossref","unstructured":"Schmidhuber, J. (1991b). A possibility for implementing curiosity and boredom in model-building neural controllers. In Proceedings of the first international conference on simulation of adaptive behavior on from animals to animals (pp. 222\u2013227), Cambridge, MA, USA.","DOI":"10.7551\/mitpress\/3115.003.0030"},{"key":"5798_CR25","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1007\/BF00114726","volume":"22","author":"S Singh","year":"1995","unstructured":"Singh, S., Sutton, R., & Kaelbling, P. (1995). Reinforcement learning with replacing eligibility traces. Machine Learning, 22, 123\u2013158.","journal-title":"Machine Learning"},{"key":"5798_CR26","doi-asserted-by":"crossref","unstructured":"Sutton, R. S. (1990a). Integrated architectures for learning, planning, and reacting based on approximating dynamic programming. In Proceedings of the 7th international conference on machine learning (pp. 216\u2013224).","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"5798_CR27","unstructured":"Sutton, R. S. (1990b). Integrated modeling and control based on reinforcement learning and dynamic programming. In Proceedings of the 3rd international conference on neural information processing systems (pp. 471\u2013478), San Francisco, CA, USA."},{"key":"5798_CR28","unstructured":"Sutton, R. S., & Barto, A. G. (2018). Reinforcement learning: An introduction (2nd ed.). The MIT Press."},{"key":"5798_CR29","doi-asserted-by":"crossref","unstructured":"Tokic, M., & Palm, G. (2011). Value-difference based exploration: Adaptive control between epsilon-greedy and softmax. In J.\u00a0Bach, & S.\u00a0Edelkamp (Eds.), Ki 2011: Advances in artificial intelligence (pp. 335\u2013346).","DOI":"10.1007\/978-3-642-24455-1_33"},{"key":"5798_CR30","unstructured":"Wang, T., Wang, J., Wu, Y., & Zhang, C. (2019). Influence-based multi-agent exploration. 
arXiv:1910.05512"},{"key":"5798_CR31","unstructured":"Watkins, C. J. C. H. (1989). Learning from delayed rewards. Unpublished doctoral dissertation, King\u2019s College, Cambridge, UK."},{"key":"5798_CR32","unstructured":"Wiering, M. (1999). Explorations in efficient reinforcement learning. Unpublished doctoral dissertation, Universiteit van Amsterdam."}],"container-title":["Annals of Operations Research"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10479-023-05798-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10479-023-05798-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10479-023-05798-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,8]],"date-time":"2025-05-08T13:04:11Z","timestamp":1746709451000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10479-023-05798-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1,18]]},"references-count":32,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2025,4]]}},"alternative-id":["5798"],"URL":"https:\/\/doi.org\/10.1007\/s10479-023-05798-1","relation":{},"ISSN":["0254-5330","1572-9338"],"issn-type":[{"type":"print","value":"0254-5330"},{"type":"electronic","value":"1572-9338"}],"subject":[],"published":{"date-parts":[[2024,1,18]]},"assertion":[{"value":"25 November 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 December 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 January 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"This article does not contain any studies with human participants or animals performed by any of the authors.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}}]}}