{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T10:22:57Z","timestamp":1743157377093,"version":"3.40.3"},"publisher-location":"Cham","reference-count":17,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030630065"},{"type":"electronic","value":"9783030630072"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-63007-2_30","type":"book-chapter","created":{"date-parts":[[2020,11,23]],"date-time":"2020-11-23T00:02:39Z","timestamp":1606089759000},"page":"388-399","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["A Modified I2A Agent for Learning in a Stochastic Environment"],"prefix":"10.1007","author":[{"given":"Constantin-Valentin","family":"Pal","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Florin","family":"Leon","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,11,23]]},"reference":[{"key":"30_CR1","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., Moritz, P.: Trust region policy optimization. In: International Conference on Machine Learning, pp. 1889\u20131897 (2015)"},{"key":"30_CR2","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms (2017). https:\/\/arxiv.org\/abs\/1707.06347"},{"key":"30_CR3","unstructured":"Mnih, V., et al.: Asynchronous methods for deep reinforcement learning. In: International Conference on Machine Learning, pp. 1928\u20131937 (2016)"},{"key":"30_CR4","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor (2018). https:\/\/arxiv.org\/abs\/1801.01290"},{"key":"30_CR5","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., Riedmiller, M.: Playing Atari with Deep Reinforcement Learning. Preprint at: https:\/\/arxiv.org\/abs\/1801.01290 (2013)"},{"key":"30_CR6","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning (2015). https:\/\/arxiv.org\/abs\/1509.02971"},{"key":"30_CR7","doi-asserted-by":"publisher","unstructured":"Silver, D., et al.: Mastering the game of go without human knowledge. Nature 550(7676), 354\u2013359 (2017). https:\/\/doi.org\/10.1038\/nature24270","DOI":"10.1038\/nature24270"},{"issue":"6419","key":"30_CR8","doi-asserted-by":"publisher","first-page":"1140","DOI":"10.1126\/science.aar6404","volume":"362","author":"D Silver","year":"2018","unstructured":"Silver, D., Hubert, T., Schrittwieser, J., Antonoglou, I., Lai, M., Guez, A., Lanctot, M., Sifre, L., Kumaran, D., Graepel, T., Lillicrap, T.: A general reinforcement learning algorithm that masters chess, Shogi, and go through self-play. Science 362(6419), 1140\u20131144 (2018)","journal-title":"Science"},{"key":"30_CR9","unstructured":"Talvitie, E.: Model regularization for stable sample rollouts. In: Thirtieth Conference on Uncertainty in Artificial Intelligence, pp. 780\u2013789 (2014)"},{"key":"30_CR10","doi-asserted-by":"crossref","unstructured":"Talvitie, E.: Agnostic system identification for monte carlo planning. In: Twenty-Ninth AAAI Conference on Artificial Intelligence (2015)","DOI":"10.1609\/aaai.v29i1.9616"},{"key":"30_CR11","unstructured":"Racani\u00e8re, S., et al.: Imagination-augmented agents for deep reinforcement learning. In: Advances in Neural Information Processing Systems, pp. 5690\u20135701 (2017)"},{"key":"30_CR12","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., Batra, D.: Grad-Cam: visual explanations from deep networks via gradient-based localization. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 618\u2013626 (2017)","DOI":"10.1109\/ICCV.2017.74"},{"key":"30_CR13","unstructured":"Lapan, M.: Deep Reinforcement Learning Hands-On: Apply Modern RL Methods, with Deep Q-networks, Value Iteration, Policy Gradients, TRPO, AlphaGo Zero and More. Packt Publishing, Birmingham (2018)"},{"key":"30_CR14","unstructured":"Hafner, D., et al.: Learning latent dynamics for planning from pixels (2018). https:\/\/arxiv.org\/abs\/1811.04551"},{"key":"30_CR15","unstructured":"Ha, D., Schmidhuber, J.: World models (2018). https:\/\/arxiv.org\/abs\/1803.10122"},{"key":"30_CR16","unstructured":"Schrittwieser, J., et al.: Mastering Atari, go, chess and shogi by planning with a learned model (2019). https:\/\/arxiv.org\/abs\/1911.08265"},{"key":"30_CR17","unstructured":"Pal, C.V.: I2AGrid. Online source code (2020). https:\/\/github.com\/ValentinPal\/I2AGrid"}],"container-title":["Lecture Notes in Computer Science","Computational Collective Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-63007-2_30","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,29]],"date-time":"2022-11-29T07:26:06Z","timestamp":1669706766000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-63007-2_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030630065","9783030630072"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-63007-2_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"23 November 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICCCI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Computational Collective Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Da Nang","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vietnam","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 November 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 December 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iccci2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/iccci.pwr.edu.pl\/2020\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}