{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T19:38:21Z","timestamp":1743017901533,"version":"3.40.3"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031539657"},{"type":"electronic","value":"9783031539664"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-53966-4_9","type":"book-chapter","created":{"date-parts":[[2024,2,14]],"date-time":"2024-02-14T18:02:29Z","timestamp":1707933749000},"page":"109-123","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["\u00d6kolopoly: Case Study on\u00a0Large Action Spaces in\u00a0Reinforcement Learning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1463-2706","authenticated-orcid":false,"given":"Raphael C.","family":"Engelhardt","sequence":"first","affiliation":[]},{"given":"Ralitsa","family":"Raycheva","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7109-7813","authenticated-orcid":false,"given":"Moritz","family":"Lange","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6237-740X","authenticated-orcid":false,"given":"Laurenz","family":"Wiskott","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1343-4209","authenticated-orcid":false,"given":"Wolfgang","family":"Konen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,2,15]]},"reference":[{"key":"9_CR1","doi-asserted-by":"crossref","first-page":"48","DOI":"10.1186\/2046-4053-2-48","volume":"2","author":"O Bosch","year":"2013","unstructured":"Bosch, O., Nguyen, N., Sun, D.: Addressing the critical need for \u201cnew ways of thinking\u2019\u2019 in managing complex issues in a socially responsible way. Bus. Syst. Rev. 2, 48\u201370 (2013)","journal-title":"Bus. Syst. Rev."},{"doi-asserted-by":"publisher","unstructured":"Brockman, G., et al.: OpenAI Gym (2016). https:\/\/doi.org\/10.48550\/arXiv.1606.01540","key":"9_CR2","DOI":"10.48550\/arXiv.1606.01540"},{"issue":"7307","key":"9_CR3","doi-asserted-by":"publisher","first-page":"756","DOI":"10.1038\/nature09304","volume":"466","author":"S Cooper","year":"2010","unstructured":"Cooper, S., et al.: Predicting protein structures with a multiplayer online game. Nature 466(7307), 756\u2013760 (2010). https:\/\/doi.org\/10.1038\/nature09304","journal-title":"Nature"},{"key":"9_CR4","first-page":"411","volume":"13","author":"A Dobrovsky","year":"2019","unstructured":"Dobrovsky, A., Borghoff, U.M., Hofmann, M.: Improving adaptive gameplay in serious games through interactive deep reinforcement learning. Cogn. Infocommun. Theory Appl. 13, 411\u2013432 (2019)","journal-title":"Cogn. Infocommun. Theory Appl."},{"key":"9_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"314","DOI":"10.1007\/978-3-319-74727-9_37","volume-title":"Computer Aided Systems Theory \u2013 EUROCAST 2017","author":"A Dobrovsky","year":"2018","unstructured":"Dobrovsky, A., Wilczak, C.W., Hahn, P., Hofmann, M., Borghoff, U.M.: Deep reinforcement learning in serious games: analysis and design of deep neural network architectures. In: Moreno-D\u00edaz, R., Pichler, F., Quesada-Arencibia, A. (eds.) EUROCAST 2017. LNCS, vol. 10672, pp. 314\u2013321. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-74727-9_37"},{"doi-asserted-by":"publisher","unstructured":"Dulac-Arnold, G., et al.: Deep reinforcement learning in large discrete action spaces (2015). https:\/\/doi.org\/10.48550\/arXiv.1512.07679","key":"9_CR6","DOI":"10.48550\/arXiv.1512.07679"},{"issue":"9","key":"9_CR7","doi-asserted-by":"publisher","first-page":"2419","DOI":"10.1007\/s10994-021-05961-4","volume":"110","author":"G Dulac-Arnold","year":"2021","unstructured":"Dulac-Arnold, G., et al.: Challenges of real-world reinforcement learning: definitions, benchmarks and analysis. Mach. Learn. 110(9), 2419\u20132468 (2021). https:\/\/doi.org\/10.1007\/s10994-021-05961-4","journal-title":"Mach. Learn."},{"unstructured":"Fujimoto, S., van Hoof, H., Meger, D.: Addressing function approximation error in actor-critic methods. In: Dy, J., Krause, A. (eds.) Proceedings of the 35th International Conference on Machine Learning, PMLR, vol. 80, pp. 1587\u20131596 (2018)","key":"9_CR8"},{"unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: Dy, J., Krause, A. (eds.) Proceedings of the 35th International Conference on Machine Learning, PMLR, vol. 80, pp. 1861\u20131870 (2018)","key":"9_CR9"},{"doi-asserted-by":"crossref","unstructured":"Hornak, D., Jascur, M., Ferencik, N., Bundzel, M.: Proof of concept: using reinforcement learning agent as an adversary in serious games. In: 2019 IEEE International Work Conference on Bioinspired Intelligence, pp. 111\u2013116 (2019)","key":"9_CR10","DOI":"10.1109\/IWOBI47054.2019.9114431"},{"doi-asserted-by":"publisher","unstructured":"Huang, S., Onta\u00f1\u00f3n, S.: A closer look at invalid action masking in policy gradient algorithms. In: The International FLAIRS Conference Proceedings, vol. 35 (2022). https:\/\/doi.org\/10.32473\/flairs.v35i.130584","key":"9_CR11","DOI":"10.32473\/flairs.v35i.130584"},{"issue":"2","key":"9_CR12","doi-asserted-by":"publisher","first-page":"239","DOI":"10.3390\/challe5020239","volume":"5","author":"NC Nguyen","year":"2014","unstructured":"Nguyen, N.C., Bosch, O.J.H.: The art of interconnected thinking: starting with the young. Challenges 5(2), 239\u2013259 (2014). https:\/\/doi.org\/10.3390\/challe5020239","journal-title":"Challenges"},{"unstructured":"Pazis, J., Parr, R.: Generalized value functions for large action sets. In: Proceedings of the 28th International Conference on International Conference on Machine Learning, pp. 1185\u20131192 (2011)","key":"9_CR13"},{"issue":"268","key":"9_CR14","first-page":"1","volume":"22","author":"A Raffin","year":"2021","unstructured":"Raffin, A., Hill, A., Gleave, A., Kanervisto, A., Ernestus, M., Dormann, N.: Stable-baselines3: reliable reinforcement learning implementations. J. Mach. Learn. Res. 22(268), 1\u20138 (2021)","journal-title":"J. Mach. Learn. Res."},{"unstructured":"Raycheva, R.: Erstellung eines custom environments in OpenAI Gym f\u00fcr das Spiel \u00d6kolopoly. Technical report, TH K\u00f6ln (2021)","key":"9_CR15"},{"doi-asserted-by":"publisher","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms (2017). https:\/\/doi.org\/10.48550\/arXiv.1707.06347","key":"9_CR16","DOI":"10.48550\/arXiv.1707.06347"},{"doi-asserted-by":"publisher","unstructured":"Teixeira, J.d.S., Angeluci, A.C.B., Junior, P.P., Martin, J.G.P.: \u2018Let\u2019s play?\u2019 A systematic review of board games in biology. J. Biol. Educ. 1\u201320 (2022). https:\/\/doi.org\/10.1080\/00219266.2022.2041461","key":"9_CR17","DOI":"10.1080\/00219266.2022.2041461"},{"issue":"12","key":"9_CR18","first-page":"713","volume":"39","author":"F Vester","year":"1988","unstructured":"Vester, F.: Der blaue Planet in der Krise. Gewerkschaftliche Monatshefte 39(12), 713\u2013773 (1988)","journal-title":"Gewerkschaftliche Monatshefte"},{"unstructured":"Vester, F.: \u00d6kolopoly: das kybernetische Umweltspiel. Studiengruppe f\u00fcr Biologie und Umwelt (1989)","key":"9_CR19"},{"unstructured":"Zahavy, T., Haroush, M., Merlis, N., Mankowitz, D.J., Mannor, S.: Learn what not to learn: action elimination with deep reinforcement learning. In: Bengio, S., et al. (eds.) Advances in Neural Information Processing Systems, vol. 31 (2018)","key":"9_CR20"}],"container-title":["Lecture Notes in Computer Science","Machine Learning, Optimization, and Data Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-53966-4_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,14]],"date-time":"2024-02-14T18:03:46Z","timestamp":1707933826000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-53966-4_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031539657","9783031539664"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-53966-4_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"15 February 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"LOD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Machine Learning, Optimization, and Data Science","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Grasmere","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mod2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/lod2023.icas.cc\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"In-house system and EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"119","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"72","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"61% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5-6","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1-2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}