{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T10:40:38Z","timestamp":1743072038348,"version":"3.40.3"},"publisher-location":"Cham","reference-count":11,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030839055"},{"type":"electronic","value":"9783030839062"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-83906-2_18","type":"book-chapter","created":{"date-parts":[[2021,8,24]],"date-time":"2021-08-24T23:05:04Z","timestamp":1629846304000},"page":"226-238","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["No Free Lunch: Overcoming Reward Gaming in AI Safety Gridworlds"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4843-6818","authenticated-orcid":false,"given":"Mariya","family":"Tsvarkaleva","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1426-1896","authenticated-orcid":false,"given":"Louise A.","family":"Dennis","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,8,25]]},"reference":[{"key":"18_CR1","unstructured":"Armstrong, S., Levinstein, B.: Low impact artificial intelligences. CoRR abs\/1705.10720 (2017). http:\/\/arxiv.org\/abs\/1705.10720"},{"key":"18_CR2","unstructured":"Clark, J., Amodei, D.: Faulty reward functions in the wild (2016). https:\/\/blog.openai.com\/faulty-reward-functions\/"},{"key":"18_CR3","unstructured":"Hadfield-Menell, D., Milli, S., Abbeel, P., Russell, S.J., Dragan, A.: Inverse reward design. In: Guyon, I., et al. (eds.) Advances in Neural Information Processing Systems, vol. 30. Curran Associates, Inc. (2017)"},{"key":"18_CR4","doi-asserted-by":"crossref","unstructured":"Hessel, M., et al.: Rainbow: combining improvements in deep reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 32, no. 1, April 2018","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"18_CR5","unstructured":"Krakovna, V., Orseau, L., Martic, M., Legg, S.: Measuring and avoiding side effects using relative reachability. CoRR abs\/1806.01186 (2018). http:\/\/arxiv.org\/abs\/1806.01186"},{"key":"18_CR6","unstructured":"Leike, J., et al.: AI safety gridworlds. CoRR abs\/1711.09883 (2017). http:\/\/arxiv.org\/abs\/1711.09883"},{"key":"18_CR7","unstructured":"Mnih, V., et al.: Playing atari with deep reinforcement learning. CoRR abs\/1312.5602 (2013). http:\/\/arxiv.org\/abs\/1312.5602"},{"key":"18_CR8","unstructured":"Santara, A., et al.: Rail: risk-averse imitation learning. In: Proceedings of AAMAS 2018, pp. 2062\u20132063 (2018)"},{"issue":"7587","key":"18_CR9","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of Go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016). https:\/\/doi.org\/10.1038\/nature16961","journal-title":"Nature"},{"issue":"6419","key":"18_CR10","doi-asserted-by":"publisher","first-page":"1140","DOI":"10.1126\/science.aar6404","volume":"362","author":"D Silver","year":"2018","unstructured":"Silver, D., et al.: A general reinforcement learning algorithm that masters chess, shogi, and go through self-play. Science 362(6419), 1140\u20131144 (2018). https:\/\/doi.org\/10.1126\/science.aar6404","journal-title":"Science"},{"key":"18_CR11","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction, 2nd edn. The MIT Press, Cambridge (2018)","edition":"2"}],"container-title":["Lecture Notes in Computer Science","Computer Safety, Reliability, and Security. SAFECOMP 2021 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-83906-2_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T17:41:52Z","timestamp":1710265312000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-83906-2_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030839055","9783030839062"],"references-count":11,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-83906-2_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"25 August 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SAFECOMP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Computer Safety, Reliability, and Security","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"York","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 September 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"40","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"safecomp2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/safecomp2021.hosted.york.ac.uk\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"76","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"17","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"22% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"From the workshops 26 full and 4 short papers were accepted for publication.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}