{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,5]],"date-time":"2025-11-05T11:34:55Z","timestamp":1762342495148,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":26,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,31]]},"DOI":"10.1145\/3672608.3707766","type":"proceedings-article","created":{"date-parts":[[2025,5,14]],"date-time":"2025-05-14T18:26:21Z","timestamp":1747247181000},"page":"1090-1097","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Explainable Reinforcement Learning for Formula One Race Strategy"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-9526-1747","authenticated-orcid":false,"given":"Devin","family":"Thomas","sequence":"first","affiliation":[{"name":"Imperial College London, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7007-0560","authenticated-orcid":false,"given":"Junqi","family":"Jiang","sequence":"additional","affiliation":[{"name":"Imperial College London, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5878-3584","authenticated-orcid":false,"given":"Avinash","family":"Kori","sequence":"additional","affiliation":[{"name":"Imperial College London, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5757-1211","authenticated-orcid":false,"given":"Aaron","family":"Russo","sequence":"additional","affiliation":[{"name":"Mercedes-AMG Petronas Formula One Team, Brackley, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-6790-5515","authenticated-orcid":false,"given":"Steffen","family":"Winkler","sequence":"additional","affiliation":[{"name":"Mercedes-AMG Petronas Formula One Team, Brackley, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2155-0995","authenticated-orcid":false,"given":"Stuart","family":"Sale","sequence":"additional","affiliation":[{"name":"Mercedes-AMG Petronas Formula One Team, Brackley, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5654-1238","authenticated-orcid":false,"given":"Joseph","family":"McMillan","sequence":"additional","affiliation":[{"name":"Mercedes-AMG Petronas Formula One Team, Brackley, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7768-1794","authenticated-orcid":false,"given":"Francesco","family":"Belardinelli","sequence":"additional","affiliation":[{"name":"Imperial College London, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5323-7739","authenticated-orcid":false,"given":"Antonio","family":"Rago","sequence":"additional","affiliation":[{"name":"Imperial College London, London, United Kingdom"}]}],"member":"320","published-online":{"date-parts":[[2025,5,14]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/J.INFFUS.2023.101805"},{"key":"e_1_3_2_1_2_1","unstructured":"Osbert Bastani Yewen Pu and Armando Solar-Lezama. 2018. Verifiable Reinforcement Learning via Policy Extraction. In NeurIPS. 2499\u20132509. 
https:\/\/proceedings.neurips.cc\/paper\/2018\/hash\/e6d8545daa42d5ced125a4bf747b3688-Abstract.html"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467166"},{"key":"e_1_3_2_1_4_1","volume-title":"Jonathan Raiman, Tim Salimans, Jeremy Schlatter, Jonas Schneider, Szymon Sidor, Ilya Sutskever, Jie Tang, Filip Wolski, and Susan Zhang.","author":"Berner Christopher","year":"2019","unstructured":"Christopher Berner, Greg Brockman, Brooke Chan, Vicki Cheung, Przemyslaw Debiak, Christy Dennison, David Farhi, Quirin Fischer, Shariq Hashme, Christopher Hesse, Rafal J\u00f3zefowicz, Scott Gray, Catherine Olsson, Jakub Pachocki, Michael Petrov, Henrique Pond\u00e9 de Oliveira Pinto, Jonathan Raiman, Tim Salimans, Jeremy Schlatter, Jonas Schneider, Szymon Sidor, Ilya Sutskever, Jie Tang, Filip Wolski, and Susan Zhang. 2019. Dota 2 with Large Scale Deep Reinforcement Learning. CoRR abs\/1912.06680 (2019). arXiv:1912.06680 http:\/\/arxiv.org\/abs\/1912.06680"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2306.16088"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1609\/AAAI.V35I8.16851"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","unstructured":"Susanne Dandl Christoph Molnar Martin Binder and Bernd Bischl. 2020. Multi-Objective Counterfactual Explanations. In PPSN. 448\u2013469. 10.1007\/978-3-030-58112-1_31","DOI":"10.1007\/978-3-030-58112-1_31"},{"key":"e_1_3_2_1_8_1","unstructured":"Samuel Greydanus Anurag Koul Jonathan Dodge and Alan Fern. 2018. Visualizing and Understanding Atari Agents. In PMLR. 1792\u20131801. https:\/\/proceedings.mlr.press\/v80\/greydanus18a.html"},{"key":"e_1_3_2_1_9_1","volume-title":"Hausknecht and Peter Stone","author":"Matthew","year":"2015","unstructured":"Matthew J. Hausknecht and Peter Stone. 2015. Deep Recurrent Q-Learning for Partially Observable MDPs. In AAAI Fall Symposia. 29\u201337. http:\/\/www.aaai.org\/ocs\/index.php\/FSS\/FSS15\/paper\/view\/11673"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/2909824.3020233"},{"key":"e_1_3_2_1_11_1","volume-title":"Virtual Strategy Engineer: Using Artificial Neural Networks for Making Race Strategy Decisions in Circuit Motorsport. Applied Sciences","author":"Heilmeier Alexander","year":"2020","unstructured":"Alexander Heilmeier, Andr\u00e9 Thomaser, Michael Graf, and Johannes Betz. 2020. Virtual Strategy Engineer: Using Artificial Neural Networks for Making Race Strategy Decisions in Circuit Motorsport. Applied Sciences (2020). https:\/\/api.semanticscholar.org\/CorpusID:228907155"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/S10100-022-00806-4"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2309.11987"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/S00521-020-04871-1"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/J.KNOSYS.2021.106781"},{"key":"e_1_3_2_1_16_1","volume-title":"Lundberg and Su-In Lee","author":"Scott","year":"2017","unstructured":"Scott M. Lundberg and Su-In Lee. 2017. A Unified Approach to Interpreting Model Predictions. In NeurIPS. 4765\u20134774. 
https:\/\/proceedings.neurips.cc\/paper\/2017\/hash\/8a20a8621978632d76c43dfd28b67767-Abstract.html"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3616864"},{"key":"e_1_3_2_1_18_1","volume-title":"Riedmiller","author":"Mnih Volodymyr","year":"2013","unstructured":"Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Alex Graves, Ioannis Antonoglou, Daan Wierstra, and Martin A. Riedmiller. 2013. Playing Atari with Deep Reinforcement Learning. CoRR abs\/1312.5602 (2013). arXiv:1312.5602 http:\/\/arxiv.org\/abs\/1312.5602"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1038\/NATURE14236"},{"key":"e_1_3_2_1_20_1","unstructured":"St\u00e9phane Ross Geoffrey J. Gordon and Drew Bagnell. 2011. A Reduction of Imitation Learning and Structured Prediction to No-Regret Online Learning. In AISTATS. 627\u2013635. http:\/\/proceedings.mlr.press\/v15\/ross11a\/ross11a.pdf"},{"volume-title":"Contributions to the Theory of Games II, Harold W","author":"Shapley Lloyd S","key":"e_1_3_2_1_21_1","unstructured":"Lloyd S Shapley. 1953. A Value for n-Person Games. In Contributions to the Theory of Games II, Harold W. Kuhn and Albert W. Tucker (Eds.). Princeton University Press, Princeton, 307\u2013317."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1038\/NATURE16961"},{"key":"e_1_3_2_1_23_1","unstructured":"Harm van Seijen Mehdi Fatemi Romain Laroche Joshua Romoff Tavian Barnes and Jeffrey Tsang. 2017. Hybrid Reward Architecture for Reinforcement Learning. In NIPS. 5392\u20135402. https:\/\/proceedings.neurips.cc\/paper\/2017\/hash\/1264a061d82a2edae1574b07249800d6-Abstract.html"},{"key":"e_1_3_2_1_24_1","unstructured":"Abhinav Verma Vijayaraghavan Murali Rishabh Singh Pushmeet Kohli and Swarat Chaudhuri. 2018. Programmatically Interpretable Reinforcement Learning. In ICML. 5052\u20135061. http:\/\/proceedings.mlr.press\/v80\/verma18a.html"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","unstructured":"Oriol Vinyals Igor Babuschkin Wojciech M. Czarnecki Micha\u00ebl Mathieu Andrew Dudzik Junyoung Chung David H. Choi Richard Powell Timo Ewalds Petko Georgiev Junhyuk Oh Dan Horgan Manuel Kroiss Ivo Danihelka Aja Huang Laurent Sifre Trevor Cai John P. Agapiou Max Jaderberg Alexander Sasha Vezhnevets R\u00e9mi Leblond Tobias Pohlen Valentin Dalibard David Budden Yury Sulsky James Molloy Tom Le Paine \u00c7aglar G\u00fcl\u00e7ehre Ziyu Wang Tobias Pfaff Yuhuai Wu Roman Ring Dani Yogatama Dario W\u00fcnsch Katrina McKinney Oliver Smith Tom Schaul Timothy P. Lillicrap Koray Kavukcuoglu Demis Hassabis Chris Apps and David Silver. 2019. Grandmaster level in StarCraft II using multi-agent reinforcement learning. Nat. 575 7782 (2019) 350\u2013354. 10.1038\/S41586-019-1724-Z","DOI":"10.1038\/S41586-019-1724-Z"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","unstructured":"Peter R. Wurman Samuel Barrett Kenta Kawamoto James MacGlashan Kaushik Subramanian Thomas J. Walsh Roberto Capobianco Alisa Devlic Franziska Eckert Florian Fuchs Leilani Gilpin Piyush Khandelwal Varun Raj Kompella HaoChih Lin Patrick MacAlpine Declan Oller Takuma Seno Craig Sherstan Michael D. Thomure Houmehr Aghabozorgi Leon Barrett Rory Douglas Dion Whitehead Peter D\u00fcrr Peter Stone Michael Spranger and Hiroaki Kitano. 2022. Outracing champion Gran Turismo drivers with deep reinforcement learning. Nat. 602 7896 (2022) 223\u2013228. 
10.1038\/S41586-021-04357-7","DOI":"10.1038\/S41586-021-04357-7"}],"event":{"name":"SAC '25: 40th ACM\/SIGAPP Symposium on Applied Computing","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing"],"location":"Catania International Airport Catania Italy","acronym":"SAC '25"},"container-title":["Proceedings of the 40th ACM\/SIGAPP Symposium on Applied Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3672608.3707766","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3672608.3707766","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:57:32Z","timestamp":1750298252000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3672608.3707766"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,31]]},"references-count":26,"alternative-id":["10.1145\/3672608.3707766","10.1145\/3672608"],"URL":"https:\/\/doi.org\/10.1145\/3672608.3707766","relation":{},"subject":[],"published":{"date-parts":[[2025,3,31]]},"assertion":[{"value":"2025-05-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}
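
The record above is the standard Crossref REST API envelope for a single work (status / message-type / message). Below is a minimal sketch of how such a record can be fetched and navigated with the Python standard library, assuming only network access to the public api.crossref.org endpoint; the DOI and every field name used are taken from the record itself.

import json
import urllib.request

# DOI taken from the "DOI" field of the record above.
DOI = "10.1145/3672608.3707766"

# Public Crossref REST API endpoint for one work (assumed reachable here;
# Crossref suggests adding a mailto query parameter for polite use).
url = f"https://api.crossref.org/works/{DOI}"
with urllib.request.urlopen(url) as resp:
    record = json.load(resp)

msg = record["message"]          # the work metadata envelope
print(msg["title"][0])           # article title
print(msg["DOI"], msg["page"])   # 10.1145/3672608.3707766 1090-1097

# Authors are given/family pairs, optionally carrying ORCID iDs.
for a in msg["author"]:
    print(a["given"], a["family"], a.get("ORCID", ""))

# References mix DOI-asserted entries and free-text "unstructured" ones.
for ref in msg.get("reference", []):
    print(ref.get("DOI") or ref.get("unstructured", ""))

The final loop mirrors how the 26 reference entries above are stored: DOI-registered entries expose a "DOI" key, while the remainder carry only the deposited free-text citation.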