{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T06:12:08Z","timestamp":1760854328752,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T00:00:00Z","timestamp":1706745600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"\u00d6sterreichische Forschungsf\u00f6rderungsgesellschaft FFG","award":["887500"],"award-info":[{"award-number":["887500"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,2]]},"DOI":"10.1145\/3651781.3651828","type":"proceedings-article","created":{"date-parts":[[2024,5,30]],"date-time":"2024-05-30T10:41:25Z","timestamp":1717065685000},"page":"310-317","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Reinforcement learning in autonomous multi-vehicle systems: A structured review"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3562-5265","authenticated-orcid":false,"given":"Florian","family":"Merkle","sequence":"first","affiliation":[{"name":"University of Innsbruck, Austria"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9412-5856","authenticated-orcid":false,"given":"Gregor","family":"Blossey","sequence":"additional","affiliation":[{"name":"University of Innsbruck, Austria"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2589-1367","authenticated-orcid":false,"given":"Stefan","family":"Haeussler","sequence":"additional","affiliation":[{"name":"University of Innsbruck, Austria"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4812-4665","authenticated-orcid":false,"given":"Manuel","family":"Schneckenreither","sequence":"additional","affiliation":[{"name":"University of Innsbruck, Austria"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,5,30]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2021.108166"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2013.01.008"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.phycom.2019.03.007"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.dcan.2021.07.007"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejcon.2022.100735"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-013-0003-8"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2021.01.019"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijpe.2021.108034"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cogsys.2020.09.006"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2023.105930"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/IRDS.2002.1041682"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacol.2017.08.1219"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacsc.2020.100096"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2021.103289"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.oceaneng.2022.112035"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3570723"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ast.2021.106887"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.rcim.2021.102283"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.oceaneng.2023.113661"},{"key":"e_1_3_2_1_20_1","volume-title":"Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971","author":"Lillicrap P","year":"2015","unstructured":"Timothy\u00a0P Lillicrap, Jonathan\u00a0J Hunt, Alexander Pritzel, Nicolas Heess, Tom Erez, Yuval Tassa, David Silver, and Daan Wierstra. 2015. Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.dcan.2022.08.009"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ress.2023.109149"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ast.2019.105671"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cirp.2020.04.001"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.apor.2022.103106"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177697700"},{"key":"e_1_3_2_1_27_1","volume-title":"International conference on machine learning. 1928\u20131937","author":"Mnih Volodymyr","year":"2016","unstructured":"Volodymyr Mnih, Adria\u00a0Puigdomenech Badia, Mehdi Mirza, Alex Graves, Timothy Lillicrap, Tim Harley, David Silver, and Koray Kavukcuoglu. 2016. Asynchronous methods for deep reinforcement learning. In International conference on machine learning. 1928\u20131937."},{"key":"e_1_3_2_1_28_1","volume-title":"Human-level control through deep reinforcement learning. Nature 518, 7540","author":"Mnih Volodymyr","year":"2015","unstructured":"Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Andrei\u00a0A Rusu, Joel Veness, Marc\u00a0G Bellemare, Alex Graves, Martin Riedmiller, Andreas\u00a0K Fidjeland, Georg Ostrovski, 2015. Human-level control through deep reinforcement learning. Nature 518, 7540 (2015), 529\u2013533."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procir.2021.11.257"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.oceaneng.2022.112147"},{"key":"e_1_3_2_1_31_1","volume-title":"International conference on machine learning. PMLR","author":"Schulman John","year":"2015","unstructured":"John Schulman, Sergey Levine, Pieter Abbeel, Michael Jordan, and Philipp Moritz. 2015. Trust region policy optimization. In International conference on machine learning. PMLR, 1889\u20131897."},{"key":"e_1_3_2_1_32_1","volume-title":"Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347","author":"Schulman John","year":"2017","unstructured":"John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, and Oleg Klimov. 2017. Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)."},{"volume-title":"Introduction to reinforcement learning. Vol.\u00a0135","author":"Sutton S","key":"e_1_3_2_1_33_1","unstructured":"Richard\u00a0S Sutton, Andrew\u00a0G Barto, 1998. Introduction to reinforcement learning. Vol.\u00a0135. MIT press Cambridge."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procir.2022.05.131"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cja.2020.05.001"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacol.2020.12.1462"},{"volume-title":"Learning from delayed rewards. Ph.\u00a0D. Dissertation","author":"Cornish\u00a0Hellaby Watkins Christopher John","key":"e_1_3_2_1_37_1","unstructured":"Christopher John Cornish\u00a0Hellaby Watkins. 1989. Learning from delayed rewards. Ph.\u00a0D. Dissertation. University of Cambridge England."},{"key":"e_1_3_2_1_38_1","volume-title":"Analyzing the past to prepare for the future: Writing a literature review. MIS quarterly","author":"Webster Jane","year":"2002","unstructured":"Jane Webster and Richard\u00a0T Watson. 2002. Analyzing the past to prepare for the future: Writing a literature review. MIS quarterly (2002), xiii\u2013xxiii."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.dt.2022.09.014"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cie.2021.107695"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.oceaneng.2023.114005"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2020.103594"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aei.2006.01.002"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2007.01.008"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2021.103140"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2022.109075"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ast.2019.06.024"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cja.2021.09.008"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.09.044"}],"event":{"name":"ICSCA 2024: 2024 13th International Conference on Software and Computer Applications","acronym":"ICSCA 2024","location":"Bali Island Indonesia"},"container-title":["Proceedings of the 2024 13th International Conference on Software and Computer Applications"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3651781.3651828","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3651781.3651828","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T19:34:07Z","timestamp":1755891247000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3651781.3651828"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2]]},"references-count":49,"alternative-id":["10.1145\/3651781.3651828","10.1145\/3651781"],"URL":"https:\/\/doi.org\/10.1145\/3651781.3651828","relation":{},"subject":[],"published":{"date-parts":[[2024,2]]},"assertion":[{"value":"2024-05-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}