{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,15]],"date-time":"2025-12-15T14:21:18Z","timestamp":1765808478892,"version":"3.48.0"},"reference-count":122,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2025,7,22]],"date-time":"2025-07-22T00:00:00Z","timestamp":1753142400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,7,22]],"date-time":"2025-07-22T00:00:00Z","timestamp":1753142400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. ITS Res."],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s13177-025-00521-9","type":"journal-article","created":{"date-parts":[[2025,7,22]],"date-time":"2025-07-22T09:00:45Z","timestamp":1753174845000},"page":"1406-1440","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["A Comprehensive Review on Reinforcement Learning Methods for Autonomous Lane Changing"],"prefix":"10.1007","volume":"23","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-0886-1251","authenticated-orcid":false,"given":"Meriem","family":"Bouali","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8742-1240","authenticated-orcid":false,"given":"Abderrazak","family":"Sebaa","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0309-8942","authenticated-orcid":false,"given":"Nadir","family":"Farhi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,22]]},"reference":[{"issue":"6","key":"521_CR1","doi-asserted-by":"crossref","first-page":"4307","DOI":"10.1007\/s10462-021-10108-x","volume":"55","author":"S Adams","year":"2022","unstructured":"Adams, S., Cody, T., Beling, P.A.: A survey of inverse reinforcement learning. Artif. Intell. Rev. 55(6), 4307\u20134346 (2022)","journal-title":"Artif. Intell. Rev."},{"issue":"17\u201318","key":"521_CR2","doi-asserted-by":"crossref","first-page":"6457","DOI":"10.1016\/j.eswa.2015.04.015","volume":"42","author":"DO Aihe","year":"2015","unstructured":"Aihe, D.O., Gonzalez, A.J.: Correcting flawed expert knowledge through reinforcement learning. Expert Syst. Appl. 42(17\u201318), 6457\u20136471 (2015)","journal-title":"Expert Syst. Appl."},{"issue":"8","key":"521_CR3","doi-asserted-by":"crossref","first-page":"3490","DOI":"10.3390\/en16083490","volume":"16","author":"N Albarella","year":"2023","unstructured":"Albarella, N., Lui, D.G., Petrillo, A., Santini, S.: A hybrid deep reinforcement learning and optimal control architecture for autonomous highway driving. Energies 16(8), 3490 (2023)","journal-title":"Energies"},{"doi-asserted-by":"crossref","unstructured":"Alizadeh, A., Moghadam, M., Bicer, Y., Ure, N.K., Yavas, U., Kurtulus, C.: Automated lane change decision making using deep reinforcement learning in dynamic and uncertain highway environment. In: 2019 IEEE Intell, pp. 1399\u20131404. Transp. Syst. Conf. ITSC, IEEE (2019)","key":"521_CR4","DOI":"10.1109\/ITSC.2019.8917192"},{"key":"521_CR5","doi-asserted-by":"crossref","first-page":"27127","DOI":"10.1109\/ACCESS.2023.3253503","volume":"11","author":"A Alzubaidi","year":"2023","unstructured":"Alzubaidi, A., Al Sumaiti, A.S., Byon, Y.J., Al Hosani, K.: Emergency vehicle aware lane change decision model for autonomous vehicles using deep reinforcement learning. IEEE Access 11, 27127\u201327137 (2023)","journal-title":"IEEE Access"},{"unstructured":"Ambros, J., Kysel\u1ef3, M.: Free-flow vs car-following speeds: Does the difference matter? Adv. Transp. Stud. (40) (2016)","key":"521_CR6"},{"issue":"2","key":"521_CR7","doi-asserted-by":"crossref","first-page":"740","DOI":"10.1109\/TITS.2020.3024655","volume":"23","author":"S Aradi","year":"2020","unstructured":"Aradi, S.: Survey of deep reinforcement learning for motion planning of autonomous vehicles. IEEE Trans. Intell. Transp. Syst. 23(2), 740\u2013759 (2020)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"521_CR8","doi-asserted-by":"crossref","first-page":"103500","DOI":"10.1016\/j.artint.2021.103500","volume":"297","author":"S Arora","year":"2021","unstructured":"Arora, S., Doshi, P.: A survey of inverse reinforcement learning: Challenges, methods and progress. Artif. Intell. 297, 103500 (2021)","journal-title":"Artif. Intell."},{"key":"521_CR9","doi-asserted-by":"crossref","first-page":"793","DOI":"10.1007\/s100510050504","volume":"5","author":"R Barlovic","year":"1998","unstructured":"Barlovic, R., Santen, L., Schadschneider, A., Schreckenberg, M.: Metastable states in cellular automata for traffic flow. Eur. Phys. J. B 5, 793\u2013800 (1998)","journal-title":"Eur. Phys. J. B"},{"issue":"1\u20132","key":"521_CR10","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1023\/A:1022140919877","volume":"13","author":"AG Barto","year":"2003","unstructured":"Barto, A.G., Mahadevan, S.: Recent advances in hierarchical reinforcement learning. Discrete Event Dyn. Syst. 13(1\u20132), 41\u201377 (2003)","journal-title":"Discrete Event Dyn. Syst."},{"unstructured":"Behrisch, M., Bieker, L., Erdmann, J., Krajzewicz, D.: Sumo\u2013simulation of urban mobility: an overview. In: Proceedings of SIMUL 2011, the third international conference on advances in system simulation. ThinkMind. (2011)","key":"521_CR11"},{"unstructured":"Berner, C., Brockman, G., Chan, B., Cheung, V., D\u0119biak, P., Dennison, C., Farhi, D., Fischer, Q., Hashme, S., Hesse, C., et\u00a0al.: Dota 2 with large scale deep reinforcement learning. arXiv:1912.06680 (2019)","key":"521_CR12"},{"issue":"171","key":"521_CR13","first-page":"18","volume":"812","author":"D Bezzina","year":"2014","unstructured":"Bezzina, D., Sayer, J.: Safety pilot model deployment: Test conductor team report. Rep. No DOT HS 812(171), 18 (2014)","journal-title":"Rep. No DOT HS"},{"unstructured":"Bojarski, M., Del\u00a0Testa, D., Dworakowski, D., Firner, B., Flepp, B., Goyal, P., Jackel, L.D., Monfort, M., Muller, U., Zhang, J., et\u00a0al.: End to end learning for self-driving cars. arXiv:1604.07316 (2016)","key":"521_CR14"},{"doi-asserted-by":"crossref","unstructured":"Bonjour, T., Haliem, M., Alsalem, A., Thomas, S., Li, H., Aggarwal, V., Kejriwal, M., Bhargava, B.: Decision making in monopoly using a hybrid deep reinforcement learning approach. IEE Trans. Emerg. Topics Comput. Intell. 6(6), 1335\u20131344 (2022)","key":"521_CR15","DOI":"10.1109\/TETCI.2022.3166555"},{"unstructured":"Bowling, M., Martin, J.D., Abel, D., Dabney, W.: Settling the reward hypothesis. arXiv:2212.10420 (2022)","key":"521_CR16"},{"issue":"1","key":"521_CR17","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TCIAIG.2012.2186810","volume":"4","author":"CB Browne","year":"2012","unstructured":"Browne, C.B., Powley, E., Whitehouse, D., Lucas, S.M., Cowling, P.I., Rohlfshagen, P., Tavener, S., Perez, D., Samothrakis, S., Colton, S.: A survey of monte carlo tree search methods. IEEE Trans. Comput. Intell. AI Games 4(1), 1\u201343 (2012)","journal-title":"IEEE Trans. Comput. Intell. AI Games"},{"key":"521_CR18","doi-asserted-by":"crossref","first-page":"103656","DOI":"10.1016\/j.trc.2022.103656","volume":"138","author":"Z Cao","year":"2022","unstructured":"Cao, Z., Xu, S., Jiao, X., Peng, H., Yang, D.: Trustworthy safety improvement for autonomous driving using reinforcement learning. Transp. Res. Part C Emerg. Technol 138, 103656 (2022)","journal-title":"Transp. Res. Part C Emerg. Technol"},{"doi-asserted-by":"crossref","unstructured":"Casas, J., Ferrer, J.L., Garcia, D., Perarnau, J., Torday, A.: Traffic simulation with aimsun. Fundam. Traffic Simul. 173\u2013232 (2010)","key":"521_CR19","DOI":"10.1007\/978-1-4419-6142-6_5"},{"issue":"10","key":"521_CR20","doi-asserted-by":"crossref","first-page":"896","DOI":"10.1016\/j.trb.2004.12.001","volume":"39","author":"MJ Cassidy","year":"2005","unstructured":"Cassidy, M.J., Rudjanakanoknad, J.: Increasing the capacity of an isolated merge by metering its on-ramp. Transp. Res. B Methodol. 39(10), 896\u2013913 (2005)","journal-title":"Transp. Res. B Methodol."},{"doi-asserted-by":"crossref","unstructured":"Chae, H., Kang, C.M., Kim, B., Kim, J., Chung, C.C., Choi, J.W.: Autonomous braking system via deep reinforcement learning. In: 2017 IEEE 20th International conference on intelligent transportation systems (ITSC), pp. 1-6. IEEE, (2017)","key":"521_CR21","DOI":"10.1109\/ITSC.2017.8317839"},{"key":"521_CR22","doi-asserted-by":"crossref","first-page":"126981","DOI":"10.1016\/j.physa.2022.126981","volume":"593","author":"P Chauhan","year":"2022","unstructured":"Chauhan, P., Kanagaraj, V., Asaithambi, G.: Understanding the mechanism of lane changing process and dynamics using microscopic traffic data. Phys. A 593, 126981 (2022)","journal-title":"Phys. A"},{"issue":"11","key":"521_CR23","doi-asserted-by":"crossref","first-page":"11351","DOI":"10.1109\/TVT.2022.3193096","volume":"71","author":"C Chen","year":"2022","unstructured":"Chen, C., Cai, M., Wang, J., Li, K., Xu, Q., Wang, J., Li, K.: Cooperation method of connected and automated vehicles at unsignalized intersections: Lane changing and arrival scheduling. IEEE Trans. Veh. Technol. 71(11), 11351\u201311366 (2022)","journal-title":"IEEE Trans. Veh. Technol."},{"doi-asserted-by":"crossref","unstructured":"Chen, D., Jiang, L., Wang, Y., Li, Z.: Autonomous driving using safe reinforcement learning by incorporating a regret-based human lane-changing decision model. In: 2020 American Control Conference (ACC), pp. 4355-4361. IEEE, (2020)","key":"521_CR24","DOI":"10.23919\/ACC45564.2020.9147626"},{"doi-asserted-by":"crossref","unstructured":"Chollet, F.: Xception: Deep learning with depthwise separable convolutions. In: Proceedings of the IEEE Conference On Computer Vision and Pattern Recognition, pp 1251\u20131258 (2017)","key":"521_CR25","DOI":"10.1109\/CVPR.2017.195"},{"unstructured":"Christodoulou, P.: Soft actor-critic for discrete action settings. arXiv:1910.07207 (2019)","key":"521_CR26"},{"unstructured":"Chua, K., Calandra, R., McAllister, R., Levine, S.: Deep reinforcement learning in a handful of trials using probabilistic dynamics models. Adv. Neural Inf. Process. 31 (2018)","key":"521_CR27"},{"issue":"5","key":"521_CR28","doi-asserted-by":"crossref","first-page":"1826","DOI":"10.1109\/TITS.2019.2913998","volume":"21","author":"L Claussmann","year":"2019","unstructured":"Claussmann, L., Revilloud, M., Gruyer, D., Glaser, S.: A review of motion planning for highway autonomous driving. IEEE Trans. Intell. Transp. Syst. 21(5), 1826\u20131848 (2019)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"unstructured":"Committee ORADO.: Taxonomy and definitions for terms related to driving automation systems for on-road motor vehicles (2021)","key":"521_CR29"},{"doi-asserted-by":"crossref","unstructured":"Cui, J., Yuan, L., He, L., Xiao, W., Ran, T., Zhang, J.: Multi-input autonomous driving based on deep reinforcement learning with double bias experience replay. IEEE Sens. J. (2023)","key":"521_CR30","DOI":"10.1109\/JSEN.2023.3237206"},{"doi-asserted-by":"crossref","unstructured":"Dabney, W., Rowland, M., Bellemare, M., Munos, R.: Distributional reinforcement learning with quantile regression. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol\u00a032 (2018)","key":"521_CR31","DOI":"10.1609\/aaai.v32i1.11791"},{"issue":"7897","key":"521_CR32","doi-asserted-by":"crossref","first-page":"414","DOI":"10.1038\/s41586-021-04301-9","volume":"602","author":"J Degrave","year":"2022","unstructured":"Degrave, J., Felici, F., Buchli, J., Neunert, M., Tracey, B., Carpanese, F., Ewalds, T., Hafner, R., Abdolmaleki, A., de Las, Casas D., et al.: Magnetic control of tokamak plasmas through deep reinforcement learning. Nature 602(7897), 414\u2013419 (2022)","journal-title":"Nature"},{"unstructured":"Dosovitskiy, A., Ros, G., Codevilla, F., Lopez, A., Koltun, V.: Carla: An open urban driving simulator. In: Conference on Robot Learning pp 1-16. PMLR (2017)","key":"521_CR33"},{"issue":"5","key":"521_CR34","doi-asserted-by":"crossref","first-page":"297","DOI":"10.1049\/iet-its.2019.0317","volume":"14","author":"J Duan","year":"2020","unstructured":"Duan, J., Eben Li, S., Guan, Y., Sun, Q., Cheng, B.: Hierarchical reinforcement learning for self-driving decision-making without reliance on labelled driving data. IET Intel. Transport Syst. 14(5), 297\u2013305 (2020)","journal-title":"IET Intel. Transport Syst."},{"unstructured":"Fakoor, R., Chaudhari, P., Soatto, S., Smola, A.J.: Meta-q-learning. arXiv:1910.00125 (2019)","key":"521_CR35"},{"unstructured":"Finn, C., Abbeel, P., Levine, S.: Model-agnostic meta-learning for fast adaptation of deep networks. In: International Conference on Machine Learning, pp. 1126-1135. PMLR, (2017)","key":"521_CR36"},{"doi-asserted-by":"crossref","unstructured":"Foerster, J., Farquhar, G., Afouras, T., Nardelli, N., Whiteson, S.: Counterfactual multi-agent policy gradients. In: Proceedings of the AAAI Conference on Artificial Intelligence vol\u00a032 (2018)","key":"521_CR37","DOI":"10.1609\/aaai.v32i1.11794"},{"issue":"2","key":"521_CR38","doi-asserted-by":"crossref","first-page":"105","DOI":"10.1016\/0191-2615(81)90037-0","volume":"15","author":"PG Gipps","year":"1981","unstructured":"Gipps, P.G.: A behavioural car-following model for computer simulation. Transp. Res. B Methodol. 15(2), 105\u2013111 (1981)","journal-title":"Transp. Res. B Methodol."},{"issue":"5","key":"521_CR39","doi-asserted-by":"crossref","first-page":"403","DOI":"10.1016\/0191-2615(86)90012-3","volume":"20","author":"PG Gipps","year":"1986","unstructured":"Gipps, P.G.: A model for the structure of lane-changing decisions. Transp. Res. B Methodol. 20(5), 403\u2013414 (1986)","journal-title":"Transp. Res. B Methodol."},{"issue":"6","key":"521_CR40","doi-asserted-by":"crossref","first-page":"750","DOI":"10.1007\/s10458-019-09421-1","volume":"33","author":"P Hernandez-Leal","year":"2019","unstructured":"Hernandez-Leal, P., Kartal, B., Taylor, M.E.: A survey and critique of multiagent deep reinforcement learning. Auton. Agents Multi-Agent Syst. 33(6), 750\u2013797 (2019)","journal-title":"Auton. Agents Multi-Agent Syst."},{"unstructured":"Ho, J., Ermon, S.: Generative adversarial imitation learning. Adv. Neural Inf. Process. 29 (2016)","key":"521_CR41"},{"issue":"8","key":"521_CR42","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"doi-asserted-by":"crossref","unstructured":"Hyldmar, N., He, Y., Prorok, A.: A fleet of miniature cars for experiments in cooperative driving. In: 2019 International Conference on Robotics and Automation (ICRA), pp. 3238-3244. IEEE, (2019)","key":"521_CR43","DOI":"10.1109\/ICRA.2019.8794445"},{"issue":"724","key":"521_CR44","first-page":"1","volume":"4970","author":"International S","year":"2018","unstructured":"International S: Taxonomy and definitions for terms related to driving automation systems for on-road motor vehicles. SAE Int. 4970(724), 1\u20135 (2018)","journal-title":"SAE Int."},{"issue":"6443","key":"521_CR45","doi-asserted-by":"crossref","first-page":"859","DOI":"10.1126\/science.aau6249","volume":"364","author":"M Jaderberg","year":"2019","unstructured":"Jaderberg, M., Czarnecki, W.M., Dunning, I., Marris, L., Lever, G., Castaneda, A.G., Beattie, C., Rabinowitz, N.C., Morcos, A.S., Ruderman, A., et al.: Human-level performance in 3d multiplayer games with population-based reinforcement learning. Science 364(6443), 859\u2013865 (2019)","journal-title":"Science"},{"issue":"8\u20139","key":"521_CR46","doi-asserted-by":"crossref","first-page":"1001","DOI":"10.1016\/j.trb.2009.12.014","volume":"44","author":"WL Jin","year":"2010","unstructured":"Jin, W.L.: A kinematic wave theory of lane-changing traffic flow. Transp. Res. B. Methodol. 44(8\u20139), 1001\u20131021 (2010)","journal-title":"Transp. Res. B. Methodol."},{"unstructured":"Juliani, A., Berges, VP., Teng, E., Cohen, A., Harper, J., Elion, C., Goy, C., Gao, Y., Henry, H., Mattar, M., et\u00a0al.: Unity: A general platform for intelligent agents. arXiv:1809.02627 (2018)","key":"521_CR47"},{"key":"521_CR48","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"LP Kaelbling","year":"1996","unstructured":"Kaelbling, L.P., Littman, M.L., Moore, A.W.: Reinforcement learning: A survey. J. Artif. Intell. Res. 4, 237\u2013285 (1996)","journal-title":"J. Artif. Intell. Res."},{"key":"521_CR49","doi-asserted-by":"crossref","first-page":"110517","DOI":"10.1016\/j.automatica.2022.110517","volume":"145","author":"I Karafyllis","year":"2022","unstructured":"Karafyllis, I., Theodosis, D., Papageorgiou, M.: Lyapunov-based two-dimensional cruise control of autonomous vehicles on lane-free roads. Automatica 145, 110517 (2022)","journal-title":"Automatica"},{"issue":"2","key":"521_CR50","doi-asserted-by":"crossref","first-page":"133","DOI":"10.1080\/17489725.2017.1420256","volume":"11","author":"A Keler","year":"2017","unstructured":"Keler, A., Krisp, J.M., Ding, L.: Detecting traffic congestion propagation in urban environments-a case study with floating taxi data (ftd) in shanghai. J. Locat. Based Serv. 11(2), 133\u2013151 (2017)","journal-title":"J. Locat. Based Serv."},{"issue":"1","key":"521_CR51","doi-asserted-by":"crossref","first-page":"86","DOI":"10.3141\/1999-10","volume":"1999","author":"A Kesting","year":"2007","unstructured":"Kesting, A., Treiber, M., Helbing, D.: General lane-changing model mobil for car-following models. Transp. Res. Rec. 1999(1), 86\u201394 (2007)","journal-title":"Transp. Res. Rec."},{"issue":"6","key":"521_CR52","doi-asserted-by":"crossref","first-page":"4909","DOI":"10.1109\/TITS.2021.3054625","volume":"23","author":"BR Kiran","year":"2021","unstructured":"Kiran, B.R., Sobh, I., Talpaert, V., Mannion, P., Al Sallab, A.A., Yogamani, S., P\u00e9rez, P.: Deep reinforcement learning for autonomous driving: A survey. IEEE Trans. Intell. Transp. Syst. 23(6), 4909\u20134926 (2021)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"issue":"3","key":"521_CR53","doi-asserted-by":"crossref","first-page":"251","DOI":"10.1016\/j.trb.2005.04.003","volume":"40","author":"JA Laval","year":"2006","unstructured":"Laval, J.A., Daganzo, C.F.: Lane-changing in traffic streams. Transp. Res. B Methodol. 40(3), 251\u2013264 (2006)","journal-title":"Transp. Res. B Methodol."},{"unstructured":"Leurent, E., et\u00a0al.: An environment for autonomous driving decision-making (2018)","key":"521_CR54"},{"issue":"12","key":"521_CR55","doi-asserted-by":"crossref","first-page":"1669","DOI":"10.1049\/itr2.12107","volume":"16","author":"G Li","year":"2022","unstructured":"Li, G., Li, S., Li, S., Qu, X.: Continuous decision-making for autonomous driving at intersections using deep deterministic policy gradient. IET Intel. Transport Syst. 16(12), 1669\u20131681 (2022)","journal-title":"IET Intel. Transport Syst."},{"doi-asserted-by":"crossref","unstructured":"Li, G., Qiu, Y., Yang, Y., Li, Z., Li, S., Chu, W., Green, P., Li, S.E.: Lane change strategies for autonomous vehicles: A deep reinforcement learning approach based on transformer. IEEE Trans. Intell. Veh. (2022)","key":"521_CR56","DOI":"10.1109\/TIV.2022.3227921"},{"key":"521_CR57","doi-asserted-by":"crossref","first-page":"103452","DOI":"10.1016\/j.trc.2021.103452","volume":"134","author":"G Li","year":"2022","unstructured":"Li, G., Yang, Y., Li, S., Qu, X., Lyu, N., Li, S.E.: Decision making of autonomous vehicles in lane change scenarios: Deep reinforcement learning approaches with risk awareness. Transp. Res. Part C Emerg. Technol. 134, 103452 (2022)","journal-title":"Transp. Res. Part C Emerg. Technol."},{"key":"521_CR58","doi-asserted-by":"crossref","first-page":"110","DOI":"10.1016\/j.ins.2020.03.105","volume":"532","author":"J Li","year":"2020","unstructured":"Li, J., Yao, L., Xu, X., Cheng, B., Ren, J.: Deep reinforcement learning for pedestrian collision avoidance and human-machine cooperative driving. Inf. Sci. 532, 110\u2013124 (2020)","journal-title":"Inf. Sci."},{"issue":"9","key":"521_CR59","doi-asserted-by":"crossref","first-page":"16110","DOI":"10.1109\/TITS.2022.3148085","volume":"23","author":"S Li","year":"2022","unstructured":"Li, S., Wei, C., Wang, Y.: Combining decision making and trajectory planning for lane changing using deep reinforcement learning. IEEE Trans. Intell. Transp. Syst. 23(9), 16110\u201316136 (2022)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"unstructured":"Lillicrap, T.P., Hunt, J.J., Pritzel, A., Heess, N., Erez, T., Tassa, Y., Silver, D., Wierstra, D.: Continuous control with deep reinforcement learning. arXiv:1509.02971 (2015)","key":"521_CR60"},{"key":"521_CR61","doi-asserted-by":"crossref","first-page":"107476","DOI":"10.1016\/j.aap.2024.107476","volume":"198","author":"H Liu","year":"2024","unstructured":"Liu, H., Wang, T., Li, W., Ye, X., Yuan, Q.: Lane-change intention recognition considering oncoming traffic: Novel insights revealed by advances in deep learning. Accid. Anal. Prev. 198, 107476 (2024)","journal-title":"Accid. Anal. Prev."},{"issue":"9","key":"521_CR62","doi-asserted-by":"crossref","first-page":"9261","DOI":"10.1109\/TVT.2022.3179332","volume":"71","author":"J Liu","year":"2022","unstructured":"Liu, J., Boyle, L.N., Banerjee, A.G.: An inverse reinforcement learning approach for customizing automated lane change systems. IEEE Trans. Veh. Technol. 71(9), 9261\u20139271 (2022)","journal-title":"IEEE Trans. Veh. Technol."},{"unstructured":"Lowe, R., Wu, Y.I., Tamar, A., Harb, J., Pieter\u00a0Abbeel, O., Mordatch, I.: Multi-agent actor-critic for mixed cooperative-competitive environments. Adv. Neural Inf. Process. 30 (2017)","key":"521_CR63"},{"key":"521_CR64","doi-asserted-by":"crossref","first-page":"104328","DOI":"10.1016\/j.trc.2023.104328","volume":"156","author":"C Lu","year":"2023","unstructured":"Lu, C., Lu, H., Chen, D., Wang, H., Li, P., Gong, J.: Human-like decision making for lane change based on the cognitive map and hierarchical reinforcement learning. Transp. Res. Part C Emerg. Technol. 156, 104328 (2023)","journal-title":"Transp. Res. Part C Emerg. Technol."},{"issue":"9","key":"521_CR65","doi-asserted-by":"crossref","first-page":"1551","DOI":"10.3390\/math10091551","volume":"10","author":"K Lv","year":"2022","unstructured":"Lv, K., Pei, X., Chen, C., Xu, J.: A safe and efficient lane change decision-making strategy of autonomous driving based on deep reinforcement learning. Mathematics 10(9), 1551 (2022)","journal-title":"Mathematics"},{"unstructured":"MacQueen, J., et\u00a0al.: Some methods for classification and analysis of multivariate observations. In: Proceedings of the Fifth Berkeley Symposium on Mathematical Statistics and Probability, Oakland, CA, USA, vol\u00a01, pp. 281\u2013297 (1967)","key":"521_CR66"},{"key":"521_CR67","doi-asserted-by":"crossref","first-page":"103060","DOI":"10.1016\/j.trc.2021.103060","volume":"126","author":"M Malekzadeh","year":"2021","unstructured":"Malekzadeh, M., Papamichail, I., Papageorgiou, M., Bogenberger, K.: Optimal internal boundary control of lane-free automated vehicle traffic. Transp. Res. Part C Emerg. Technol. 126, 103060 (2021)","journal-title":"Transp. Res. Part C Emerg. Technol."},{"doi-asserted-by":"crossref","unstructured":"Mauch, M., Cassidy, M.J.: Freeway traffic oscillations: observations and predictions. In: Transportation and Traffic Theory in the 21st Century: Proceedings of the 15th International Symposium on Transportation and Traffic Theory, Adelaide, Australia, 16-18 July 2002, pp. 653-673. Emerald Group Publishing Limited, pp. 653\u2013673 (2002)","key":"521_CR68","DOI":"10.1108\/9780585474601-032"},{"issue":"62","key":"521_CR69","doi-asserted-by":"crossref","first-page":"eabk2822","DOI":"10.1126\/scirobotics.abk2822","volume":"7","author":"T Miki","year":"2022","unstructured":"Miki, T., Lee, J., Hwangbo, J., Wellhausen, L., Koltun, V., Hutter, M.: Learning robust perceptive locomotion for quadrupedal robots in the wild. Sci. Rob. 7(62), eabk2822 (2022)","journal-title":"Sci. Rob."},{"issue":"7540","key":"521_CR70","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A.A., Veness, J., Bellemare, M.G., Graves, A., Riedmiller, M., Fidjeland, A.K., Ostrovski, G., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"doi-asserted-by":"crossref","unstructured":"Nagabandi, A., Kahn, G., Fearing, R.S., Levine, S.: Neural network dynamics for model-based deep reinforcement learning with model-free fine-tuning. In: 2018 IEEE International Conference on Robotics and Automation (ICRA), pp. 7559-7566. IEEE, (2018)","key":"521_CR71","DOI":"10.1109\/ICRA.2018.8463189"},{"unstructured":"Ng, A.Y., Harada, D., Russell, S.: Policy invariance under reward transformations: Theory and application to reward shaping. In: Icml, Citeseer vol. 99, pp. 278\u2013287 (1999)","key":"521_CR72"},{"unstructured":"Ng, A.Y., Russell, S., et\u00a0al.: Algorithms for inverse reinforcement learning. In: Icml, vol.\u00a01, p.\u00a02 (2000)","key":"521_CR73"},{"issue":"9","key":"521_CR74","doi-asserted-by":"crossref","first-page":"3826","DOI":"10.1109\/TCYB.2020.2977374","volume":"50","author":"TT Nguyen","year":"2020","unstructured":"Nguyen, T.T., Nguyen, N.D., Nahavandi, S.: Deep reinforcement learning for multiagent systems: A review of challenges, solutions, and applications. IEEE Trans. Cybern. 50(9), 3826\u20133839 (2020)","journal-title":"IEEE Trans. Cybern."},{"issue":"2","key":"521_CR75","doi-asserted-by":"crossref","first-page":"182","DOI":"10.1016\/j.trc.2010.05.006","volume":"19","author":"S Ossen","year":"2011","unstructured":"Ossen, S., Hoogendoorn, S.P.: Heterogeneity in car-following behavior: Theory and empirics. Transp. Res. Part C Emerg. Technol. 19(2), 182\u2013195 (2011)","journal-title":"Transp. Res. Part C Emerg. Technol."},{"issue":"11","key":"521_CR76","doi-asserted-by":"crossref","first-page":"21848","DOI":"10.1109\/TITS.2022.3185255","volume":"23","author":"J Peng","year":"2022","unstructured":"Peng, J., Zhang, S., Zhou, Y., Li, Z.: An integrated model for autonomous speed and lane change decision-making based on deep reinforcement learning. IEEE Trans. Intell. Transp. Syst. 23(11), 21848\u201321860 (2022)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"doi-asserted-by":"crossref","unstructured":"Pinosky, A., Abraham, I., Broad, A., Argall, B., Murphey, T.D.: Hybrid control for combining model-based and model-free reinforcement learning. J. Robot. Res. p. 02783649221083331 (2022)","key":"521_CR77","DOI":"10.1177\/02783649221083331"},{"key":"521_CR78","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"ML Puterman","year":"2014","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. John Wiley & Sons (2014)"},{"key":"521_CR79","doi-asserted-by":"crossref","first-page":"289","DOI":"10.1016\/j.procs.2014.05.026","volume":"29","author":"WL Quek","year":"2014","unstructured":"Quek, W.L., Chew, L.Y.: Mechanism of traffic jams at speed bottlenecks. Procedia. Comput. Sci. 29, 289\u2013298 (2014)","journal-title":"Procedia. Comput. Sci."},{"unstructured":"Rakelly, K., Zhou, A., Finn, C., Levine, S., Quillen, D.: Efficient off-policy meta-reinforcement learning via probabilistic context variables. In: International Conference on Machine Learning, pp. 5331-5340. PMLR, (2019)","key":"521_CR80"},{"unstructured":"Ramachandran, D., Amir, E.: Bayesian inverse reinforcement learning. In: IJCAI vol. 7, pp. 2586\u20132591 (2007)","key":"521_CR81"},{"doi-asserted-by":"crossref","unstructured":"Saxena, D.M., Bae, S., Nakhaei, A., Fujimura, K., Likhachev, M.: Driving in dense traffic with model-free reinforcement learning. In: 2020 IEEE International Conference on Robotics and Automation (ICRA), pp. 5385-5392. IEEE, (2020)","key":"521_CR82","DOI":"10.1109\/ICRA40945.2020.9197132"},{"unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., Moritz, P.: Trust region policy optimization. In: International Conference on Machine Learning, pp. 1889-1897. PMLR (2015)","key":"521_CR83"},{"issue":"10","key":"521_CR84","doi-asserted-by":"crossref","first-page":"1095","DOI":"10.1073\/pnas.39.10.1095","volume":"39","author":"LS Shapley","year":"1953","unstructured":"Shapley, L.S.: Stochastic games. Proc. Natl. Acad. Sci. 39(10), 1095\u20131100 (1953)","journal-title":"Proc. Natl. Acad. Sci."},{"doi-asserted-by":"crossref","unstructured":"Shoaraee, H., Chen, L., Jiang, F.: Decision-making of an autonomous vehicle when approached by an emergency vehicle using deep reinforcement learning. In: 2021 IEEE Int. Conf. Dependable, Auton. and Secure Comput., Int. Conf. Pervasive Intell. Comput., Int. Conf. Cloud Big Data Comput., Int. Conf. Cyber Sci. Technol. Congr. DASC\/PiCom\/CBDCom\/CyberSciTech. IEEE, pp 185\u2013191 (2021)","key":"521_CR85","DOI":"10.1109\/DASC-PICom-CBDCom-CyberSciTech52372.2021.00041"},{"key":"521_CR86","doi-asserted-by":"crossref","first-page":"93","DOI":"10.1016\/j.conb.2013.12.004","volume":"25","author":"H Shteingart","year":"2014","unstructured":"Shteingart, H., Loewenstein, Y.: Reinforcement learning and human behavior. Curr. Opin. Neurobiol. 25, 93\u201398 (2014)","journal-title":"Curr. Opin. Neurobiol."},{"unstructured":"Silver, D., Lever, G., Heess, N., Degris, T., Wierstra, D., Riedmiller, M.: Deterministic policy gradient algorithms. In: International Conference on Machine Learning, pp. 387-395. Pmlr, (2014)","key":"521_CR87"},{"issue":"7587","key":"521_CR88","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., Huang, A., Maddison, C.J., Guez, A., Sifre, L., Van Den Driessche, G., Schrittwieser, J., Antonoglou, I., Panneershelvam, V., Lanctot, M., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"unstructured":"Silver, D., Hubert, T., Schrittwieser, J., Antonoglou, I., Lai, M., Guez, A., Lanctot, M., Sifre, L., Kumaran, D., Graepel, T., et\u00a0al.: Mastering chess and shogi by self-play with a general reinforcement learning algorithm. arXiv:1712.01815","key":"521_CR89"},{"unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning: An introduction. MIT press (2018)","key":"521_CR90"},{"key":"521_CR91","doi-asserted-by":"crossref","first-page":"452","DOI":"10.1016\/j.eswa.2017.09.025","volume":"91","author":"J Tang","year":"2018","unstructured":"Tang, J., Liu, F., Zhang, W., Ke, R., Zou, Y.: Lane-changes prediction based on adaptive fuzzy neural network. Expert Syst. Appl. 91, 452\u2013463 (2018)","journal-title":"Expert Syst. Appl."},{"key":"521_CR92","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2021.114632","volume":"173","author":"T Th\u00e9ate","year":"2021","unstructured":"Th\u00e9ate, T., Ernst, D.: An application of deep reinforcement learning to algorithmic trading. Expert Syst. Appl. 173, 114632 (2021)","journal-title":"Expert Syst. Appl."},{"issue":"2","key":"521_CR93","doi-asserted-by":"crossref","first-page":"1805","DOI":"10.1103\/PhysRevE.62.1805","volume":"62","author":"M Treiber","year":"2000","unstructured":"Treiber, M., Hennecke, A., Helbing, D.: Congested traffic states in empirical observations and microscopic simulations. Phys. Rev. E 62(2), 1805 (2000)","journal-title":"Phys. Rev. E"},{"issue":"1","key":"521_CR94","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1016\/j.physa.2005.05.001","volume":"360","author":"M Treiber","year":"2006","unstructured":"Treiber, M., Kesting, A., Helbing, D.: Delays, inaccuracies and anticipation in microscopic traffic models. Phys. A Stat. Mech. Appl. 360(1), 71\u201388 (2006)","journal-title":"Phys. A Stat. Mech. Appl."},{"unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Adv. Neural Inf. Process. 30 (2017)","key":"521_CR95"},{"unstructured":"Vinyals, O., Ewalds, T., Bartunov, S., Georgiev, P., Vezhnevets, A.S., Yeo, M., Makhzani, A., K\u00fcttler, H., Agapiou, J., Schrittwieser, J., et\u00a0al.: Starcraft ii: A new challenge for reinforcement learning. arXiv:1708.04782 (2017)","key":"521_CR96"},{"issue":"7782","key":"521_CR97","doi-asserted-by":"crossref","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals, O., Babuschkin, I., Czarnecki, W.M., Mathieu, M., Dudzik, A., Chung, J., Choi, D.H., Powell, R., Ewalds, T., Georgiev, P., et al.: Grandmaster level in starcraft ii using multi-agent reinforcement learning. Nature 575(7782), 350\u2013354 (2019)","journal-title":"Nature"},{"doi-asserted-by":"crossref","unstructured":"Wang, L., Ye, F., Wang, Y., Guo, J., Papamichail, I., Papageorgiou, M., Hu, S., Zhang, L.: A Q-learning foresighted approach to ego-efficient lane changes of connected and automated vehicles on freeways. In: 2019 IEEE Intelligent Transportation Systems Conference (ITSC), pp. 1385-1392. IEEE, (2019)","key":"521_CR98","DOI":"10.1109\/ITSC.2019.8917036"},{"key":"521_CR99","doi-asserted-by":"crossref","first-page":"103478","DOI":"10.1016\/j.trc.2021.103478","volume":"138","author":"Y Wang","year":"2022","unstructured":"Wang, Y., Wang, L., Guo, J., Papamichail, I., Papageorgiou, M., Wang, F.Y., Bertini, R., Hua, W., Yang, Q.: Ego-efficient lane changes of connected and automated vehicles with impacts on traffic flow. Transp. Res. Part C Emerg. Technol. 138, 103478 (2022)","journal-title":"Transp. Res. Part C Emerg. Technol."},{"key":"521_CR100","doi-asserted-by":"crossref","first-page":"114675","DOI":"10.1016\/j.eswa.2021.114675","volume":"173","author":"Z Wang","year":"2021","unstructured":"Wang, Z., Zhao, X., Chen, Z., Li, X.: A dynamic cooperative lane-changing model for connected and autonomous vehicles with possible accelerations of a preceding vehicle. Expert Syst. Appl. 173, 114675 (2021)","journal-title":"Expert Syst. Appl."},{"key":"521_CR101","doi-asserted-by":"crossref","first-page":"122158","DOI":"10.1016\/j.eswa.2023.122158","volume":"238","author":"Z Wang","year":"2024","unstructured":"Wang, Z., Huang, H., Tang, J., Hu, L.: A deep reinforcement learning-based approach for autonomous lane-changing velocity control in mixed flow of vehicle group level. Expert Syst. Appl. 238, 122158 (2024)","journal-title":"Expert Syst. Appl."},{"key":"521_CR102","first-page":"279","volume":"8","author":"CJ Watkins","year":"1992","unstructured":"Watkins, C.J., Dayan, P.: Q-learning. Mach. Learn. 8, 279\u2013292 (1992)","journal-title":"Mach. Learn."},{"doi-asserted-by":"crossref","unstructured":"Wegener, A., Pi\u00f3rkowski, M., Raya, M., Hellbr\u00fcck, H., Fischer, S., Hubaux, J.P.: Traci: an interface for coupling road traffic and network simulators. In: Proceedings of the 11th Communications and Networking Simulation Symposium, pp. 155\u2013163 (2008)","key":"521_CR103","DOI":"10.1145\/1400713.1400740"},{"unstructured":"World Health Organization: World health organization road traffic injuries. Eri\u015fim Adresi: https:\/\/www.who.int\/news-room\/fact-sheets\/detail\/road-traffic-injuries (Eri\u015fim Tarihi: 14.07 2019) (2018)","key":"521_CR104"},{"key":"521_CR105","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1016\/j.trc.2019.07.002","volume":"106","author":"DF Xie","year":"2019","unstructured":"Xie, D.F., Fang, Z.Z., Jia, B., He, Z.: A data-driven lane-changing model based on deep learning. Transp. Res. Part C Emerg. Technol. 106, 41\u201360 (2019)","journal-title":"Transp. Res. Part C Emerg. Technol."},{"doi-asserted-by":"crossref","unstructured":"Xiong, G., Kang, Z., Li, H., Song, W., Jin, Y., Gong, J.: Decision-making of lane change behavior based on rcs for automated vehicles in the real environment. In: 2018 IEEE Intelligent Vehicles Symposium (IV), pp. 1400-1405. IEEE, (2018)","key":"521_CR106","DOI":"10.1109\/IVS.2018.8500651"},{"key":"521_CR107","doi-asserted-by":"crossref","first-page":"103738","DOI":"10.1016\/j.trc.2022.103738","volume":"141","author":"Q Xue","year":"2022","unstructured":"Xue, Q., Xing, Y., Lu, J.: An integrated lane change prediction model incorporating traffic context based on trajectory data. Transp. Res. Part C Emerg. Technol. 141, 103738 (2022)","journal-title":"Transp. Res. Part C Emerg. Technol."},{"issue":"3","key":"521_CR108","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1016\/S0968-090X(96)00006-X","volume":"4","author":"Q Yang","year":"1996","unstructured":"Yang, Q., Koutsopoulos, H.N.: A microscopic traffic simulator for evaluation of dynamic traffic management systems. Transp. Res. Part C Emerg. Technol. 4(3), 113\u2013129 (1996)","journal-title":"Transp. Res. Part C Emerg. Technol."},{"unstructured":"Yang, Y., Wang, J.: An overview of multi-agent reinforcement learning from game theoretical perspective. arXiv:2011.00583 (2020)","key":"521_CR109"},{"key":"521_CR110","doi-asserted-by":"crossref","first-page":"120133","DOI":"10.1016\/j.eswa.2023.120133","volume":"225","author":"Z Yao","year":"2023","unstructured":"Yao, Z., Deng, H., Wu, Y., Zhao, B., Li, G., Jiang, Y.: Optimal lane-changing trajectory planning for autonomous vehicles considering energy consumption. Expert Syst. Appl. 225, 120133 (2023)","journal-title":"Expert Syst. Appl."},{"doi-asserted-by":"crossref","unstructured":"Zhang, J., Chang, C., Pei, H., Peng, X., Guo, Y., Lian, R., Chen, Z., Li, L.: Cavsim: A microscope traffic simulator for connected and automated vehicles environment. In: 2022 IEEE 25th International Conference on Intelligent Transportation Systems (ITSC), pp. 3719-3724. IEEE, (2022)","key":"521_CR111","DOI":"10.1109\/ITSC55140.2022.9922267"},{"issue":"1","key":"521_CR112","doi-asserted-by":"crossref","first-page":"854","DOI":"10.1109\/TITS.2022.3216288","volume":"24","author":"J Zhang","year":"2022","unstructured":"Zhang, J., Chang, C., Zeng, X., Li, L.: Multi-agent drl-based lane change with right-of-way collaboration awareness. IEEE Trans. Intell. Transp. Syst. 24(1), 854\u2013869 (2022)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"doi-asserted-by":"crossref","unstructured":"Zhang, K., Yang, Z., Ba\u015far, T.: Multi-agent reinforcement learning: A selective overview of theories and algorithms. Handb. Reinf. Learn. Control 321\u2013384 (2021)","key":"521_CR113","DOI":"10.1007\/978-3-030-60990-0_12"},{"doi-asserted-by":"crossref","unstructured":"Zhang, S., Peng, H., Nageshrao, S., Tseng, E.: Discretionary lane change decision making using reinforcement learning with model-based exploration. In: 2019 18th IEEE International Conference On Machine Learning And Applications (ICMLA), pp. 844-850. IEEE, (2019)","key":"521_CR114","DOI":"10.1109\/ICMLA.2019.00147"},{"doi-asserted-by":"crossref","unstructured":"Zhang, S., Wen, L., Peng, H., Tseng, H.E.: Quick learner automated vehicle adapting its roadmanship to varying traffic cultures with meta reinforcement learning. In: 2021 IEEE International Intelligent Transportation Systems Conference (ITSC), pp. 1745-1752. IEEE, (2021)","key":"521_CR115","DOI":"10.1109\/ITSC48978.2021.9564972"},{"issue":"12","key":"521_CR116","doi-asserted-by":"crossref","first-page":"5526","DOI":"10.1109\/TNNLS.2020.3042981","volume":"32","author":"Y Zhang","year":"2020","unstructured":"Zhang, Y., Gao, B., Guo, L., Guo, H., Chen, H.: Adaptive decision-making for automated vehicles under roundabout scenarios using optimization embedded reinforcement learning. IEEE Trans. Neural Netw. Learn. Syst. 32(12), 5526\u20135538 (2020)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"doi-asserted-by":"crossref","unstructured":"Zhao, J., Zhao, W., Deng, B., Wang, Z., Zhang, F., Zheng, W., Cao, W., Nan, J., Lian, Y., Burke, A.F.: Autonomous driving system: A comprehensive survey. Expert. Syst. Appl. 122836 (2023)","key":"521_CR117","DOI":"10.1016\/j.eswa.2023.122836"},{"key":"521_CR118","doi-asserted-by":"crossref","first-page":"16","DOI":"10.1016\/j.trb.2013.11.009","volume":"60","author":"Z Zheng","year":"2014","unstructured":"Zheng, Z.: Recent developments and research needs in modeling lane changing. Transp. Res. B Methodol. 60, 16\u201332 (2014)","journal-title":"Transp. Res. B Methodol."},{"doi-asserted-by":"crossref","unstructured":"Zhou, J., Wang, L., Wang, X.: Scalable evaluation methods for autonomous vehicles. Expert. Syst. Appl. 123603 (2024)","key":"521_CR119","DOI":"10.1016\/j.eswa.2024.123603"},{"issue":"1","key":"521_CR120","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1007\/s43684-022-00023-5","volume":"2","author":"W Zhou","year":"2022","unstructured":"Zhou, W., Chen, D., Yan, J., Li, Z., Yin, H., Ge, W.: Multi-agent reinforcement learning for cooperative lane changing of connected and autonomous vehicles in mixed traffic. Auton. Intell. Syst. 2(1), 5 (2022)","journal-title":"Auton. Intell. Syst."},{"unstructured":"Ziebart, B.D., Maas, A.L., Bagnell, J.A., Dey, A.K., et al.: Maximum entropy inverse reinforcement learning. In: Aaai, Chicago, IL, USA vol. 8, pp. 1433\u20131438 (2008)","key":"521_CR121"},{"unstructured":"Zintgraf, L., Shiarlis, K., Igl, M., Schulze, S., Gal, Y., Hofmann, K., Whiteson, S.: Varibad: A very good method for bayes-adaptive deep rl via meta-learning. arXiv:1910.08348 (2019)","key":"521_CR122"}],"container-title":["International Journal of Intelligent Transportation Systems Research"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13177-025-00521-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13177-025-00521-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13177-025-00521-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,15]],"date-time":"2025-12-15T12:57:20Z","timestamp":1765803440000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13177-025-00521-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,22]]},"references-count":122,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["521"],"URL":"https:\/\/doi.org\/10.1007\/s13177-025-00521-9","relation":{},"ISSN":["1348-8503","1868-8659"],"issn-type":[{"type":"print","value":"1348-8503"},{"type":"electronic","value":"1868-8659"}],"subject":[],"published":{"date-parts":[[2025,7,22]]},"assertion":[{"value":"4 July 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 November 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 July 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 July 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"All authors of this paper consent to its publication.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"The authors declare that they have no conflict of interest.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}