{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T15:50:35Z","timestamp":1776181835960,"version":"3.50.1"},"reference-count":229,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2022,8,7]],"date-time":"2022-08-07T00:00:00Z","timestamp":1659830400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,8,7]],"date-time":"2022-08-07T00:00:00Z","timestamp":1659830400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2022,10]]},"DOI":"10.1007\/s11263-022-01657-x","type":"journal-article","created":{"date-parts":[[2022,8,7]],"date-time":"2022-08-07T13:02:10Z","timestamp":1659877330000},"page":"2425-2452","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":176,"title":["Explainability of Deep Vision-Based Autonomous Driving Systems: Review and Challenges"],"prefix":"10.1007","volume":"130","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2757-2036","authenticated-orcid":false,"given":"\u00c9loi","family":"Zablocki","sequence":"first","affiliation":[]},{"given":"H\u00e9di","family":"Ben-Younes","sequence":"additional","affiliation":[]},{"given":"Patrick","family":"P\u00e9rez","sequence":"additional","affiliation":[]},{"given":"Matthieu","family":"Cord","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,8,7]]},"reference":[{"key":"1657_CR1","doi-asserted-by":"crossref","unstructured":"Adadi, A. & Berrada, M. (2018). Peeking inside the black-box: A survey on explainable artificial intelligence (XAI). IEEE Access","DOI":"10.1109\/ACCESS.2018.2870052"},{"key":"1657_CR2","unstructured":"Adebayo, J., Gilmer, J., Muelly, M., Goodfellow, IJ., Hardt, M., & Kim, B. (2018). Sanity checks for saliency maps. In NeurIPS"},{"key":"1657_CR3","doi-asserted-by":"crossref","unstructured":"Agrawal, A., Batra, D., Parikh, D., & Kembhavi, A. (2018). Don\u2019t just assume; look and answer: Overcoming priors for visual question answering. In CVPR","DOI":"10.1109\/CVPR.2018.00522"},{"key":"1657_CR4","doi-asserted-by":"crossref","unstructured":"Agrawal, A., Lu, J., Antol, S., Mitchell, M., Zitnick, CL., Parikh, D., & Batra, D. (2017). VQA: visual question answering - www.visualqa.org. IJCV","DOI":"10.1007\/s11263-016-0966-6"},{"key":"1657_CR5","unstructured":"Alhaija, HA., Mustikovela, SK., Mescheder, LM., Geiger, A., & Rother, C. (2018). Augmented reality meets computer vision: Efficient data generation for urban driving scenes. IJCV"},{"key":"1657_CR6","unstructured":"Alipour, K., Schulze, JP., Yao, Y., Ziskind, A., & Burachas, G. (2020). A study on multimodal and interactive explanations for visual question answering. In SafeAI@AAAI"},{"key":"1657_CR7","unstructured":"Alvarez-Melis, D. & Jaakkola, TS. (2018). Towards robust interpretability with self-explaining neural networks. In NeurIPS"},{"key":"1657_CR8","unstructured":"Anderson, JM., Nidhi, K., Stanley, KD., Sorensen, P., Samaras, C., & Oluwatola, OA. (2014). Autonomous vehicle technology: A guide for policymakers"},{"key":"1657_CR9","doi-asserted-by":"crossref","unstructured":"Antol, S., Agrawal, A., Lu, J., Mitchell, M., Batra, D., Zitnick, CL., & Parikh, D. (2015). VQA: visual question answering. In ICCV","DOI":"10.1109\/ICCV.2015.279"},{"issue":"C","key":"1657_CR10","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1016\/j.inffus.2021.11.008","volume":"81","author":"L Arras","year":"2022","unstructured":"Arras, L., Osman, A., & Samek, W. (2022). Clevr-xai: A benchmark dataset for the ground truth evaluation of neural network explanations. Inf Fusion, 81(C), 14\u201340. https:\/\/doi.org\/10.1016\/j.inffus.2021.11.008.","journal-title":"Inf Fusion"},{"key":"1657_CR11","doi-asserted-by":"crossref","unstructured":"Bach, S., Binder, A., Montavon, G., Klauschen, F., M\u00fcller, KR., & Samek, W. (2015). On pixel-wise explanations for non-linear classifier decisions by layer-wise relevance propagation. PloS one","DOI":"10.1371\/journal.pone.0130140"},{"key":"1657_CR12","unstructured":"Bahdanau, D., Cho, K., & Bengio, Y. (2015). Neural machine translation by jointly learning to align and translate. In ICLR"},{"key":"1657_CR13","doi-asserted-by":"crossref","unstructured":"Bailo, O., Ham, D., & Shin, YM. (2019). Red blood cell image generation for data augmentation using conditional generative adversarial networks. In CVPR Workshops","DOI":"10.1109\/CVPRW.2019.00136"},{"key":"1657_CR14","unstructured":"Banerjee, S., & Lavie, A. (2005). METEOR: an automatic metric for MT evaluation with improved correlation with human judgments. In Workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization @ACL"},{"key":"1657_CR15","doi-asserted-by":"crossref","unstructured":"Bansal, M., Krizhevsky, A., & Ogale, AS. (2019). Chauffeurnet: Learning to drive by imitating the best and synthesizing the worst. In Robotics: Science and Systems","DOI":"10.15607\/RSS.2019.XV.031"},{"key":"1657_CR16","doi-asserted-by":"crossref","unstructured":"Bansal, A., Ma, S., Ramanan, D., & Sheikh, Y. (2018). Recycle-gan: Unsupervised video retargeting. In ECCV","DOI":"10.1007\/978-3-030-01228-1_8"},{"key":"1657_CR17","doi-asserted-by":"crossref","unstructured":"Bau, D., Zhou, B., Khosla, A., Oliva, A., & Torralba, A. (2017). Network dissection: Quantifying interpretability of deep visual representations. In CVPR","DOI":"10.1109\/CVPR.2017.354"},{"key":"1657_CR18","doi-asserted-by":"crossref","unstructured":"Beaudouin, V., Bloch, I., Bounie, D., Cl\u00e9men\u00e7on, S., d\u2019Alch\u00e9-Buc, F., Eagan, J., Maxwell, W., Mozharovskyi, P., & Parekh, J. (2020). Flexible and context-specific AI explainability: A multidisciplinary approach. CoRR","DOI":"10.2139\/ssrn.3559477"},{"key":"1657_CR19","doi-asserted-by":"crossref","unstructured":"Bengio, Y., Courville, AC., & Vincent, P. (2013). Representation learning: A review and new perspectives. TPAMI","DOI":"10.1109\/TPAMI.2013.50"},{"key":"1657_CR20","doi-asserted-by":"crossref","unstructured":"Ben-Younes, H., Cadene, R., Thome, N., & Cord, M. (2019). Block: Bilinear superdiagonal fusion for visual question answering and visual relationship detection. In AAAI","DOI":"10.1609\/aaai.v33i01.33018102"},{"key":"1657_CR21","unstructured":"Ben-Younes, H., \u00c9loi, Zablocki., P\u00e9rez, P., Cord, M. (2020). Driving behavior explanation with multi-level fusion. Machine Learning for Autonomous Driving Workshop ML4AD@NeurIPS"},{"key":"1657_CR22","doi-asserted-by":"crossref","unstructured":"Ben-Younes, H., Zablocki, \u00c9., Chen, M., P\u00e9rez, P., & Cord, M. (2022). Raising context awareness in motion forecasting. CVPR Workshop on Autonomous Driving (WAD)","DOI":"10.1109\/CVPRW56347.2022.00487"},{"key":"1657_CR23","unstructured":"Besserve, M., Mehrjou, A., Sun, R., & Sch\u00f6lkopf, B. (2020). Counterfactuals uncover the modular structure of deep generative models. In ICLR"},{"key":"1657_CR24","unstructured":"Bojarski, M., Chen, C., Daw, J., Degirmenci, A., Deri, J., Firner, B., Flepp, B., Gogri, S., Hong, J., Jackel, LD., Jia, Z., Lee, BJ., Liu, B., Liu, F., Muller, U., Payne, S., Prasad, NKN., Provodin, A., Roach, J., Rvachov, T., Tadimeti, N., van Engelen, J., Wen, H., Yang, E., & Yang, Z. (2020). The NVIDIA pilotnet experiments. CoRR"},{"key":"1657_CR25","doi-asserted-by":"crossref","unstructured":"Bojarski, M., Choromanska, A., Choromanski, K., Firner, B., Ackel, LJ., Muller, U., Yeres, P., & Zieba, K. (2018). Visualbackprop: Efficient visualization of cnns for autonomous driving. In ICRA","DOI":"10.1109\/ICRA.2018.8461053"},{"key":"1657_CR26","unstructured":"Bojarski, M., Testa, DD., Dworakowski, D., Firner, B., Flepp, B., Goyal, P., Jackel, LD., Monfort, M., Muller, U., Zhang, J., Zhang, X., Zhao, J., & Zieba, K. (2016). End to end learning for self-driving cars. CoRR"},{"key":"1657_CR27","unstructured":"Bojarski, M., Yeres, P., Choromanska, A., Choromanski, K., Firner, B., Jackel, LD., & Muller, U. (2017). Explaining how a deep neural network trained with end-to-end learning steers a car. CoRR"},{"key":"1657_CR28","doi-asserted-by":"crossref","unstructured":"Borg, M., Englund, C., Wnuk, K., Durann, B., Lewandowski, C., Gao, S., Tan, Y., Kaijser, H., L\u00f6nn, H., & T\u00f6rnqvist, J. (2019). Safely entering the deep: A review of verification and validation for machine learning and a challenge elicitation in the automotive industry. Journal of Automotive Software Engineering","DOI":"10.2991\/jase.d.190131.001"},{"key":"1657_CR29","unstructured":"Bowles, C., Chen, L., Guerrero, R., Bentley, P., Gunn, R. N., Hammers, A., Dickie, D. A., del C\u00a0Vald\u00e9s\u00a0Hern\u00e1ndez, M., Wardlaw, J. M., & Rueckert, D. (2018). GAN augmentation: Augmenting training data using generative adversarial networks. CoRR"},{"key":"1657_CR30","unstructured":"Brown, K., Driggs-Campbell, K., & Kochenderfer, M. J. (2020). A taxonomy and review of algorithms for modeling and predicting human driver behavior. arXiv preprint arXiv:2006.08832"},{"key":"1657_CR31","unstructured":"Bykov, K., H\u00f6hne, M. M., M\u00fcller, K., Nakajima, S., Kloft, M. (2020). How much can I trust you? - quantifying uncertainties in explaining neural networks. CoRR"},{"key":"1657_CR32","doi-asserted-by":"crossref","unstructured":"Cad\u00e8ne, R., Ben-younes, H., Cord, M., & Thome, N. (2019a). MUREL: multimodal relational reasoning for visual question answering. In CVPR","DOI":"10.1109\/CVPR.2019.00209"},{"key":"1657_CR33","unstructured":"Cad\u00e8ne, R., Dancette, C., Ben-younes, H., Cord, M., & Parikh, D. (2019b). Rubi: Reducing unimodal biases for visual question answering. In NeurIPS"},{"key":"1657_CR34","doi-asserted-by":"crossref","unstructured":"Caesar, H., Bankiti, V., Lang, AH., Vora, S., Liong, VE., Xu, Q., Krishnan, A., Pan, Y., Baldan, G., & Beijbom, O. (2020). nuscenes: A multimodal dataset for autonomous driving. In CVPR","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"1657_CR35","doi-asserted-by":"crossref","unstructured":"Caltagirone, L., Bellone, M., Svensson, L., & Wahde, M. (2017). Lidar-based driving path generation using fully convolutional neural networks. In ITSC","DOI":"10.1109\/IVS.2017.7995848"},{"key":"1657_CR36","unstructured":"Camburu, O., Rockt\u00e4schel, T., Lukasiewicz, T., & Blunsom, P. (2018). e-snli: Natural language inference with natural language explanations. In NeurIPS"},{"key":"1657_CR37","doi-asserted-by":"crossref","unstructured":"Casas, S., Sadat, A., & Urtasun, R. (2021). MP3: A unified model to map, perceive, predict and plan. In CVPR","DOI":"10.1109\/CVPR46437.2021.01417"},{"key":"1657_CR38","doi-asserted-by":"crossref","unstructured":"Castrej\u00f3n, L., Aytar, Y., Vondrick, C., Pirsiavash, H., & Torralba, A. (2016). Learning aligned cross-modal representations from weakly aligned data. In CVPR","DOI":"10.1109\/CVPR.2016.321"},{"key":"1657_CR39","doi-asserted-by":"crossref","unstructured":"Chan, F., Chen, Y., Xiang, Y., & Sun, M. (2016). Anticipating accidents in dashcam videos. In ACCV","DOI":"10.1007\/978-3-319-54190-7_9"},{"key":"1657_CR40","doi-asserted-by":"crossref","unstructured":"Chang, Y., Liu, Z. Y., & Hsu, W. H. (2019). Vornet: Spatio-temporally consistent video inpainting for object removal. In CVPR Workshops","DOI":"10.1109\/CVPRW.2019.00229"},{"key":"1657_CR41","unstructured":"Chen, X., Duan, Y., Houthooft, R., Schulman, J., Sutskever, I., Abbeel, P. (2016). Infogan: Interpretable representation learning by information maximizing generative adversarial nets. In NIPS"},{"key":"1657_CR42","unstructured":"Chen, J., Li, S. E., & Tomizuka, M. (2020a). Interpretable end-to-end urban autonomous driving with latent deep reinforcement learning. CoRR"},{"key":"1657_CR43","unstructured":"Chen, C., Li, O., Tao, D., Barnett, A., Rudin, C., & Su, J. (2019). This looks like that: Deep learning for interpretable image recognition. In NeurIPS"},{"key":"1657_CR44","doi-asserted-by":"crossref","unstructured":"Chen, Y., Rong, F., Duggal, S., Wang, S., Yan, X., Manivasagam, S., Xue, S., Yumer, E., & Urtasun, R. (2021). Geosim: Realistic video simulation via geometry-aware composition for self-driving. In CVPR","DOI":"10.1109\/CVPR46437.2021.00715"},{"key":"1657_CR45","doi-asserted-by":"crossref","unstructured":"Chen, X., Zhang, Y., Wang, Y., Shu, H., Xu, C., & Xu, C. (2020b). Optical flow distillation: Towards efficient and stable video style transfer. In ECCV","DOI":"10.1007\/978-3-030-58539-6_37"},{"key":"1657_CR46","doi-asserted-by":"crossref","unstructured":"Chitta, K., Prakash, A., & Geiger, A. (2021). NEAT: neural attention fields for end-to-end autonomous driving. In ICCV","DOI":"10.1109\/ICCV48922.2021.01550"},{"key":"1657_CR47","doi-asserted-by":"crossref","unstructured":"Choi, J. K., & Ji, Y. G. (2015). Investigating the importance of trust on adopting an autonomous vehicle. IJHCI","DOI":"10.1080\/10447318.2015.1070549"},{"key":"1657_CR48","doi-asserted-by":"crossref","unstructured":"Codevilla, F., Miiller, M., L\u00f3pez, A., Koltun, V., & Dosovitskiy, A. (2018). End-to-end driving via conditional imitation learning. In ICRA","DOI":"10.1109\/ICRA.2018.8460487"},{"key":"1657_CR49","doi-asserted-by":"crossref","unstructured":"Codevilla, F., Santana, E., L\u00f3pez, A. M., Gaidon, A. (2019). Exploring the limitations of behavior cloning for autonomous driving. In ICCV","DOI":"10.1109\/ICCV.2019.00942"},{"key":"1657_CR50","doi-asserted-by":"crossref","unstructured":"Corbi\u00e8re, C., Thome, N., Saporta, A., Vu, T., Cord, M., & P\u00e9rez, P. (2020). Confidence estimation via auxiliary models. PAMI","DOI":"10.1109\/TPAMI.2021.3085983"},{"key":"1657_CR51","doi-asserted-by":"crossref","unstructured":"Corso, A., Du, P., Driggs-Campbell, K. R., Kochenderfer, M. J. (2019). Adaptive stress testing with reward augmentation for autonomous vehicle validation. In ITSC","DOI":"10.1109\/ITSC.2019.8917242"},{"key":"1657_CR52","doi-asserted-by":"crossref","unstructured":"Cui, H., Radosavljevic, V., Chou, F., Lin, T., Nguyen, T., Huang, T., Schneider, J., & Djuric, N. (2019). Multimodal trajectory predictions for autonomous driving using deep convolutional networks. In ICRA","DOI":"10.1109\/ICRA.2019.8793868"},{"key":"1657_CR53","doi-asserted-by":"crossref","unstructured":"Cultrera, L., Seidenari, L., Becattini, F., Pala, P., & Bimbo, A. D. (2020). Explaining autonomous driving by learning end-to-end visual attention. In CVPR Workshops","DOI":"10.1109\/CVPRW50498.2020.00178"},{"key":"1657_CR54","unstructured":"Das, A. & Rad, P. (2020). Opportunities and challenges in explainable artificial intelligence (XAI): A survey. CoRR"},{"key":"1657_CR55","unstructured":"de Haan, P., Jayaraman, D., & Levine, S. (2019). Causal confusion in imitation learning. In NeurIPS"},{"key":"1657_CR56","doi-asserted-by":"crossref","unstructured":"Deng, Y., Zheng, J. X., Zhang, T., Chen, C., Lou, G., & Kim, M. (2020). An analysis of adversarial attacks and defenses on autonomous driving models. In PerCom","DOI":"10.1109\/PerCom45495.2020.9127389"},{"key":"1657_CR57","doi-asserted-by":"crossref","unstructured":"Di, X. & Shi, R. (2020). A survey on autonomous vehicle control in the era of mixed-autonomy: From physics-based to ai-guided driving policy learning. CoRR","DOI":"10.1016\/j.trc.2021.103008"},{"key":"1657_CR58","unstructured":"Dickmanns, E. D. (2002). The development of machine vision for road vehicles in the last decade. In IV"},{"key":"1657_CR59","doi-asserted-by":"crossref","unstructured":"Ding, S. & Koehn, P. (2021). Evaluating saliency methods for neural language models. In NAACL","DOI":"10.18653\/v1\/2021.naacl-main.399"},{"key":"1657_CR60","doi-asserted-by":"crossref","unstructured":"Djuric, N., Radosavljevic, V., Cui, H., Nguyen, T., Chou, F., Lin, T., Singh, N., & Schneider, J. (2020). Uncertainty-aware short-term motion prediction of traffic actors for autonomous driving. In WACV","DOI":"10.1109\/WACV45572.2020.9093332"},{"key":"1657_CR61","unstructured":"Doshi-Velez, F., & Kim, B. (2017). Towards a rigorous science of interpretable machine learning. CoRR"},{"key":"1657_CR62","doi-asserted-by":"crossref","unstructured":"Doshi-Velez, F., & Kortz, M. A. (2017). Accountability of ai under the law: The role of explanation. CoRR","DOI":"10.2139\/ssrn.3064761"},{"key":"1657_CR63","unstructured":"Dosovitskiy, A., Ros, G., Codevilla, F., L\u00f3pez, A., & Koltun, V. (2017). CARLA: an open urban driving simulator. In CoRL"},{"key":"1657_CR64","doi-asserted-by":"crossref","unstructured":"Du, N., Haspiel, J., Zhang, Q., Tilbury, D., Pradhan, A. K., Yang, X. J., & Robert Jr, L.P. (2019). Look who\u2019s talking now: Implications of av\u2019s explanations on driver\u2019s trust, av preference, anxiety and mental workload. Transportation research part C: emerging technologies","DOI":"10.2139\/ssrn.3392063"},{"key":"1657_CR65","unstructured":"Erhan, D., Bengio, Y., Courville, A., & Vincent, P. (2009). Visualizing higher-layer features of a deep network. Technical Report, University of Montreal"},{"key":"1657_CR66","unstructured":"Espi\u00e9, E., Guionneau, C., Wymann, B., Dimitrakakis, C., Coulom, R., & Sumner, A. (2005). Torcs, the open racing car simulator"},{"key":"1657_CR67","doi-asserted-by":"crossref","unstructured":"Fellous, J. M., Sapiro, G., Rossi, A., Mayberg, H. S., & Ferrante, M. (2019). Explainable artificial intelligence for neuroscience: Behavioral neurostimulation. Frontiers in Neuroscience","DOI":"10.3389\/fnins.2019.01346"},{"key":"1657_CR68","doi-asserted-by":"crossref","unstructured":"Fong, R. C., & Vedaldi, A. (2017). Interpretable explanations of black boxes by meaningful perturbation. In ICCV","DOI":"10.1109\/ICCV.2017.371"},{"key":"1657_CR69","unstructured":"Frosst, N. & Hinton, G. E. (2017). Distilling a neural network into a soft decision tree. In Workshop on comprehensibility and explanation in AI and ML @AI*IA 2017"},{"key":"1657_CR70","doi-asserted-by":"crossref","unstructured":"Gao, C., Saraf, A., Huang, J., & Kopf, J. (2020). Flow-edge guided video completion. In ECCV","DOI":"10.1007\/978-3-030-58610-2_42"},{"key":"1657_CR71","doi-asserted-by":"crossref","unstructured":"Garfinkel, S., Matthews, J., Shapiro, S. S., & Smith, J. M. (2017). Toward algorithmic transparency and accountability. Communications ACM","DOI":"10.1145\/3125780"},{"key":"1657_CR72","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Stiller, C., & Urtasun, R. (2013). Vision meets robotics: The KITTI dataset. IJRR","DOI":"10.1177\/0278364913491297"},{"key":"1657_CR73","doi-asserted-by":"crossref","unstructured":"Geng, Z., Cao, C., & Tulyakov, S. (2020). Towards photo-realistic facial expression manipulation. IJCV","DOI":"10.1007\/s11263-020-01361-8"},{"key":"1657_CR74","doi-asserted-by":"crossref","unstructured":"Ghorbani, A., Abid, A., & Zou, J. Y. (2019). Interpretation of neural networks is fragile. In AAAI","DOI":"10.1609\/aaai.v33i01.33013681"},{"key":"1657_CR75","doi-asserted-by":"crossref","unstructured":"Gilpin, L. H., Bau, D., Yuan, B. Z., Bajwa, A., Specter, M., & Kagal, L. (2018). Explaining explanations: An overview of interpretability of machine learning. In DSSA","DOI":"10.1109\/DSAA.2018.00018"},{"key":"1657_CR76","unstructured":"Goyal, Y., Wu, Z., Ernst, J., Batra, D., Parikh, D., & Lee, S. (2019). Counterfactual visual explanations. In ICML"},{"key":"1657_CR77","doi-asserted-by":"crossref","unstructured":"Guidotti, R., Monreale, A., Ruggieri, S., Turini, F., Giannotti, F., & Pedreschi, D. (2018). A survey of methods for explaining black box models. ACM Computer Survey","DOI":"10.1145\/3236009"},{"key":"1657_CR78","unstructured":"Harradon, M., Druce, J., & Ruttenberg, B.E. (2018). Causal learning and explanation of deep neural networks via autoencoded activations. CoRR"},{"key":"1657_CR79","doi-asserted-by":"crossref","unstructured":"Hase, P., Zhang, S., Xie, H., & Bansal, M. (2020). Leakage-adjusted simulatability: Can models generate non-trivial explanations of their behavior in natural language? In T. Cohn, Y. He, Y. Liu (Eds.) EMNLP (Findings)","DOI":"10.18653\/v1\/2020.findings-emnlp.390"},{"key":"1657_CR80","doi-asserted-by":"crossref","unstructured":"Haspiel, J., Du, N., Meyerson, J., Jr L. P. R., Tilbury, D. M., Yang, X. J., & Pradhan, A. K. (2018). Explanations and expectations: Trust building in automated vehicles. In HRI","DOI":"10.1145\/3173386.3177057"},{"key":"1657_CR81","doi-asserted-by":"crossref","unstructured":"Hecker, S., Dai, D., Liniger, A., & Gool, L.V. (2020). Learning accurate and human-like driving using semantic maps and attention. CoRR","DOI":"10.1109\/IROS45743.2020.9341157"},{"key":"1657_CR82","doi-asserted-by":"crossref","unstructured":"Hendricks, LA., Akata, Z., Rohrbach, M., Donahue, J., Schiele, B., & Darrell, T. (2016). Generating visual explanations. In ECCV","DOI":"10.1007\/978-3-319-46493-0_1"},{"key":"1657_CR83","doi-asserted-by":"crossref","unstructured":"Hendricks, L. A., Hu, R., Darrell, T., & Akata, Z. (2018). Grounding visual explanations. In ECCV","DOI":"10.1007\/978-3-030-01216-8_17"},{"key":"1657_CR84","unstructured":"Herman, B. (2017). The promise and peril of human evaluation for model interpretability. CoRR"},{"key":"1657_CR85","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., & Hochreiter, S. (2017). Gans trained by a two time-scale update rule converge to a local nash equilibrium. In NIPS"},{"key":"1657_CR86","unstructured":"Hooker, S., Erhan, D., Kindermans, P., & Kim, B. (2019). A benchmark for interpretability methods in deep neural networks"},{"key":"1657_CR87","doi-asserted-by":"crossref","unstructured":"Hu, R., Andreas, J., Rohrbach, M., Darrell, T., & Saenko, K. (2017). Learning to reason: End-to-end module networks for visual question answering. In ICCV","DOI":"10.1109\/ICCV.2017.93"},{"key":"1657_CR88","unstructured":"Jacob, P., Zablocki, \u00c9., Ben-Younes, H., Chen, M., P\u00e9rez, P., & Cord, M. (2021). STEEX: steering counterfactual explanations with semantics. CoRR arXiv: abs\/2111.09094"},{"key":"1657_CR89","doi-asserted-by":"crossref","unstructured":"Jacovi, A. & Goldberg, Y. (2020a). Aligning faithful interpretations with their social attribution. TACL","DOI":"10.1162\/tacl_a_00367"},{"key":"1657_CR90","doi-asserted-by":"crossref","unstructured":"Jacovi, A. & Goldberg, Y. (2020b). Towards faithfully interpretable NLP systems: How should we define and evaluate faithfulness? In ACL","DOI":"10.18653\/v1\/2020.acl-main.386"},{"key":"1657_CR91","unstructured":"Jain, S. & Wallace, B. C. (2019). Attention is not explanation. In NAACL"},{"key":"1657_CR92","doi-asserted-by":"crossref","unstructured":"Janai, J., G\u00fcney, F., Behl, A., & Geiger, A. (2020a). Computer vision for autonomous vehicles: Problems, datasets and state of the art. Found Trends Computer Graph Vision","DOI":"10.1561\/9781680836899"},{"key":"1657_CR93","doi-asserted-by":"crossref","unstructured":"Janai, J., G\u00fcney, F., Behl, A., & Geiger, A. (2020b). Computer vision for autonomous vehicles: Problems, datasets and state of the art. Foundations and Trends\u00ae in Computer Graphics and Vision. 12(1), 1\u2013308","DOI":"10.1561\/0600000079"},{"key":"1657_CR94","doi-asserted-by":"crossref","unstructured":"Jansen, P., Smith, K., Moreno, D., & Ortiz, H. (2021). On the challenges of evaluating compositional explanations in multi-hop inference: Relevance, completeness, and expert ratings. CoRRarXiv:2109.03334","DOI":"10.18653\/v1\/2021.emnlp-main.596"},{"key":"1657_CR95","doi-asserted-by":"crossref","unstructured":"Johnson, J., Hariharan, B., van\u00a0der Maaten, L., Fei-Fei, L., Zitnick, C. L., & Girshick, R. B. (2017). CLEVR: A diagnostic dataset for compositional language and elementary visual reasoning. In CVPR","DOI":"10.1109\/CVPR.2017.215"},{"key":"1657_CR96","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., & Aila, T. (2019). A style-based generator architecture for generative adversarial networks. In CVPR","DOI":"10.1109\/CVPR.2019.00453"},{"key":"1657_CR97","doi-asserted-by":"crossref","unstructured":"Kim, J. & Canny, J. F. (2017). Interpretable learning for self-driving cars by visualizing causal attention. In ICCV","DOI":"10.1109\/ICCV.2017.320"},{"key":"1657_CR98","doi-asserted-by":"crossref","unstructured":"Kim, J., & Bansal, M. (2020). Attentional bottleneck: Towards an interpretable deep driving network. In CVPR Workshops","DOI":"10.1109\/CVPRW50498.2020.00169"},{"key":"1657_CR99","doi-asserted-by":"crossref","unstructured":"Kim, H., Lee, K., Hwang, G., & Suh, C. (2019). Crash to not crash: Learn to identify dangerous vehicles using a simulator. In AAAI","DOI":"10.1609\/aaai.v33i01.3301978"},{"key":"1657_CR100","doi-asserted-by":"crossref","unstructured":"Kim, J., Rohrbach, A., Darrell, T., Canny, J. F., & Akata, Z. (2018). Textual explanations for self-driving vehicles. In ECCV","DOI":"10.1007\/978-3-030-01216-8_35"},{"key":"1657_CR101","unstructured":"Kiran, BR., Sobh, I., Talpaert, V., Mannion, P., Sallab, A. A. A., Yogamani, S. K., & P\u00e9rez, P. (2020). Deep reinforcement learning for autonomous driving: A survey. CoRR"},{"key":"1657_CR102","doi-asserted-by":"crossref","unstructured":"Koo, J., Kwac, J., Ju, W., Steinert, M., Leifer, L., & Nass, C. (2015). Why did my car just do that? explaining semi-autonomous driving actions to improve driver understanding, trust, and performance. IJIDeM","DOI":"10.1007\/s12008-014-0227-2"},{"key":"1657_CR103","doi-asserted-by":"crossref","unstructured":"Koren, M., Alsaif, S., Lee, R., & Kochenderfer, M. J. (2018). Adaptive stress testing for autonomous vehicles. In IV","DOI":"10.1109\/IVS.2018.8500400"},{"key":"1657_CR104","doi-asserted-by":"crossref","unstructured":"Kr\u00e4henb\u00fchl, P. (2018). Free supervision from video games. In CVPR","DOI":"10.1109\/CVPR.2018.00312"},{"key":"1657_CR105","unstructured":"Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). Imagenet classification with deep convolutional neural networks. In NIPS"},{"key":"1657_CR106","unstructured":"Lample, G., Zeghidour, N., Usunier, N., Bordes, A., Denoyer, L., & Ranzato, M. (2017). Fader networks: Manipulating images by sliding attributes. In NIPS"},{"key":"1657_CR107","doi-asserted-by":"crossref","unstructured":"LeCun, Y., Bengio, Y., & Hinton, G. E. (2015). Deep learning. Nature","DOI":"10.1038\/nature14539"},{"key":"1657_CR108","doi-asserted-by":"crossref","unstructured":"Lee, J. & Moray, N. (1992). Trust, control strategies and allocation of function in human-machine systems. Ergonomics","DOI":"10.1080\/00140139208967392"},{"key":"1657_CR109","doi-asserted-by":"crossref","unstructured":"Lee, J. D., & Moray, N. (1994). Trust, self-confidence, and operators\u2019 adaptation to automation. International Journal of Human-computer Studies","DOI":"10.1006\/ijhc.1994.1007"},{"key":"1657_CR110","doi-asserted-by":"crossref","unstructured":"Lee, J. D., & See, K. A. (2004). Trust in automation: Designing for appropriate reliance. Human Factors","DOI":"10.1518\/hfes.46.1.50.30392"},{"key":"1657_CR111","doi-asserted-by":"crossref","unstructured":"Lee, N., Choi, W., Vernaza, P., Choy, C. B., Torr, P. H. S., & Chandraker, M. (2017). DESIRE: distant future prediction in dynamic scenes with interacting agents. In CVPR","DOI":"10.1109\/CVPR.2017.233"},{"key":"1657_CR112","doi-asserted-by":"crossref","unstructured":"Lee, R., Kochenderfer, M. J., Mengshoel, O. J., & Silbermann, J. (2018). Interpretable categorization of heterogeneous time series data. In SDM","DOI":"10.1137\/1.9781611975321.25"},{"key":"1657_CR113","doi-asserted-by":"crossref","unstructured":"Leonard, J., How, J., Teller, S., Berger, M., Campbell, S., Fiore, G., Fletcher, L., Frazzoli, E., Huang, A., Karaman, S., & Koch, O. (2008). A perception-driven autonomous urban vehicle. Journal of Field Robotics","DOI":"10.1002\/rob.20262"},{"key":"1657_CR114","doi-asserted-by":"crossref","unstructured":"Li, C., Chan, S. H., & Chen, Y. (2020c). Who make drivers stop? towards driver-centric risk assessment: Risk object identification via causal inference. In IROS","DOI":"10.1109\/IROS45743.2020.9341072"},{"key":"1657_CR115","unstructured":"Li, Z., Motoyoshi, T., Sasaki, K., Ogata, T., & Sugano, S. (2018b). Rethinking self-driving: Multi-task knowledge for better generalization and accident explanation ability. CoRR"},{"key":"1657_CR116","doi-asserted-by":"crossref","unstructured":"Li, B., Qi, X., Lukasiewicz, T., & Torr, P. H. S. (2020a). Manigan: Text-guided image manipulation. In CVPR","DOI":"10.1109\/CVPR42600.2020.00790"},{"key":"1657_CR117","doi-asserted-by":"crossref","unstructured":"Li, B., Qi, X., Torr, P. H. S., & Lukasiewicz, T. (2020b). Lightweight generative adversarial networks for text-guided image manipulation. In NeurIPS","DOI":"10.1109\/CVPR42600.2020.00790"},{"key":"1657_CR118","doi-asserted-by":"crossref","unstructured":"Li, Q., Tao, Q., Joty, S. R., Cai, J., & Luo, J. (2018a). VQA-E: explaining, elaborating, and enhancing your answers for visual questions. In ECCV","DOI":"10.1007\/978-3-030-01234-2_34"},{"key":"1657_CR119","unstructured":"Li, Y., Torralba, A., Anandkumar, A., Fox, D., & Garg, A. (2020d). Causal discovery in physical systems from videos. NeurIPS"},{"key":"1657_CR120","doi-asserted-by":"crossref","unstructured":"Lipton, Z. C. (2018). The mythos of model interpretability. Communications ACM","DOI":"10.1145\/3233231"},{"key":"1657_CR121","doi-asserted-by":"crossref","unstructured":"Liu, Y., Hsieh, Y., Chen, M., Yang, C. H., Tegn\u00e9r, J., & Tsai, Y. J. (2020). Interpretable self-attention temporal reasoning for driving behavior understanding. In ICASSP","DOI":"10.1109\/ICASSP40776.2020.9053783"},{"key":"1657_CR122","doi-asserted-by":"crossref","unstructured":"Liu, G., Reda, F. A., Shih, K. J., Wang, T., Tao, A., & Catanzaro, B. (2018). Image inpainting for irregular holes using partial convolutions. In ECCV","DOI":"10.1007\/978-3-030-01252-6_6"},{"key":"1657_CR123","doi-asserted-by":"crossref","unstructured":"Liu, H., Yin, Q., & Wang, W. Y. (2019). Towards explainable NLP: A generative explanation framework for text classification. In ACL","DOI":"10.18653\/v1\/P19-1560"},{"key":"1657_CR124","doi-asserted-by":"crossref","unstructured":"Lu, X., Tolmachev, A., Yamamoto, T., Takeuchi, K., Okajima, S., Takebayashi, T., Maruhashi, K., & Kashima, H. (2021). Crowdsourcing evaluation of saliency-based XAI methods. In ECML-PKDD","DOI":"10.1007\/978-3-030-86517-7_27"},{"key":"1657_CR125","unstructured":"Lu, J., Yang, J., Batra, D., & Parikh, D. (2016). Hierarchical question-image co-attention for visual question answering. In NIPS"},{"key":"1657_CR126","unstructured":"Lundberg, S. M., & Lee, S. (2017). A unified approach to interpreting model predictions. In NIPS"},{"key":"1657_CR127","doi-asserted-by":"crossref","unstructured":"Ly, A. O., & Akhloufi, M. A. (2020). Learning to drive by imitation: an overview of deep behavior cloning methods. T-IV","DOI":"10.1109\/TIV.2020.3002505"},{"key":"1657_CR128","unstructured":"Maaten, Lvd., & Hinton, G. (2008). Visualizing data using t-sne. JMLR"},{"key":"1657_CR129","doi-asserted-by":"crossref","unstructured":"Mac\u00a0Aodha, O., Su, S., Chen, Y., Perona, P., & Yue, Y. (2018). Teaching categories to human learners with visual explanations. In CVPR","DOI":"10.1109\/CVPR.2018.00402"},{"key":"1657_CR130","doi-asserted-by":"crossref","unstructured":"Madumal, P., Miller, T,. Sonenberg, L., & Vetere, F. (2020). Explainable reinforcement learning through a causal lens. In AAAI","DOI":"10.1609\/aaai.v34i03.5631"},{"key":"1657_CR131","unstructured":"Makino, T., Jastrzebski, S., Oleszkiewicz, W., Chacko, C., Ehrenpreis, R., Samreen, N., Chhor, C., Kim, E., Lee, J., Pysarenko, K., Reig, B., Toth, H., Awal, D., Du, L., Kim, A., Park, J., Sodickson, D. K., Heacock, L., Moy, L., Cho, K., & Geras, K. J. (2020). Differences between human and machine perception in medical diagnosis. CoRR"},{"key":"1657_CR132","doi-asserted-by":"crossref","unstructured":"Malinowski, M., Rohrbach, M., & Fritz, M. (2017). Ask your neurons: A deep learning approach to visual question answering. IJCV","DOI":"10.1007\/s11263-017-1038-2"},{"key":"1657_CR133","unstructured":"Manzo, U. G., Chiroma, H., Aljojo, N., Abubakar, S., Popoola, S. I., & Al-Garadi, M. A. (2020). A survey on deep learning for steering angle prediction in autonomous vehicles. IEEE Access"},{"key":"1657_CR134","doi-asserted-by":"crossref","unstructured":"Maximov, M., Elezi, I., & Leal-Taix\u00e9, L. (2020). CIAGAN: conditional identity anonymization generative adversarial networks. In CVPR","DOI":"10.1109\/CVPR42600.2020.00549"},{"key":"1657_CR135","doi-asserted-by":"crossref","unstructured":"McAllister, R., Gal, Y., Kendall, A., van der Wilk, M,. Shah, A., Cipolla, R., & Weller, A. (2017). Concrete problems for autonomous vehicle safety: Advantages of bayesian deep learning. In IJCAI","DOI":"10.24963\/ijcai.2017\/661"},{"key":"1657_CR136","unstructured":"Mehrabi, N,. Morstatter, F., Saxena, N., Lerman, K., & Galstyan, A. (2019). A survey on bias and fairness in machine learning. CoRR"},{"key":"1657_CR137","doi-asserted-by":"crossref","unstructured":"Mehta, A., Subramanian, A., & Subramanian, A. (2018). Learning end-to-end autonomous driving using guided auxiliary supervision. In ICVGIP","DOI":"10.1145\/3293353.3293364"},{"key":"1657_CR138","doi-asserted-by":"crossref","unstructured":"Michon, J. (1984). A critical view of driver behavior models: What do we know, what should we do? Human behavior and traffic safety","DOI":"10.1007\/978-1-4613-2173-6_19"},{"key":"1657_CR139","unstructured":"Mohseni, S., Jagadeesh, A., & Wang, Z. (2019). Predicting model failure using saliency maps in autonomous driving systems. Workshop on Uncertainty and Robustness in Deep Learning @ICML"},{"key":"1657_CR140","unstructured":"Moing, G. L., Vu, T., Jain, H., P\u00e9rez, P., & Cord, M. (2021). Semantic palette: Guiding scene generation with class proportions. In CVPR"},{"key":"1657_CR141","doi-asserted-by":"crossref","unstructured":"Molnar, C. (2019). Interpretable machine learning","DOI":"10.21105\/joss.00786"},{"key":"1657_CR142","doi-asserted-by":"crossref","unstructured":"Moraffah, R., Karami, M., Guo, R., Raglin, A., & Liu, H. (2020). Causal interpretability for machine learning - problems, methods and evaluation. SIGKDD Explorations","DOI":"10.1145\/3400051.3400058"},{"key":"1657_CR143","unstructured":"Mordan, T., Cord, M., P\u00e9rez, P., & Alahi, A. (2020). Detecting 32 pedestrian attributes for autonomous vehicles. CoRR"},{"key":"1657_CR144","unstructured":"Morgulis, N., Kreines, A., Mendelowitz, S., & Weisglass, Y. (2019). Fooling a real car with adversarial traffic signs. CoRR"},{"key":"1657_CR145","doi-asserted-by":"crossref","unstructured":"Mori, K., Fukui, H., Murase, T., Hirakawa, T., Yamashita, T., & Fujiyoshi, H. (2019). Visual explanation by attention branch network for end-to-end learning-based self-driving. In IV","DOI":"10.1109\/IVS.2019.8813900"},{"key":"1657_CR146","doi-asserted-by":"crossref","unstructured":"Morton, J. & Kochenderfer, M. J. (2017). Simultaneous policy learning and latent state inference for imitating driver behavior. In ITSC","DOI":"10.1109\/ITSC.2017.8317738"},{"key":"1657_CR147","unstructured":"M\u00fcller, M., Dosovitskiy, A., Ghanem, B., & Koltun, V. (2018). Driving policy transfer via modularity and abstraction. In CoRL"},{"key":"1657_CR148","unstructured":"Narendra, T., Sankaran, A., Vijaykeerthy, D., & Mani, S. (2018). Explaining deep learning models using causal inference. CoRR"},{"key":"1657_CR149","unstructured":"Nguyen, A. M., Dosovitskiy, A., Yosinski, J., Brox, T., & Clune, J. (2016). Synthesizing the preferred inputs for neurons in neural networks via deep generator networks. In NIPS"},{"key":"1657_CR150","doi-asserted-by":"crossref","unstructured":"Omeiza, D., Webb, H., Jirotka, M., & Kunze, L. (2021). Explanations in autonomous driving: A survey. CoRR","DOI":"10.1109\/TITS.2021.3122865"},{"key":"1657_CR151","unstructured":"Oramas, J., Wang, K., & Tuytelaars, T. (2019). Visual explanation by interpretation: Improving visual feedback capabilities of deep neural networks. In ICLR"},{"key":"1657_CR152","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., & Zhu, W. (2002). Bleu: a method for automatic evaluation of machine translation. In ACL","DOI":"10.3115\/1073083.1073135"},{"key":"1657_CR153","doi-asserted-by":"crossref","unstructured":"Park, DH., Hendricks, LA., Akata, Z., Rohrbach, A., Schiele, B., Darrell, T., & Rohrbach, M. (2018). Multimodal explanations: Justifying decisions and pointing to the evidence. In CVPR","DOI":"10.1109\/CVPR.2018.00915"},{"key":"1657_CR154","doi-asserted-by":"crossref","unstructured":"Patro, B. N., Anupriy, & Namboodiri, V. (2020). Explanation vs attention: A two-player game to obtain attention for VQA. In AAAI","DOI":"10.1609\/aaai.v34i07.6858"},{"key":"1657_CR155","doi-asserted-by":"crossref","unstructured":"Pearl, J. (2009). Causality","DOI":"10.1017\/CBO9780511803161"},{"key":"1657_CR156","doi-asserted-by":"crossref","unstructured":"Pei, K., Cao, Y., Yang, J., & Jana, S. (2019). Deepxplore: automated whitebox testing of deep learning systems. Communicaitons ACM.","DOI":"10.1145\/3361566"},{"key":"1657_CR157","doi-asserted-by":"crossref","unstructured":"Phan-Minh, T., Grigore, E. C., Boulton, F. A., Beijbom, O., & Wolff, E. M. (2020). Covernet: Multimodal behavior prediction using trajectory sets. In CVPR","DOI":"10.1109\/CVPR42600.2020.01408"},{"key":"1657_CR158","unstructured":"Pomerleau, D. (1988). ALVINN: an autonomous land vehicle in a neural network. In NIPS"},{"key":"1657_CR159","unstructured":"Pu, Y., Gan, Z., Henao, R., Yuan, X., Li, C., Stevens, A., & Carin, L. (2016). Variational autoencoder for deep learning of images, labels and captions. In NIPS"},{"key":"1657_CR160","doi-asserted-by":"crossref","unstructured":"Rajani, N. F., McCann, B., Xiong, C., & Socher, R. (2019). Explain yourself! leveraging language models for commonsense reasoning. In ACL","DOI":"10.18653\/v1\/P19-1487"},{"key":"1657_CR161","unstructured":"Ramakrishnan, S., Agrawal, A., & Lee, S. (2018). Overcoming language priors in visual question answering with adversarial regularization. In NeurIPS"},{"key":"1657_CR162","doi-asserted-by":"crossref","unstructured":"Ramanishka, V., Chen, Y., Misu, T., & Saenko, K. (2018). Toward driving scene understanding: A dataset for learning driver behavior and causal reasoning. In CVPR","DOI":"10.1109\/CVPR.2018.00803"},{"key":"1657_CR163","unstructured":"Rathi, S. (2019). Generating counterfactual and contrastive explanations using SHAP. Workshop on Humanizing AI (HAI) @IJCAI"},{"key":"1657_CR164","doi-asserted-by":"crossref","unstructured":"Razavian, A. S., Azizpour, H., Sullivan, J., & Carlsson, S. (2014). CNN features off-the-shelf: An astounding baseline for recognition. In CVPR Workshops","DOI":"10.1109\/CVPRW.2014.131"},{"key":"1657_CR165","doi-asserted-by":"crossref","unstructured":"Ren, Z., Lee, Y. J., & Ryoo, M. S. (2018). Learning to anonymize faces for privacy preserving action detection. In: V. Ferrari, M. Hebert, C. Sminchisescu, Y. Weiss (Eds.) ECCV","DOI":"10.1007\/978-3-030-01246-5_38"},{"key":"1657_CR166","doi-asserted-by":"crossref","unstructured":"Rezvani, T., Driggs-Campbell, K. R., Sadigh, D., Sastry, S. S., Seshia, S. A., & Bajcsy, R. (2016). Towards trustworthy automation: User interfaces that convey internal and external awareness. In ITSC","DOI":"10.1109\/ITSC.2016.7795627"},{"key":"1657_CR167","doi-asserted-by":"crossref","unstructured":"Ribeiro, M. T., Singh, S., & Guestrin, C. (2016). \u201cwhy should I trust you?\u201d: Explaining the predictions of any classifier. In SIGKDD","DOI":"10.1145\/2939672.2939778"},{"key":"1657_CR168","doi-asserted-by":"crossref","unstructured":"Ribeiro, M. T., Singh, S., & Guestrin, C. (2018). Anchors: High-precision model-agnostic explanations. In AAAI","DOI":"10.1609\/aaai.v32i1.11491"},{"key":"1657_CR169","doi-asserted-by":"crossref","unstructured":"Riquelme, F., Goyeneche, A. D., Zhang, Y., Niebles, J. C., & Soto, A. (2020). Explaining VQA predictions using visual grounding and a knowledge base. Image Vision Computer","DOI":"10.1016\/j.imavis.2020.103968"},{"key":"1657_CR170","doi-asserted-by":"crossref","unstructured":"Rodr\u00edguez, P., Caccia, M., Lacoste, A., Zamparo, L., Laradji, I. H., Charlin, L., V\u00e1zquez, D. (2021). Beyond trivial counterfactual explanations with diverse valuable explanations. CoRR arXiv: abs\/2103.10226","DOI":"10.1109\/ICCV48922.2021.00109"},{"key":"1657_CR171","unstructured":"Rosenfeld, A. & Richardson, A. (2020). Why, who, what, when and how about explainability in human-agent systems. In AAMAS"},{"key":"1657_CR172","doi-asserted-by":"crossref","unstructured":"Sadat, A., Casas, S., Ren, M., Wu, X., Dhawan, P., & Urtasun, R. (2020). Perceive, predict, and plan: Safe motion planning through interpretable semantic representations. In ECCV","DOI":"10.1007\/978-3-030-58592-1_25"},{"key":"1657_CR173","doi-asserted-by":"crossref","unstructured":"Salzmann, T., Ivanovic, B., Chakravarty, P., & Pavone, M. (2020). Trajectron++: Dynamically-feasible trajectory forecasting with heterogeneous data. In H. Bischof, T. Brox, J. Frahm, & A. Vedaldi (Eds.) Lecture Notes in Computer Science: ECCV.","DOI":"10.1007\/978-3-030-58523-5_40"},{"issue":"11","key":"1657_CR174","doi-asserted-by":"publisher","first-page":"2660","DOI":"10.1109\/TNNLS.2016.2599820","volume":"28","author":"W Samek","year":"2017","unstructured":"Samek, W., Binder, A., Montavon, G., Lapuschkin, S., & M\u00fcller, K. (2017). Evaluating the visualization of what a deep neural network has learned. IEEE Transactions Neural Networks Learning System, 28(11), 2660\u20132673. https:\/\/doi.org\/10.1109\/TNNLS.2016.2599820.","journal-title":"IEEE Transactions Neural Networks Learning System"},{"key":"1657_CR175","unstructured":"Sato, M., & Tsukimoto, H. (2001). Rule extraction from neural networks via decision tree induction. In IJCNN"},{"key":"1657_CR176","unstructured":"Sauer, A., Savinov, N., & Geiger, A. (2018). Conditional affordance learning for driving in urban environments. In CoRL"},{"key":"1657_CR177","doi-asserted-by":"crossref","unstructured":"Selvaraju, R. R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., Batra, D. (2020). Grad-cam: Visual explanations from deep networks via gradient-based localization. International Journal of Computer Vision","DOI":"10.1007\/s11263-019-01228-7"},{"key":"1657_CR178","doi-asserted-by":"crossref","unstructured":"Shariff, A., Bonnefon, J. F., & Rahwan, I. (2017). Psychological roadblocks to the adoption of self-driving vehicles. Nature Human Behaviour","DOI":"10.1038\/s41562-017-0202-6"},{"key":"1657_CR179","unstructured":"Shen, Y., Jiang, S., Chen, Y., Yang, E., Jin, X., Fan, Y., & Campbell, K. D. (2020). To explain or not to explain: A study on the necessity of explanations for autonomous vehicles. CoRR"},{"key":"1657_CR180","unstructured":"Shrikumar, A., Greenside, P., & Kundaje, A. (2017). Learning important features through propagating activation differences. In ICML"},{"key":"1657_CR181","unstructured":"Simonyan, K., Vedaldi, A., & Zisserman, A. (2014). Deep inside convolutional networks: Visualising image classification models and saliency maps. In ICLR"},{"key":"1657_CR182","unstructured":"Singla, S., Pollack, B., Chen, J., & Batmanghelich, K. (2020). Explanation by progressive exaggeration. In ICLR, OpenReview.net"},{"key":"1657_CR183","doi-asserted-by":"crossref","unstructured":"Srikanth, S., Ansari, J. A., R, K. R., Sharma, S., Murthy, J. K., & Krishna, K. M. (2019). INFER: intermediate representations for future prediction. In IROS","DOI":"10.1109\/IROS40897.2019.8968553"},{"key":"1657_CR184","doi-asserted-by":"crossref","unstructured":"Sun, Q., Ma, L., Oh, S. J., Gool, L. V., Schiele, B., & Fritz, M. (2018). Natural and effective obfuscation by head inpainting. In CVPR, Computer Vision Foundation\/IEEE Computer Society","DOI":"10.1109\/CVPR.2018.00530"},{"key":"1657_CR185","unstructured":"Sundararajan, M., Taly, A., & Yan, Q. (2017). Axiomatic attribution for deep networks. In ICML"},{"key":"1657_CR186","doi-asserted-by":"crossref","unstructured":"Suzuki, T., Kataoka, H., Aoki, Y., & Satoh, Y. (2018). Anticipating traffic accidents with adaptive loss and large-scale incident DB. In CVPR","DOI":"10.1109\/CVPR.2018.00371"},{"key":"1657_CR187","unstructured":"Szegedy, C., Zaremba, W,. Sutskever, I., Bruna, J., Erhan, D., Goodfellow, I. J., & Fergus, R. (2014). Intriguing properties of neural networks. In ICLR"},{"key":"1657_CR188","doi-asserted-by":"crossref","unstructured":"Tan, S., Wong, K., Wang, S., Manivasagam, S., Ren, M., & Urtasun, R. (2021). Scenegen: Learning to generate realistic traffic scenes. In CVPR","DOI":"10.1109\/CVPR46437.2021.00095"},{"key":"1657_CR189","doi-asserted-by":"crossref","unstructured":"Thrun, S., Montemerlo, M., Dahlkamp, H., Stavens, D., Aron, A., Diebel, J., Fong, P., Gale, J., Halpenny, M., Hoffmann, G., & Lau, K. (2006). Stanley: The robot that won the darpa grand challenge. Journal of field Robotics","DOI":"10.1007\/978-3-540-73429-1_1"},{"key":"1657_CR190","doi-asserted-by":"crossref","unstructured":"Tian, Y., Pei, K., Jana, S., & Ray, B. (2018). Deeptest: automated testing of deep-neural-network-driven autonomous cars. In ICSE","DOI":"10.1145\/3180155.3180220"},{"key":"1657_CR191","unstructured":"Tjoa, E. & Guan, C. (2019). A survey on explainable artificial intelligence (XAI): towards medical XAI. CoRR"},{"key":"1657_CR192","doi-asserted-by":"crossref","unstructured":"Tomei, M., Baraldi, L., Bronzin, S., & Cucchiara, R. (2021). Estimating (and fixing) the effect of face obfuscation in video recognition. In CVPR Workshops","DOI":"10.1109\/CVPRW53098.2021.00364"},{"key":"1657_CR193","doi-asserted-by":"crossref","unstructured":"Tommasi, T., Patricia, N., Caputo, B., & Tuytelaars, T. (2017). A deeper look at dataset bias. In Domain adaptation in computer vision applications","DOI":"10.1007\/978-3-319-58347-1_2"},{"key":"1657_CR194","doi-asserted-by":"crossref","unstructured":"Toromanoff, M., Wirbel, \u00c9., & Moutarde, F. (2020). End-to-end model-free reinforcement learning for urban driving using implicit affordances. In CVPR","DOI":"10.1109\/CVPR42600.2020.00718"},{"key":"1657_CR195","doi-asserted-by":"crossref","unstructured":"Torralba, A., & Efros, AA. (2011). Unbiased look at dataset bias. In CVPR","DOI":"10.1109\/CVPR.2011.5995347"},{"key":"1657_CR196","doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L. D., Fergus, R., Torresani, L., & Paluri, M. (2015). Learning spatiotemporal features with 3d convolutional networks. In ICCV","DOI":"10.1109\/ICCV.2015.510"},{"key":"1657_CR197","doi-asserted-by":"crossref","unstructured":"Tulyakov, S., Liu, M., Yang, X., & Kautz, J. (2018). Mocogan: Decomposing motion and content for video generation. In CVPR","DOI":"10.1109\/CVPR.2018.00165"},{"key":"1657_CR198","doi-asserted-by":"crossref","unstructured":"Urmson, C., Anhalt, J., Bagnell, D., Baker, C., Bittner, R., Clark, M.N., Dolan, J., Duggins, D., Galatali, T., Geyer, C. & Gittleman, M. (2008). Autonomous driving in urban environments: Boss and the urban challenge. Journal of Field Robotics","DOI":"10.1002\/rob.20255"},{"key":"1657_CR199","doi-asserted-by":"crossref","unstructured":"Vedantam, R., Zitnick, C. L., & Parikh, D. (2015). Cider: Consensus-based image description evaluation. In CVPR","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"1657_CR200","unstructured":"Vilone, G. & Longo, L. (2020). Explainable artificial intelligence: a systematic review. CoRR"},{"key":"1657_CR201","doi-asserted-by":"crossref","unstructured":"Wachter, S., Mittelstadt, B. D., & Russell, C. (2017). Counterfactual explanations without opening the black box: Automated decisions and the GDPR. CoRR","DOI":"10.2139\/ssrn.3063289"},{"key":"1657_CR202","doi-asserted-by":"crossref","unstructured":"Wang, D., Devin, C., Cai, Q., Yu, F., & Darrell, T. (2019). Deep object-centric policies for autonomous driving. In ICRA","DOI":"10.1109\/ICRA.2019.8794224"},{"key":"1657_CR203","doi-asserted-by":"crossref","unstructured":"Wojek, C., Walk, S., Roth, S., & Schiele, B. (2011). Monocular 3d scene understanding with explicit occlusion reasoning. In CVPR","DOI":"10.1109\/CVPR.2011.5995547"},{"key":"1657_CR204","doi-asserted-by":"crossref","unstructured":"Wojek, C., Walk, S., Roth, S., Schindler, K., & Schiele, B. (2013). Monocular visual scene understanding: Understanding multi-object traffic scenes. TPAMI","DOI":"10.1109\/TPAMI.2012.174"},{"key":"1657_CR205","unstructured":"Xie, N., Ras, G., van Gerven, M., & Doran, D. (2020). Explainable deep learning: A field guide for the uninitiated. CoRR"},{"key":"1657_CR206","doi-asserted-by":"crossref","unstructured":"Xu, H. & Saenko, K. (2016). Ask, attend and answer: Exploring question-guided spatial attention for visual question answering. In ECCV","DOI":"10.1007\/978-3-319-46478-7_28"},{"key":"1657_CR207","unstructured":"Xu, K., Ba, J., Kiros, R., Cho, K., Courville, A. C., Salakhutdinov, R., Zemel, RS., & Bengio, Y. (2015). Show, attend and tell: Neural image caption generation with visual attention. In ICML"},{"key":"1657_CR208","doi-asserted-by":"crossref","unstructured":"Xu, H., Gao, Y., Yu, F., & Darrell, T. (2017). End-to-end learning of driving models from large-scale video datasets. In CVPR","DOI":"10.1109\/CVPR.2017.376"},{"key":"1657_CR209","doi-asserted-by":"crossref","unstructured":"Xu, Y., Yang, X., Gong, L., Lin, H., Wu, T., Li, Y., & Vasconcelos, N. (2020). Explainable object-induced action decision for autonomous vehicles. In CVPR","DOI":"10.1109\/CVPR42600.2020.00954"},{"key":"1657_CR210","doi-asserted-by":"crossref","unstructured":"Yang, Z., He, X., Gao, J., Deng, L., & Smola, A. J. (2016). Stacked attention networks for image question answering. In CVPR","DOI":"10.1109\/CVPR.2016.10"},{"key":"1657_CR211","unstructured":"Yang, Z., Manivasagam, S., Liang, M., Yang, B., Ma, W., & Urtasun, R. (2020). Recovering and simulating pedestrians in the wild. CoRL"},{"key":"1657_CR212","doi-asserted-by":"crossref","unstructured":"You, T. & Han, B. (2020). Traffic accident benchmark for causality recognition. In ECCV","DOI":"10.1007\/978-3-030-58571-6_32"},{"key":"1657_CR213","doi-asserted-by":"crossref","unstructured":"Yu, F., Chen, H., Wang, X., Xian, W., Chen, Y., Liu, F., Madhavan, V., & Darrell, T. (2020). BDD100K: A diverse driving dataset for heterogeneous multitask learning. In CVPR","DOI":"10.1109\/CVPR42600.2020.00271"},{"key":"1657_CR214","unstructured":"Zahavy, T., Ben-Zrihem, N., & Mannor, S. (2016). Graying the black box: Understanding dqns. In ICML"},{"key":"1657_CR215","doi-asserted-by":"crossref","unstructured":"Zeiler, M. D., & Fergus, R. (2014). Visualizing and understanding convolutional networks. In ECCV","DOI":"10.1007\/978-3-319-10590-1_53"},{"key":"1657_CR216","doi-asserted-by":"crossref","unstructured":"Zellers, R., Bisk, Y., Farhadi, A., & Choi, Y. (2019). From recognition to cognition: Visual commonsense reasoning. In CVPR","DOI":"10.1109\/CVPR.2019.00688"},{"key":"1657_CR217","doi-asserted-by":"crossref","unstructured":"Zeng, K., Chou, S., Chan, F., Niebles, J.C., & Sun, M. (2017). Agent-centric risk assessment: Accident anticipation and risky region localization. In CVPR","DOI":"10.1109\/CVPR.2017.146"},{"key":"1657_CR218","doi-asserted-by":"crossref","unstructured":"Zeng, W., Luo, W., Suo, S., Sadat, A., Yang, B., Casas, S., & Urtasun, R. (2019). End-to-end interpretable neural motion planner. In CVPR","DOI":"10.1109\/CVPR.2019.00886"},{"key":"1657_CR219","doi-asserted-by":"crossref","unstructured":"Zhang, Q., & Zhu, S. (2018). Visual interpretability for deep learning: a survey. Frontiers of Information Technology & Electronic Engineering","DOI":"10.1631\/FITEE.1700808"},{"key":"1657_CR220","doi-asserted-by":"crossref","unstructured":"Zhang, Q., Cao, R., Shi, F., Wu, Y. N., & Zhu, S. (2018a). Interpreting CNN knowledge via an explanatory graph. In AAAI","DOI":"10.1609\/aaai.v32i1.11819"},{"key":"1657_CR221","doi-asserted-by":"crossref","unstructured":"Zhang, H., Geiger, A., & Urtasun, R. (2013). Understanding high-level semantics by modeling traffic patterns. In: ICCV","DOI":"10.1109\/ICCV.2013.379"},{"key":"1657_CR222","doi-asserted-by":"crossref","unstructured":"Zhang, Q., Wu, YN., & Zhu, S. (2018b). Interpretable convolutional neural networks. In CVPR","DOI":"10.1109\/CVPR.2018.00920"},{"key":"1657_CR223","doi-asserted-by":"crossref","unstructured":"Zhang, Q., Yang, X. J., & Robert, L. P. (2020). Expectations and trust in automated vehicles. In CHI","DOI":"10.1145\/3334480.3382986"},{"key":"1657_CR224","doi-asserted-by":"crossref","unstructured":"Zhao, B., Yin, W., Meng, L., & Sigal, L. (2020). Layout2image: Image generation from layout. IJCV","DOI":"10.1007\/s11263-020-01300-7"},{"key":"1657_CR225","unstructured":"Zhou, B., Khosla, A., Lapedriza, \u00c0., Oliva, A., & Torralba, A. (2015a). Object detectors emerge in deep scene cnns. In ICLR"},{"key":"1657_CR226","unstructured":"Zhou, B., Khosla, A., Lapedriza, \u00c0., Oliva, A., & Torralba, A. (2015b). Object detectors emerge in deep scene cnns. In ICLR"},{"key":"1657_CR227","doi-asserted-by":"crossref","unstructured":"Zhou, B., Khosla, A., Lapedriza, \u00c0., Oliva, A., & Torralba, A. (2016). Learning deep features for discriminative localization. In CVPR","DOI":"10.1109\/CVPR.2016.319"},{"key":"1657_CR228","doi-asserted-by":"crossref","unstructured":"Zhou, B., Kr\u00e4henb\u00fchl, P., & Koltun, V. (2019). Does computer vision matter for action? Scientific Robotics","DOI":"10.1126\/scirobotics.aaw6661"},{"key":"1657_CR229","doi-asserted-by":"crossref","unstructured":"Zilke, J. R., Menc\u00eda, E. L., & Janssen, F. (2016). Deepred - rule extraction from deep neural networks. In DS","DOI":"10.1007\/978-3-319-46307-0_29"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-022-01657-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-022-01657-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-022-01657-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T22:25:58Z","timestamp":1727735158000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-022-01657-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,7]]},"references-count":229,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2022,10]]}},"alternative-id":["1657"],"URL":"https:\/\/doi.org\/10.1007\/s11263-022-01657-x","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,8,7]]},"assertion":[{"value":"19 January 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 July 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 August 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no relevant financial or non-financial interests to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interests"}}]}}