{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,25]],"date-time":"2026-02-25T05:47:06Z","timestamp":1771998426915,"version":"3.50.1"},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2022,7,12]],"date-time":"2022-07-12T00:00:00Z","timestamp":1657584000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,7,12]],"date-time":"2022-07-12T00:00:00Z","timestamp":1657584000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100004735","name":"Natural Science Foundation of Hunan Province","doi-asserted-by":"publisher","award":["2020JJ5367"],"award-info":[{"award-number":["2020JJ5367"]}],"id":[{"id":"10.13039\/501100004735","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2023,3]]},"DOI":"10.1007\/s10489-022-03788-7","type":"journal-article","created":{"date-parts":[[2022,7,12]],"date-time":"2022-07-12T09:56:49Z","timestamp":1657619809000},"page":"6936-6952","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Interpreting a deep reinforcement learning model with conceptual embedding and performance analysis"],"prefix":"10.1007","volume":"53","author":[{"given":"Yinglong","family":"Dai","sequence":"first","affiliation":[]},{"given":"Haibin","family":"Ouyang","sequence":"additional","affiliation":[]},{"given":"Hong","family":"Zheng","sequence":"additional","affiliation":[]},{"given":"Han","family":"Long","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2999-0541","authenticated-orcid":false,"given":"Xiaojun","family":"Duan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,7,12]]},"reference":[{"issue":"7540","key":"3788_CR1","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"key":"3788_CR2","unstructured":"Lillicrap T, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2016) Continuous control with deep reinforcement learning. In: International conference on learning representations (ICLR), pp 1\u201310"},{"key":"3788_CR3","unstructured":"Mnih V, Badia AP, Mirza M, Graves A, Lillicrap T, Harley T, Silver D, Kavukcuoglu K (2016) Asynchronous methods for deep reinforcement learning. In: Proceedings of The 33rd international conference on machine learning, vol 48, PMLR, pp 1928\u2013 1937"},{"key":"3788_CR4","unstructured":"Schulman J, Levine S, Abbeel P, Jordan M, Moritz P (2015) Trust region policy optimization. In: Proceedings of The 32rd international conference on machine learning, PMLR, pp 1889\u20131897"},{"key":"3788_CR5","unstructured":"Haarnoja T, Zhou A, Abbeel P, Levine S (2018) Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor"},{"issue":"7676","key":"3788_CR6","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver D, Schrittwieser J, Simonyan K, Antonoglou I, Huang A, Guez A, Hubert T, Baker L, Lai M, Bolton A et al (2017) Mastering the game of go without human knowledge. Nature 550 (7676):354\u2013359","journal-title":"Nature"},{"issue":"7782","key":"3788_CR7","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals O, Babuschkin I, Czarnecki WM, Mathieu M, Silver D (2019) Grandmaster level in StarCraft II using multi-agent reinforcement learning. Nature 575(7782):350\u2013354","journal-title":"Nature"},{"key":"3788_CR8","doi-asserted-by":"publisher","first-page":"107602","DOI":"10.1016\/j.asoc.2021.107602","volume":"110","author":"P Zieli\u0144ski","year":"2021","unstructured":"Zieli\u0144ski P, Markowska-kaczmar U (2021) 3D robotic navigation using a vision-based deep reinforcement learning model. Appl Soft Comput 110:107602","journal-title":"Appl Soft Comput"},{"key":"3788_CR9","doi-asserted-by":"publisher","first-page":"107601","DOI":"10.1016\/j.asoc.2021.107601","volume":"110","author":"S Saeedvand","year":"2021","unstructured":"Saeedvand S, Mandala H, Baltes J (2021) Hierarchical deep reinforcement learning to drag heavy objects by adult-sized humanoid robot. Appl Soft Comput 110:107601","journal-title":"Appl Soft Comput"},{"key":"3788_CR10","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1016\/j.neucom.2021.08.023","volume":"462","author":"R Jiang","year":"2021","unstructured":"Jiang R, Wang Z, He B, Zhou Y, Li G, Zhu Z (2021) A data-efficient goal-directed deep reinforcement learning method for robot visuomotor skill. Neurocomputing 462:389\u2013401","journal-title":"Neurocomputing"},{"key":"3788_CR11","doi-asserted-by":"publisher","first-page":"122","DOI":"10.1016\/j.neucom.2021.06.075","volume":"459","author":"R Zhang","year":"2021","unstructured":"Zhang R, Wang Z, Zheng M, Zhao Y, Huang Z (2021) Emotion-sensitive deep dyna-q learning for task-completion dialogue policy learning. Neurocomputing 459:122\u2013130","journal-title":"Neurocomputing"},{"key":"3788_CR12","doi-asserted-by":"publisher","first-page":"108292","DOI":"10.1016\/j.knosys.2022.108292","volume":"242","author":"A Tiwari","year":"2022","unstructured":"Tiwari A, Saha S, Bhattacharyya P (2022) A knowledge infused context driven dialogue agent for disease diagnosis using hierarchical reinforcement learning. Knowl-Based Syst 242:108292","journal-title":"Knowl-Based Syst"},{"key":"3788_CR13","doi-asserted-by":"publisher","first-page":"101964","DOI":"10.1016\/j.artmed.2020.101964","volume":"109","author":"A Coronato","year":"2020","unstructured":"Coronato A, Naeem M, De Pietro G, Paragliola G (2020) Reinforcement learning for intelligent healthcare applications: a survey. Artif Intell Med 109:101964","journal-title":"Artif Intell Med"},{"key":"3788_CR14","doi-asserted-by":"publisher","first-page":"102193","DOI":"10.1016\/j.artmed.2021.102193","volume":"121","author":"S Ebrahimi","year":"2021","unstructured":"Ebrahimi S, Lim GJ (2021) A reinforcement learning approach for finding optimal policy of adaptive radiation therapy considering uncertain tumor biological response. Artif Intell Med 121:102193","journal-title":"Artif Intell Med"},{"key":"3788_CR15","doi-asserted-by":"crossref","unstructured":"Ciampi M, Coronato A, Naeem M, Silvestri S (2022) An intelligent environment for preventing medication errors in home treatment. Expert Systems with Applications 116434","DOI":"10.1016\/j.eswa.2021.116434"},{"issue":"2","key":"3788_CR16","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1109\/TAI.2021.3111139","volume":"3","author":"I Ilahi","year":"2022","unstructured":"Ilahi I, Usama M, Qadir J, Janjua MU, Al-Fuqaha A, Huang DT, Niyato D (2022) Challenges and countermeasures for adversarial attacks on deep reinforcement learning. IEEE Transactions on Artificial Intelligence 3(2):90\u2013109","journal-title":"IEEE Transactions on Artificial Intelligence"},{"key":"3788_CR17","doi-asserted-by":"publisher","first-page":"106685","DOI":"10.1016\/j.knosys.2020.106685","volume":"214","author":"A Heuillet","year":"2021","unstructured":"Heuillet A, Couthouis F, D\u00edaz-rodr\u00edguez N (2021) Explainability in deep reinforcement learning. Knowledge-Based Systems 214:106685","journal-title":"Knowledge-Based Systems"},{"key":"3788_CR18","doi-asserted-by":"publisher","unstructured":"Chen J, Li SE, Tomizuka M (2021) Interpretable end-to-end urban autonomous driving with latent deep reinforcement learning. IEEE Trans Intell Transp Syst, pp 1\u201311. https:\/\/doi.org\/10.1109\/TITS.2020.3046646","DOI":"10.1109\/TITS.2020.3046646"},{"key":"3788_CR19","unstructured":"Greydanus S, Koul A, Dodge J, Fern A (2018) Visualizing and understanding Atari agents. In: Dy J, Krause A (eds) Proceedings of the 35th international conference on machine learning, vol 80, PMLR, pp 1792\u20131801. http:\/\/proceedings.mlr.press\/v80\/greydanus18a.html"},{"key":"3788_CR20","unstructured":"Puri N, Verma S, Gupta P, Kayastha D, Deshmukh S, Krishnamurthy B, Singh S (2020) Explain your move: Understanding agent actions using specific and relevant feature attribution. In: International conference on learning representations, pp 1\u201314"},{"key":"3788_CR21","unstructured":"Zahavy T, Ben-Zrihem N, Mannor S (2016) Graying the black box: Understanding DQNs. In: Balcan MF, Weinberger KQ (eds) Proceedings of The 33rd international conference on machine learning, vol 48, PMLR, pp 1899\u20131908"},{"key":"3788_CR22","unstructured":"Simonyan K, Vedaldi A, Zisserman A (2014) Deep inside convolutional networks: Visualising image classification models and saliency maps. In: International conference on learning representations (ICLR). arXiv:1312.6034"},{"key":"3788_CR23","unstructured":"Shrikumar A, Greenside P, Kundaje A (2017) Learning important features through propagating activation differences. In: Proceedings of the 34th international conference on machine learning, vol 70, PMLR, pp 3145\u20133153. http:\/\/proceedings.mlr.press\/v70\/shrikumar17a.html"},{"issue":"8","key":"3788_CR24","doi-asserted-by":"publisher","first-page":"336","DOI":"10.1007\/s11263-019-01228-7","volume":"128","author":"RR Selvaraju","year":"2020","unstructured":"Selvaraju RR, Cogswell M, Das A, Vedantam R, Batra D (2020) Grad-cam: Visual explanations from deep networks via gradient-based localization. Int J Comput Vis 128(8):336\u2013359","journal-title":"Int J Comput Vis"},{"key":"3788_CR25","doi-asserted-by":"publisher","unstructured":"Fong RC, Vedaldi A (2017) Interpretable explanations of black boxes by meaningful perturbation. In: IEEE International conference on computer vision (ICCV), IEEE Computer Society, pp 3449\u20133457. https:\/\/doi.org\/10.1109\/ICCV.2017.371","DOI":"10.1109\/ICCV.2017.371"},{"key":"3788_CR26","doi-asserted-by":"crossref","unstructured":"Iyer R, Li Y, Li H, Lewis M, Sundar R, Sycara K (2018) Transparency and explanation in deep reinforcement learning neural networks. In: AAAI\/ACM Conference on artificial intelligence, ethics, and society, new orleans, LA, pp 144\u2013150","DOI":"10.1145\/3278721.3278776"},{"key":"3788_CR27","doi-asserted-by":"crossref","unstructured":"Madumal P, Miller T, Sonenberg L, Vetere F (2020) Explainable reinforcement learning through a causal lens. In: Proceedings of the AAAI conference on artificial intelligence, vol 34, pp 2493\u20132500","DOI":"10.1609\/aaai.v34i03.5631"},{"key":"3788_CR28","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1016\/j.neucom.2022.01.086","volume":"482","author":"TD Duong","year":"2022","unstructured":"Duong TD, Li Q, Xu G (2022) Stochastic intervention for causal inference via reinforcement learning. Neurocomputing 482:40\u201349","journal-title":"Neurocomputing"},{"key":"3788_CR29","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: An Introduction. MIT press"},{"issue":"11","key":"3788_CR30","doi-asserted-by":"publisher","first-page":"4050","DOI":"10.1007\/s10489-020-01748-7","volume":"50","author":"DQ Nguyen","year":"2020","unstructured":"Nguyen DQ, Vien NA, Dang V-H, Chung T (2020) Asynchronous framework with reptile+ algorithm to meta learn partially observable markov decision process. Appl Intell 50(11):4050\u20134062","journal-title":"Appl Intell"},{"key":"3788_CR31","doi-asserted-by":"publisher","first-page":"110231","DOI":"10.1016\/j.automatica.2022.110231","volume":"140","author":"W Zheng","year":"2022","unstructured":"Zheng W, Jung T, Lin H (2022) The stackelberg equilibrium for one-sided zero-sum partially observable stochastic games. Automatica 140:110231","journal-title":"Automatica"},{"key":"3788_CR32","doi-asserted-by":"publisher","first-page":"103645","DOI":"10.1016\/j.artint.2021.103645","volume":"303","author":"V Kova\u0159\u00edk","year":"2022","unstructured":"Kova\u0159\u00edk V, Schmid M, Burch N, Bowling M, Lisy\u0300 V (2022) Rethinking formal models of partially observable multiagent decision making. Artif Intell 303:103645","journal-title":"Artif Intell"},{"key":"3788_CR33","doi-asserted-by":"crossref","unstructured":"Pang Z-J, Liu R-Z, Meng Z-Y, Zhang Y, Yu Y, Lu T (2019) On reinforcement learning for full-length game of starcraft. In: Proceedings of the AAAI conference on artificial intelligence, vol 33, pp 4691\u20134698","DOI":"10.1609\/aaai.v33i01.33014691"},{"issue":"3","key":"3788_CR34","doi-asserted-by":"publisher","first-page":"1631","DOI":"10.3233\/JIFS-169457","volume":"34","author":"Y Dai","year":"2018","unstructured":"Dai Y, Wang G, Li K-C (2018) Conceptual alignment deep neural networks. Journal of Intelligent & Fuzzy Systems 34(3):1631\u20131642","journal-title":"Journal of Intelligent & Fuzzy Systems"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-03788-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-022-03788-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-03788-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,27]],"date-time":"2023-02-27T04:35:57Z","timestamp":1677472557000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-022-03788-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,12]]},"references-count":34,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2023,3]]}},"alternative-id":["3788"],"URL":"https:\/\/doi.org\/10.1007\/s10489-022-03788-7","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,7,12]]},"assertion":[{"value":"18 May 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 July 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}