{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T04:44:19Z","timestamp":1761972259495,"version":"build-2065373602"},"reference-count":33,"publisher":"Institute of Electronics, Information and Communications Engineers (IEICE)","issue":"11","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEICE Trans. Inf. &amp; Syst."],"published-print":{"date-parts":[[2025,11,1]]},"DOI":"10.1587\/transinf.2024edp7292","type":"journal-article","created":{"date-parts":[[2025,5,14]],"date-time":"2025-05-14T18:08:23Z","timestamp":1747246103000},"page":"1381-1391","source":"Crossref","is-referenced-by-count":0,"title":["Interpreting Attention Mechanisms of NMT with Linguistic Features"],"prefix":"10.1587","volume":"E108.D","author":[{"given":"Guanghui","family":"CAI","sequence":"first","affiliation":[{"name":"Faculty of Information Engineering and Automation, Kunming University of Science and Technology"},{"name":"Yunnan Key Laboratory of Artificial Intelligence, Kunming University of Science and Technology"}]},{"given":"Junguo","family":"ZHU","sequence":"additional","affiliation":[{"name":"Faculty of Information Engineering and Automation, Kunming University of Science and Technology"},{"name":"Yunnan Key Laboratory of Artificial Intelligence, Kunming University of Science and Technology"}]}],"member":"532","reference":[{"key":"1","doi-asserted-by":"crossref","unstructured":"[1] J. Hessel, A. Marasovic, J.D. Hwang, L. Lee, J. Da, R. Zellers, R. Mankoff, and Y. Choi, \u201cDo androids laugh at electric sheep? humor \u201cunderstanding\u201d benchmarks from the new yorker caption contest,\u201d Proc. 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), Toronto, pp.688-714, July 2023. 10.18653\/v1\/2023.acl-long.41","DOI":"10.18653\/v1\/2023.acl-long.41"},{"key":"2","unstructured":"[2] D. Bahdanau, K. Cho, and Y. Bengio, \u201cNeural machine translation by jointly learning to align and translate,\u201d Proc. 3rd International Conference on Learning Representations (ICLR 2015), Jan. 2015."},{"key":"3","doi-asserted-by":"crossref","unstructured":"[3] S. Abnar and W. Zuidema, \u201cQuantifying attention flow in transformers,\u201d Proc. 58th Annual Meeting of the Association for Computational Linguistics, Online, pp.4190-4197, July 2020. 10.18653\/v1\/2020.acl-main.385","DOI":"10.18653\/v1\/2020.acl-main.385"},{"key":"4","doi-asserted-by":"crossref","unstructured":"[4] W. He, Y. Wu, and X. Li, \u201cAttention mechanism for neural machine translation: A survey,\u201d 2021 IEEE 5th Information Technology, Networking, Electronic and Automation Control Conference (ITNEC), pp.1485-1489, IEEE, 2021. 10.1109\/itnec52019.2021.9586824","DOI":"10.1109\/ITNEC52019.2021.9586824"},{"key":"5","doi-asserted-by":"crossref","unstructured":"[5] W. Ma, K. Zhang, R. Lou, L. Wang, and S. Vosoughi, \u201cContributions of transformer attention heads in multi- and cross-lingual tasks,\u201d Proc. 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), Online, pp.1956-1966, Aug. 2021. 10.18653\/v1\/2021.acl-long.152","DOI":"10.18653\/v1\/2021.acl-long.152"},{"key":"6","doi-asserted-by":"crossref","unstructured":"[6] A.K. Mohankumar, P. Nema, S. Narasimhan, M.M. Khapra, B.V. Srinivasan, and B. 
Ravindran, \u201cTowards transparent and explainable attention models,\u201d Proc. 58th Annual Meeting of the Association for Computational Linguistics, Online, pp.4206-4216, July 2020. 10.18653\/v1\/2020.acl-main.387","DOI":"10.18653\/v1\/2020.acl-main.387"},{"key":"7","doi-asserted-by":"publisher","unstructured":"[7] V. Hassija, V. Chamola, A. Mahapatra, A. Singal, D. Goel, K. Huang, S. Scardapane, I. Spinelli, M. Mahmud, and A. Hussain, \u201cInterpreting black-box models: A review on explainable artificial intelligence,\u201d Cognitive Computation, vol.16, no.1, pp.45-74, 2024. 10.1007\/s12559-023-10179-8","DOI":"10.1007\/s12559-023-10179-8"},{"key":"8","doi-asserted-by":"publisher","unstructured":"[8] S. Luo, H. Ivison, S.C. Han, and J. Poon, \u201cLocal interpretations for explainable natural language processing: A survey,\u201d ACM Computing Surveys, vol.56, no.9, pp.1-36, 2024. 10.1145\/3649450","DOI":"10.1145\/3649450"},{"key":"9","doi-asserted-by":"crossref","unstructured":"[9] A. Holzinger, A. Saranti, C. Molnar, P. Biecek, and W. Samek, \u201cExplainable ai methods-a brief overview,\u201d International workshop on extending explainable AI beyond deep models and classifiers, pp.13-38, Springer, 2022. 10.1007\/978-3-031-04083-2_2","DOI":"10.1007\/978-3-031-04083-2_2"},{"key":"10","doi-asserted-by":"crossref","unstructured":"[10] W. Wang and Z. Tu, \u201cRethinking the value of transformer components,\u201d Proc. 28th Int. Conf. Computational Linguistics, Barcelona, Spain (Online), pp.6019-6029, International Committee on Computational Linguistics, Dec. 2020. 10.18653\/v1\/2020.coling-main.529","DOI":"10.18653\/v1\/2020.coling-main.529"},{"key":"11","doi-asserted-by":"crossref","unstructured":"[11] J. Vig and Y. Belinkov, \u201cAnalyzing the structure of attention in a transformer language model,\u201d Proc. 2019 ACL Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP, Florence, Italy, pp.63-76, Aug. 2019. 10.18653\/v1\/w19-4808","DOI":"10.18653\/v1\/W19-4808"},{"key":"12","unstructured":"[12] J. Hewitt and C.D. Manning, \u201cA structural probe for finding syntax in word representations,\u201d Proc. 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pp.4129-4138, 2019."},{"key":"13","doi-asserted-by":"publisher","unstructured":"[13] C.D. Manning, K. Clark, J. Hewitt, U. Khandelwal, and O. Levy, \u201cEmergent linguistic structure in artificial neural networks trained by self-supervision,\u201d Proc. National Academy of Sciences, vol.117, no.48, pp.30046-30054, 2020. 10.1073\/pnas.1907367117","DOI":"10.1073\/pnas.1907367117"},{"key":"14","doi-asserted-by":"crossref","unstructured":"[14] A. Raganato and J. Tiedemann, \u201cAn analysis of encoder representations in transformer-based machine translation,\u201d Proc. 2018 EMNLP workshop BlackboxNLP: analyzing and interpreting neural networks for NLP, pp.287-297, 2018. 10.18653\/v1\/w18-5431","DOI":"10.18653\/v1\/W18-5431"},{"key":"15","doi-asserted-by":"publisher","unstructured":"[15] Y. Belinkov, \u201cProbing classifiers: Promises, shortcomings, and advances,\u201d Computational Linguistics, vol.48, no.1, pp.207-219, 2022. 10.1162\/coli_a_00422","DOI":"10.1162\/coli_a_00422"},{"key":"16","unstructured":"[16] P. Michel, O. Levy, and G. 
Neubig, \u201cAre sixteen heads really better than one?,\u201d Advances in neural information processing systems, vol.32, 2019."},{"key":"17","doi-asserted-by":"publisher","unstructured":"[17] Y. Zhang, P. Ti\u0148o, A. Leonardis, and K. Tang, \u201cA survey on neural network interpretability,\u201d IEEE Trans. Emerging Topics in Computational Intelligence, vol.5, no.5, pp.726-742, 2021. 10.1109\/tetci.2021.3100641","DOI":"10.1109\/TETCI.2021.3100641"},{"key":"18","doi-asserted-by":"crossref","unstructured":"[18] T. R\u00e4uker, A. Ho, S. Casper, and D. Hadfield-Menell, \u201cToward transparent ai: A survey on interpreting the inner structures of deep neural networks,\u201d 2023 ieee conference on secure and trustworthy machine learning (SaTML), pp.464-483, IEEE, 2023. 10.1109\/satml54575.2023.00039","DOI":"10.1109\/SaTML54575.2023.00039"},{"key":"19","doi-asserted-by":"publisher","unstructured":"[19] F. Dalvi, N. Durrani, H. Sajjad, Y. Belinkov, A. Bau, and J. Glass, \u201cWhat is one grain of sand in the desert? analyzing individual neurons in deep nlp models,\u201d Proc. AAAI Conference on Artificial Intelligence, vol.33, no.01, pp.6309-6317, 2019. 10.1609\/aaai.v33i01.33016309","DOI":"10.1609\/aaai.v33i01.33016309"},{"key":"20","unstructured":"[20] N. Durrani, F. Dalvi, and H. Sajjad, \u201cDiscovering salient neurons in deep nlp models,\u201d Journal of Machine Learning Research, vol.24, no.362, pp.1-40, 2023."},{"key":"21","doi-asserted-by":"crossref","unstructured":"[21] S. Serrano and N.A. Smith, \u201cIs attention interpretable?,\u201d Proc. 57th Annual Meeting of the Association for Computational Linguistics, Florence, Italy, July 2019. 10.18653\/v1\/p19-1282","DOI":"10.18653\/v1\/P19-1282"},{"key":"22","doi-asserted-by":"crossref","unstructured":"[22] X. Li, G. Li, L. Liu, M. Meng, and S. Shi, \u201cOn the word alignment from neural machine translation,\u201d Proc. 57th Annual Meeting of the Association for Computational Linguistics, pp.1293-1303, 2019. 10.18653\/v1\/p19-1124","DOI":"10.18653\/v1\/P19-1124"},{"key":"23","doi-asserted-by":"crossref","unstructured":"[23] S. Tan, Y. Shen, Z. Chen, A. Courville, and C. Gan, \u201cSparse universal transformer,\u201d Proc. 2023 Conference on Empirical Methods in Natural Language Processing, Singapore, pp.169-179, Dec. 2023. 10.18653\/v1\/2023.emnlp-main.12","DOI":"10.18653\/v1\/2023.emnlp-main.12"},{"key":"24","doi-asserted-by":"crossref","unstructured":"[24] K. Clark, U. Khandelwal, O. Levy, and C.D. Manning, \u201cWhat does BERT look at? an analysis of BERT\u2019s attention,\u201d Proc. 2019 ACL Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP, 2019. 10.18653\/v1\/w19-4828","DOI":"10.18653\/v1\/W19-4828"},{"key":"25","unstructured":"[25] J.-Y. Jo and S.-H. Myaeng, \u201cRoles and utilization of attention heads in transformer-based neural language models,\u201d Proc. 58th Annual Meeting of the Association for Computational Linguistics, 2020. 10.18653\/v1\/2020.acl-main.311"},{"key":"26","doi-asserted-by":"crossref","unstructured":"[26] E. Voita, D. Talbot, F. Moiseev, R. Sennrich, and I. Titov, \u201cAnalyzing multi-head self-attention: Specialized heads do the heavy lifting, the rest can be pruned,\u201d Proc. 57th Annual Meeting of the Association for Computational Linguistics, 2019. 10.18653\/v1\/p19-1580","DOI":"10.18653\/v1\/P19-1580"},{"key":"27","doi-asserted-by":"crossref","unstructured":"[27] M. Behnke and K. 
Heafield, \u201cLosing heads in the lottery: Pruning transformer attention in neural machine translation,\u201d Proc. 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), 2020. 10.18653\/v1\/2020.emnlp-main.211","DOI":"10.18653\/v1\/2020.emnlp-main.211"},{"key":"28","doi-asserted-by":"crossref","unstructured":"[28] E. Akyurek and J. Andreas, \u201cLexSym: Compositionality as lexical symmetry,\u201d Proc. 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp.639-657, 2023. 10.18653\/v1\/2023.acl-long.38","DOI":"10.18653\/v1\/2023.acl-long.38"},{"key":"29","doi-asserted-by":"crossref","unstructured":"[29] K. Yin and G. Neubig, \u201cInterpreting language models with contrastive explanations,\u201d Proc. 2022 Conference on Empirical Methods in Natural Language Processing, 2022. 10.18653\/v1\/2022.emnlp-main.14","DOI":"10.18653\/v1\/2022.emnlp-main.14"},{"key":"30","doi-asserted-by":"crossref","unstructured":"[30] M. M\u00fcller, Z. Jiang, A. Moryossef, A. Rios, and S. Ebling, \u201cConsiderations for meaningful sign language machine translation based on glosses,\u201d Proc. 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), pp.682-693, 2023. 10.18653\/v1\/2023.acl-short.60","DOI":"10.18653\/v1\/2023.acl-short.60"},{"key":"31","doi-asserted-by":"crossref","unstructured":"[31] M. Marcus, B. Santorini, and M.A. Marcinkiewicz, \u201cBuilding a large annotated corpus of english: The penn treebank,\u201d Computational linguistics, vol.19, no.2, pp.313-330, 1993. 10.21236\/ada273556","DOI":"10.21236\/ADA273556"},{"key":"32","doi-asserted-by":"publisher","unstructured":"[32] K. Vahldiek and F. Klawonn, \u201cCluster-centered visualization techniques for fuzzy clustering results to judge single clusters,\u201d Applied Sciences, vol.14, no.3, p.1102, 2024. 10.3390\/app14031102","DOI":"10.3390\/app14031102"},{"key":"33","unstructured":"[33] E. Aky\u00fcrek, D. Schuurmans, J. Andreas, T. Ma, and D. Zhou, \u201cWhat learning algorithm is in-context learning? investigations with linear models,\u201d The Eleventh Int. Conf. Learning Representations, 2023."}],"container-title":["IEICE Transactions on Information and Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transinf\/E108.D\/11\/E108.D_2024EDP7292\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T03:36:07Z","timestamp":1761968167000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transinf\/E108.D\/11\/E108.D_2024EDP7292\/_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,1]]},"references-count":33,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2025]]}},"URL":"https:\/\/doi.org\/10.1587\/transinf.2024edp7292","relation":{},"ISSN":["0916-8532","1745-1361"],"issn-type":[{"type":"print","value":"0916-8532"},{"type":"electronic","value":"1745-1361"}],"subject":[],"published":{"date-parts":[[2025,11,1]]},"article-number":"2024EDP7292"}}
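The record above is a standard Crossref REST API "works" response: an envelope with "status", "message-type", and a "message" object carrying the bibliographic fields. As a minimal sketch of how such a record can be retrieved and consumed, the Python below fetches the same DOI from the public endpoint https://api.crossref.org/works/{doi} and flattens the fields shown above into a one-line citation. The helper names (fetch_work, summarize) and the User-Agent contact address are illustrative choices, not part of any library.

```python
# Minimal sketch: fetch a Crossref work record and summarize it.
# Assumes network access to the public Crossref REST API; the envelope
# keys ("status", "message") match the record shown above.
import json
import urllib.request

DOI = "10.1587/transinf.2024edp7292"
API_URL = f"https://api.crossref.org/works/{DOI}"


def fetch_work(url: str) -> dict:
    """Return the 'message' payload of a Crossref works response."""
    # Crossref asks clients to identify themselves via User-Agent
    # (the mailto address here is a placeholder).
    req = urllib.request.Request(
        url, headers={"User-Agent": "metadata-sketch/0.1 (mailto:you@example.org)"}
    )
    with urllib.request.urlopen(req) as resp:
        envelope = json.load(resp)
    if envelope.get("status") != "ok":
        raise RuntimeError(f"unexpected status: {envelope.get('status')}")
    return envelope["message"]


def summarize(work: dict) -> str:
    """Flatten title, authors, venue, volume, pages, and DOI into one line."""
    authors = ", ".join(
        f"{a.get('given', '')} {a.get('family', '')}".strip()
        for a in work.get("author", [])
    )
    title = "; ".join(work.get("title", []))
    venue = "; ".join(work.get("container-title", []))
    # 'date-parts' is a list of [year, month, day] lists; take the year.
    year = work.get("published", {}).get("date-parts", [[None]])[0][0]
    return (
        f"{authors}: \"{title}\", {venue}, vol.{work.get('volume')}, "
        f"pp.{work.get('page')}, {year}. DOI: {work.get('DOI')}"
    )


if __name__ == "__main__":
    work = fetch_work(API_URL)
    print(summarize(work))
    # Each of the 33 'reference' entries carries an 'unstructured' citation
    # string and, where Crossref matched one, a 'DOI' field.
    for ref in work.get("reference", [])[:3]:
        print(ref.get("key"), ref.get("DOI", "(no DOI matched)"))
```

Note that some references in this record (e.g., keys 2, 12, 16, 20, 33) carry only an "unstructured" string with no matched "DOI" field, so any consumer should treat "DOI" as optional, as the sketch does.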