{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T16:40:56Z","timestamp":1769272856504,"version":"3.49.0"},"reference-count":62,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100008050","name":"Ho Chi Minh City University of Education Foundation for Science and Technology","doi-asserted-by":"publisher","award":["CS.2024.19.36"],"award-info":[{"award-number":["CS.2024.19.36"]}],"id":[{"id":"10.13039\/501100008050","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/access.2025.3584128","type":"journal-article","created":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T17:39:16Z","timestamp":1751305156000},"page":"112528-112551","source":"Crossref","is-referenced-by-count":2,"title":["Integrating Abstract Meaning Representation to Enhance Transformer-Based Image Captioning"],"prefix":"10.1109","volume":"13","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7543-5207","authenticated-orcid":false,"given":"Nguyen","family":"Van Thinh","sequence":"first","affiliation":[{"name":"Vietnam Academy of Science and Technology (VAST), Graduate University of Science and Technology, Hanoi, Vietnam"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8925-5549","authenticated-orcid":false,"given":"Tran","family":"Lang","sequence":"additional","affiliation":[{"name":"Journal Editorial Department, Ho Chi Minh City University of Foreign Languages and Information Technology (HUFLIT), Ho Chi Minh City, Vietnam"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8408-2004","authenticated-orcid":false,"given":"Van","family":"The Thanh","sequence":"additional","affiliation":[{"name":"Faculty of Information Technology, Ho Chi Minh City University of Education (HCMUE), Ho Chi Minh City, Vietnam"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-024-18307-8"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.119773"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/IC2E357697.2023.10262494"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2024.109288"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-024-18966-7"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2024.105863"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.iswa.2025.200489"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-023-15555-y"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-023-16560-x"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"ref11","article-title":"Show, attend and tell: Neural image caption generation with visual attention","author":"Xu","year":"2015","journal-title":"arXiv:1502.03044"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.15625\/1813-9663\/20929"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2018.12.027"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2022.104591"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01094"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICME51207.2021.9428310"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-022-13793-0"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.image.2025.117273"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3522585"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-69538-5_10"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01098"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01746"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3356551"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2019.00036"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3268744"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6731"},{"issue":"12","key":"ref28","first-page":"1234","article-title":"Rgtrancnet: Effective image captioning model using cross-attention and semantic knowledge","volume":"61","author":"Thinh","year":"2023","journal-title":"Vietnam J. Sci. Technol."},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/InCACCT61598.2024.10551257"},{"key":"ref30","first-page":"178","article-title":"Abstract meaning representation for sembanking","volume-title":"Proc. 7th Linguistic Annotation Workshop Interoperability Discourse","author":"Banarescu"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.390"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/j.image.2023.117071"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICCMC51019.2021.9418414"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CISS50987.2021.9400209"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.3390\/app13137916"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.15625\/vap.2023.0063"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.3389\/fnins.2023.1270850"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3431091"},{"key":"ref39","article-title":"An image is worth 16\u00d716 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020","journal-title":"arXiv:2010.11929"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01059"},{"key":"ref43","first-page":"404","article-title":"Enhancing the efficiency of image annotation using transformer networks and the conceptnet knowledge base","volume-title":"Proc. Nat. Workshop Sel. Issues Inf. Commun. Technol.","author":"Thinh"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-61377-8_31"},{"key":"ref45","first-page":"13633","article-title":"ReCAP: Semantic role enhanced caption generation","volume-title":"Proc. Joint Int. Conf. Comput. Linguistics, Lang. Resour. Eval. (LREC-COLING)","author":"Bhattacharyya"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICCE59016.2024.10444443"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72848-8_26"},{"key":"ref48","first-page":"1024","article-title":"Inductive representation learning on large graphs","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Hamilton"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1014"},{"key":"ref50","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","volume-title":"Proc. Conf. North Amer. Chapter Assoc. Comput. linguistics, Human Lang. Technol.","author":"Devlin"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00611"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.11164"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00166"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"ref56","first-page":"1533","article-title":"Deep fragment embeddings for bidirectional image sentence mapping","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Karpathy"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"ref58","first-page":"65","article-title":"METEOR: An automatic metric for MT evaluation with improved correlation with human judgments","volume-title":"Proc. ACL Workshop Intrinsic Extrinsic Eval. Measures Mach. Transl. Summarization","author":"Banerjee"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"ref60","first-page":"74","article-title":"ROUGE: A package for automatic evaluation of summaries","volume-title":"Proc. Text Summarization Branches Out","author":"Lin"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46454-1_24"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1410"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/10820123\/11058972.pdf?arnumber=11058972","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,5]],"date-time":"2025-07-05T04:38:40Z","timestamp":1751690320000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11058972\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":62,"URL":"https:\/\/doi.org\/10.1109\/access.2025.3584128","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]}}}