{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,20]],"date-time":"2026-01-20T21:10:53Z","timestamp":1768943453078,"version":"3.49.0"},"reference-count":34,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2017YFE0135700"],"award-info":[{"award-number":["2017YFE0135700"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"name":"European Union-NextGenerationEU through the National Recovery and Resilience Plan of the Republic of Bulgaria","award":["DUECOS BG-RRP-2.004-0001-C01"],"award-info":[{"award-number":["DUECOS BG-RRP-2.004-0001-C01"]}]},{"DOI":"10.13039\/501100001635","name":"University of Limerick, Ireland","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001635","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2026]]},"DOI":"10.1109\/access.2026.3651636","type":"journal-article","created":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T22:02:28Z","timestamp":1768255348000},"page":"6029-6042","source":"Crossref","is-referenced-by-count":0,"title":["FD-DeCap: A Front-Door Causal Inference-Based Framework for Debiasing Automatic Audio Captioning"],"prefix":"10.1109","volume":"14","author":[{"given":"Jinyun","family":"Liu","sequence":"first","affiliation":[{"name":"College of Artificial Intelligence, North China University of Science and Technology, Tangshan, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5390-5448","authenticated-orcid":false,"given":"Hui","family":"Li","sequence":"additional","affiliation":[{"name":"College of Artificial Intelligence, North China University of Science and Technology, Tangshan, China"}]},{"given":"Mingjun","family":"Wei","sequence":"additional","affiliation":[{"name":"College of Artificial Intelligence, North China University of Science and Technology, Tangshan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3527-3773","authenticated-orcid":false,"given":"Zhanlin","family":"Ji","sequence":"additional","affiliation":[{"name":"College of Mathematics and Computer Science, Zhejiang A&#x0026;F University, Hangzhou, China"}]},{"given":"Haiyang","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Advanced Technology, Xi&#x2019;an Jiaotong-Liverpool University, Suzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0535-7087","authenticated-orcid":false,"given":"Ivan","family":"Ganchev","sequence":"additional","affiliation":[{"name":"TRC\/ECE, University of Limerick, Limerick, Ireland"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.31763\/ijrcs.v2i4.888"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.physd.2019.132306"},{"key":"ref3","first-page":"21","article-title":"Audio captioning based on transformer and pre-trained CNN","volume-title":"Proc. DCAS","author":"Chen"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10890325"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2022.3189536"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10448115"},{"key":"ref7","first-page":"1","article-title":"Automated audio captioning with keywords guidance","volume-title":"Proc. Conf. Detection Classification Acoust.","author":"Mei"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.23919\/APSIPAASC55919.2022.9980325"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413982"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.23919\/EUSIPCO55093.2022.9909761"},{"key":"ref11","first-page":"1","article-title":"CP-JKU\u2019s submission to task 6a of the DCASE2022 challenge: A BART encoder\u2013decoder for automatic audio captioning trained via the reinforce algorithm and transfer learning","volume-title":"Proc. Conf. Detection Classification Acoust.","author":"Primus"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1136\/jech.2003.008466"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.2017.1398657"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1111\/obes.12598"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1080\/2153599X.2021.2001259"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.3390\/info14020137"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP55844.2023.10285967"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.cognition.2021.104627"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.2307\/3033889"},{"key":"ref20","article-title":"BART: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension","author":"Lewis","year":"2019","journal-title":"arXiv:1910.13461"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.3030497"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01751"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00972"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-3207"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9052990"},{"key":"ref26","first-page":"119","article-title":"AudioCaps: Generating captions for audios in the wild","volume-title":"Proc. Conf. North Am. Chapter Assoc. Comput. Linguistics, Human Lang. Technol.","author":"Kim"},{"key":"ref27","first-page":"65","article-title":"METEOR: An automatic metric for MT evaluation with improved correlation with human judgments","volume-title":"Proc. ACL Wkshp. Intrinsic Extrinsic Eval. Meas. Mach. Transl. Summarization.","author":"Banerjee"},{"key":"ref28","first-page":"74","article-title":"ROUGE: A package for automatic evaluation of summaries","volume-title":"Proc. Text Summarization Branches Out.","author":"Lin"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46454-1_24"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.100"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10448030"},{"key":"ref33","article-title":"CL4AC: A contrastive loss for audio captioning","author":"Liu","year":"2021","journal-title":"arXiv:2107.09990"},{"key":"ref34","article-title":"Audio captioning transformer","author":"Mei","year":"2021","journal-title":"arXiv:2107.09817"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/11323511\/11333308.pdf?arnumber=11333308","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,20]],"date-time":"2026-01-20T07:26:10Z","timestamp":1768893970000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11333308\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/access.2026.3651636","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]}}}