{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,6]],"date-time":"2025-11-06T18:59:16Z","timestamp":1762455556671,"version":"build-2065373602"},"reference-count":89,"publisher":"Tsinghua University Press","issue":"5","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Comp. Visual. Med."],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.26599\/cvm.2025.9450401","type":"journal-article","created":{"date-parts":[[2025,7,4]],"date-time":"2025-07-04T13:50:16Z","timestamp":1751637016000},"page":"983-1003","source":"Crossref","is-referenced-by-count":0,"title":["BDA: Bi-Directional Attention for Zero-Shot Learning"],"prefix":"10.26599","volume":"11","author":[{"given":"Junseok","family":"Lee","sequence":"first","affiliation":[{"name":"Dongguk University,Seoul,Republic of Korea,04620"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jinming","family":"Cao","sequence":"additional","affiliation":[{"name":"School of Computing, National University of Singapore,Singapore,Singapore,119077"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yifang","family":"Yin","sequence":"additional","affiliation":[{"name":"Institute for Infocomm Research, A*STAR,Singapore,Singapore,13632"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jihie","family":"Kim","sequence":"additional","affiliation":[{"name":"Dongguk University,Seoul,Republic of Korea,04620"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Roger","family":"Zimmermann","sequence":"additional","affiliation":[{"name":"School of Computing, National University of Singapore,Singapore,Singapore,119077"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Seongsik","family":"Park","sequence":"additional","affiliation":[{"name":"Korean National Open University,Seoul,Republic of Korea,03087"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"11138","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00944"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00454"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00449"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.124"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00804"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00670"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.666"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.170"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.557"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.649"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00044"},{"key":"ref12","first-page":"646","article-title":"Zero-data learning of new tasks","volume-title":"Proceedings of the 23rd National Conference on Artificial Intelligence","volume":"2","author":"Larochelle","year":"2008"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206594"},{"key":"ref14","first-page":"1410","article-title":"Zeroshot learning with semantic output codes","volume-title":"Proceedings of the 23rd International Conference on Neural Information Processing Systems","author":"Palatucci","year":"2009"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.328"},{"key":"ref16","article-title":"Large language models are zero-shot reasoners","author":"Kojima","year":"2022","journal-title":"arXiv preprint"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.13"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206772"},{"key":"ref19","article-title":"Zero-shot learning through cross-modal transfer","volume-title":"arXiv preprint","author":"Socher","year":"2013"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.111"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46475-6_4"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00113"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00779"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00450"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00844"},{"key":"ref26","article-title":"HSVA: Hierarchical semantic-visual adaptation for zero-shot learning","author":"Chen","year":"2021","journal-title":"arXiv preprint"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00581"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01052"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3131222"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58548-8_33"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01613-9"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i1.19909"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"volume-title":"The Caltech-UCSD Birds-200-2011 Dataset","year":"2011","author":"Wah","key":"ref34"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6247998"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-020-0184-6"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.321"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.15"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-50077-5_2"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.575"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2487986"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00994"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00111"},{"key":"ref44","first-page":"2009","article-title":"Generalized zero-shot learning with deep calibration network","volume-title":"Proceedings of the 32nd International Conference on Neural Information Processing Systems","author":"Liu","year":"2018"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00986"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.01043"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00961"},{"key":"ref48","article-title":"Semantic-guided multi-attention localization for zero-shot learning","author":"Zhu","year":"2019","journal-title":"arXiv preprint"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00680"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00379"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2017.308"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00460"},{"key":"ref53","article-title":"VEEGAN: Reducing mode collapse in GANs using implicit variational learning","author":"Srivastava","year":"2017","journal-title":"arXiv preprint"},{"key":"ref54","article-title":"Towards GAN benchmarks which require generalization","author":"Gulrajani","year":"2020","journal-title":"arXiv preprint"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01153"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/134"},{"key":"ref57","article-title":"Don\u2019t blame the ELBO! A linear VAE perspective on posterior collapse","author":"Lucas","year":"2019","journal-title":"arXiv preprint"},{"key":"ref58","article-title":"Neural discrete representation learning","author":"Van Den Oord","year":"2017","journal-title":"arXiv preprint"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3229526"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3155602"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00561"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00746"},{"journal-title":"arXiv preprint","article-title":"Attention is all you need","author":"Vaswani","key":"ref63"},{"key":"ref64","article-title":"Dual-stream network for visual recognition","author":"Mao","year":"2021","journal-title":"arXiv preprint"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01181"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.5244\/c.35.204"},{"key":"ref68","article-title":"SegFormer: Simple and efficient design for semantic segmentation with transformers","author":"Xie","year":"2021","journal-title":"arXiv preprint"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00864"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-022-0274-8"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-018-0116-x"},{"key":"ref72","article-title":"MonoDETR: Depth-guided transformer for monocular 3D object detection","author":"Zhang","year":"2022","journal-title":"arXiv preprint"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-021-0252-6"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-021-0229-5"},{"key":"ref75","article-title":"Point-M2AE: Multi-scale masked autoencoders for hierarchical point cloud pretraining","author":"Zhang","year":"2022","journal-title":"arXiv preprint"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2023.3282631"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1007\/s41019-022-00200-9"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-69525-5_13"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2954747"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref81","article-title":"A causal view of compositional zero-shot recognition","author":"Atzmon","year":"2020","journal-title":"arXiv preprint"},{"key":"ref82","article-title":"Image captioning: Transforming objects into words","author":"Herdade","year":"2019","journal-title":"arXiv preprint"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01521"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01331"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01405"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01515"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00019"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i1.20002"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"}],"container-title":["Computational Visual Media"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10750449\/11230308\/11072093.pdf?arnumber=11072093","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,6]],"date-time":"2025-11-06T18:52:33Z","timestamp":1762455153000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11072093\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10]]},"references-count":89,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.26599\/cvm.2025.9450401","relation":{},"ISSN":["2096-0662","2096-0433"],"issn-type":[{"type":"electronic","value":"2096-0662"},{"type":"print","value":"2096-0433"}],"subject":[],"published":{"date-parts":[[2025,10]]}}}