{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T16:56:39Z","timestamp":1777654599407,"version":"3.51.4"},"reference-count":96,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2023]]},"DOI":"10.1109\/tpami.2022.3229526","type":"journal-article","created":{"date-parts":[[2022,12,15]],"date-time":"2022-12-15T20:00:43Z","timestamp":1671134443000},"page":"1-17","source":"Crossref","is-referenced-by-count":45,"title":["TransZero++: Cross Attribute-Guided Transformer for Zero-Shot Learning"],"prefix":"10.1109","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9633-3392","authenticated-orcid":false,"given":"Shiming","family":"Chen","sequence":"first","affiliation":[{"name":"School of Electronic Information and Communication, Huazhong University of Science and Technology, Wuhan, China"}]},{"given":"Ziming","family":"Hong","sequence":"additional","affiliation":[{"name":"School of Electronic Information and Communication, Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1186-2373","authenticated-orcid":false,"given":"Wenjin","family":"Hou","sequence":"additional","affiliation":[{"name":"School of Electronic Information and Communication, Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5487-9845","authenticated-orcid":false,"given":"Guo-Sen","family":"Xie","sequence":"additional","affiliation":[{"name":"Nanjing University of Science and Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3667-531X","authenticated-orcid":false,"given":"Yibing","family":"Song","sequence":"additional","affiliation":[{"name":"AI Institute, Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3508-756X","authenticated-orcid":false,"given":"Jian","family":"Zhao","sequence":"additional","affiliation":[{"name":"Institute of North Electronic Equipment, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0607-1777","authenticated-orcid":false,"given":"Xinge","family":"You","sequence":"additional","affiliation":[{"name":"School of Electronic Information and Communication, Huazhong University of Science and Technology, Wuhan, China"}]},{"given":"Shuicheng","family":"Yan","sequence":"additional","affiliation":[{"name":"Sea AI Lab (SAIL), Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8264-6117","authenticated-orcid":false,"given":"Ling","family":"Shao","sequence":"additional","affiliation":[{"name":"Terminus Group, China"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.13"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58548-8_13"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_24"},{"key":"ref56","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"dosovitskiy","year":"2021","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2857768"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01059"},{"key":"ref14","first-page":"19352","article-title":"Fine-grained zero-shot learning with DNA as side information","author":"badirli","year":"2021","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01521"},{"key":"ref53","first-page":"23296","article-title":"Intriguing properties of vision transformers","author":"naseer","year":"2021","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref52","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2763441"},{"key":"ref11","first-page":"468","article-title":"Zero-shot semantic segmentation","author":"bucher","year":"2019","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-6301"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-020-01350-x"},{"key":"ref54","article-title":"Transformers in vision: A survey","author":"khan","year":"2021"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3046924"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00581"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3070231"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00113"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298911"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00910"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00379"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3131222"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.542"},{"key":"ref91","first-page":"3111","article-title":"Distributed representations of words and phrases and their compositionality","author":"mikolov","year":"2013","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref90","first-page":"2579","article-title":"Visualizing data using T-SNE","volume":"9","author":"maaten","year":"2008","journal-title":"J Mach Learn Res"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58542-6_29"},{"key":"ref45","article-title":"Adaptive and generative zero-shot learning","author":"chou","year":"2021","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00746"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.386"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3127346"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/134"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00717"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00680"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.376"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00240"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01405"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6247998"},{"key":"ref87","first-page":"8024","article-title":"Pytorch: An imperative style, high-performance deep learning library","author":"paszke","year":"2019","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref49","first-page":"1462","article-title":"A causal view of compositional zero-shot recognition","author":"atzmon","year":"2020","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01052"},{"key":"ref7","first-page":"2121","article-title":"Devise: A deep visual-semantic embedding model","author":"frome","year":"2013","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00379"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.140"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206594"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"2332","DOI":"10.1109\/TPAMI.2015.2408354","article-title":"Transductive multi-view zero-shot learning","volume":"37","author":"yanwei","year":"2015","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2737007"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00978"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01175"},{"key":"ref40","first-page":"5998","article-title":"Stacked semantics-guided attention model for fine-grained zero-shot learning","author":"yu","year":"2018","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.575"},{"key":"ref83","article-title":"Zero-shot learning by convex combination of semantic embeddings","author":"norouzi","year":"2014","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00746"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00454"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3155602"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58548-8_33"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01268"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00961"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482471"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00561"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00115"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00859"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00986"},{"key":"ref32","article-title":"Caltech-ucsd birds 200","author":"welinder","year":"2010"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.01043"},{"key":"ref2","first-page":"1410","article-title":"Zero-shot learning with semantic output codes","author":"palatucci","year":"2009","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref1","first-page":"646","article-title":"Zero-data learning of new tasks","author":"larochelle","year":"2008","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref39","first-page":"21969","article-title":"Attribute prototype network for zero-shot learning","author":"xu","year":"2020","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref38","first-page":"14917","article-title":"Semantic-guided multi-attention localization for zero-shot learning","author":"zhu","year":"2019","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref71","doi-asserted-by":"crossref","first-page":"2510","DOI":"10.1109\/TPAMI.2020.2965534","article-title":"Zero and few shot learning with semantic feature synthesis and competitive learning","volume":"43","author":"lu","year":"2021","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01515"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01613-9"},{"key":"ref72","article-title":"Closed-form sample probing for learning generative models in zero-shot learning","author":"cetin","year":"2022","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00779"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01331"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2487986"},{"key":"ref67","first-page":"11135","article-title":"Image captioning: Transforming objects into words","author":"herdade","year":"2019","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00844"},{"key":"ref25","first-page":"4281","article-title":"Generalized zero-shot learning via synthesized examples","author":"arora","year":"2018","journal-title":"Proc IEEE Conf Comput Vis Pattern Recognit"},{"key":"ref69","first-page":"19849","article-title":"Compositional zero-shot learning via fine-grained dense feature composition","author":"huynh","year":"2020","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1404"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00454"},{"key":"ref63","article-title":"Mutual mean-teaching: Pseudo label refinery for unsupervised domain adaptation on person re-identification","author":"ge","year":"2020","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i1.19909"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref21","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"radford","year":"2021","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58571-6_35"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00019"},{"key":"ref27","first-page":"16622","article-title":"Hsva: Hierarchical semantic-visual adaptation for zero-shot learning","author":"chen","year":"2021","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58517-4_36"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00473"},{"key":"ref62","first-page":"1195","article-title":"Mean teachers are better role models: Weight-averaged consistency targets improve semi-supervised deep learning results","author":"tarvainen","year":"2017","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref61","article-title":"Cooperative learning with visual attributes","author":"batra","year":"2017"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/4359286\/09987664.pdf?arnumber=9987664","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,4]],"date-time":"2023-10-04T14:46:22Z","timestamp":1696430782000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9987664\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"references-count":96,"URL":"https:\/\/doi.org\/10.1109\/tpami.2022.3229526","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]}}}