{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T22:43:40Z","timestamp":1765233820773,"version":"3.40.3"},"reference-count":97,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1109\/tpami.2024.3412760","type":"journal-article","created":{"date-parts":[[2024,6,11]],"date-time":"2024-06-11T18:26:04Z","timestamp":1718130364000},"page":"8976-8993","source":"Crossref","is-referenced-by-count":6,"title":["HOPE: A Hierarchical Perspective for Semi-Supervised 2D-3D Cross-Modal Retrieval"],"prefix":"10.1109","volume":"46","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5250-7258","authenticated-orcid":false,"given":"Fan","family":"Zhang","sequence":"first","affiliation":[{"name":"Department of Electrical and Computer Engineering, Georgia Institute of Technology, Shenzhen, Guangdong, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9535-6287","authenticated-orcid":false,"given":"Hang","family":"Zhou","sequence":"additional","affiliation":[{"name":"Department of Statistics, University of California, Davis, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8232-5049","authenticated-orcid":false,"given":"Xian-Sheng","family":"Hua","sequence":"additional","affiliation":[{"name":"Terminus Group, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0213-9957","authenticated-orcid":false,"given":"Chong","family":"Chen","sequence":"additional","affiliation":[{"name":"Terminus Group, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7987-3714","authenticated-orcid":false,"given":"Xiao","family":"Luo","sequence":"additional","affiliation":[{"name":"University of California, Los Angeles, CA, USA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01117"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3548690"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2960234"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01121"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3251697"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3070969"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3351009"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6859"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3191761"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00316"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.329"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3159589"},{"key":"ref13","first-page":"17 612","article-title":"Mind the gap: Understanding the modality gap in multi-modal contrastive representation learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Liang"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.3390\/app12063180"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475465"},{"article-title":"Open-vocabulary 3D detection via image-level class and debiased cross-modal contrastive learning","year":"2022","author":"Lu","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2967597"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00148"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01090"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01064"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3247939"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3188547"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2022.3218656"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00257"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_13"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00475"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00957"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00990"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01093"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i2.16209"},{"key":"ref32","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Radford"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00923"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014400"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3221785"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01272"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00120"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3148853"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00402"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413631"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1111\/1467-8659.00669"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2904460"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018513"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.366"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00319"},{"key":"ref46","first-page":"14 675","article-title":"Hard example generation by texture synthesis for cross-domain shape similarity learning","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Fu"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482247"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01495"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00729"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01740"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3308189"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3215186"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3233584"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01484"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3273592"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403117"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-emnlp.44"},{"article-title":"Temporal ensembling for semi-supervised learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Laine","key":"ref58"},{"key":"ref59","first-page":"1171","article-title":"Regularization with stochastic transformations and perturbations for deep semi-supervised learning","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Sajjadi"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01070"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01209"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547892"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20077-9_27"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00413"},{"key":"ref65","first-page":"596","article-title":"FixMatch: Simplifying semi-supervised learning with consistency and confidence","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Sohn"},{"key":"ref66","first-page":"27 734","article-title":"You never cluster alone","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Shen"},{"key":"ref67","first-page":"18 408","article-title":"Flexmatch: Boosting semi-supervised learning with curriculum pseudo labeling","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Zhang"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313573"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01747"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.5555\/3524938.3525087"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"article-title":"Prototypical contrastive learning of unsupervised representations","year":"2020","author":"Li","key":"ref72"},{"key":"ref73","first-page":"9912","article-title":"Unsupervised learning of visual features by contrasting cluster assignments","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Caron"},{"article-title":"Rethinking prototypical contrastive learning through alignment, uniformity and correlation","year":"2022","author":"Mo","key":"ref74"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/125"},{"key":"ref76","first-page":"214","article-title":"Wasserstein generative adversarial networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Arjovsky"},{"key":"ref77","first-page":"31 030","article-title":"Cross-modal fine-tuning: Align then refine","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Shen"},{"key":"ref78","first-page":"8536","article-title":"Co-teaching: Robust training of deep neural networks with extremely noisy labels","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Han"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00536"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3101642"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/splim.2016.7528403"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2015.7298801"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-4380-9_14"},{"key":"ref84","first-page":"1247","article-title":"Deep canonical correlation analysis","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Andrew"},{"key":"ref85","first-page":"1083","article-title":"On deep multi-view representation learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i5.16592"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02079"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00264"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00423"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00699"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00422"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00421"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01521"},{"key":"ref94","first-page":"24286","article-title":"One loss for all: Deep hashing with a single cosine similarity based learning objective","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Hoe"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02246"},{"article-title":"Multimodal semi-supervised learning for 3D objects","year":"2021","author":"Chen","key":"ref96"},{"issue":"11","key":"ref97","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"Van der Maaten","year":"2008","journal-title":"J. Mach. Learn. Res."}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/34\/10746266\/10553262.pdf?arnumber=10553262","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,5]],"date-time":"2025-04-05T04:12:46Z","timestamp":1743826366000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10553262\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12]]},"references-count":97,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2024.3412760","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"type":"print","value":"0162-8828"},{"type":"electronic","value":"2160-9292"},{"type":"electronic","value":"1939-3539"}],"subject":[],"published":{"date-parts":[[2024,12]]}}}