{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T21:50:39Z","timestamp":1776289839234,"version":"3.50.1"},"reference-count":408,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"11","license":[{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172263"],"award-info":[{"award-number":["62172263"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["22120240033"],"award-info":[{"award-number":["22120240033"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["22120240634"],"award-info":[{"award-number":["22120240634"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Proc. IEEE"],"published-print":{"date-parts":[[2024,11]]},"DOI":"10.1109\/jproc.2024.3525147","type":"journal-article","created":{"date-parts":[[2025,1,15]],"date-time":"2025-01-15T20:02:10Z","timestamp":1736971330000},"page":"1716-1754","source":"Crossref","is-referenced-by-count":46,"title":["Cross-Modal Retrieval: A Systematic Review of Methods and Future Directions"],"prefix":"10.1109","volume":"112","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8013-5188","authenticated-orcid":false,"given":"Tianshi","family":"Wang","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, Tongji University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3432-6215","authenticated-orcid":false,"given":"Fengling","family":"Li","sequence":"additional","affiliation":[{"name":"Australian Artificial Intelligence Institute, University of Technology Sydney, Sydney, NSW, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2993-7142","authenticated-orcid":false,"given":"Lei","family":"Zhu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Tongji University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5504-2529","authenticated-orcid":false,"given":"Jingjing","family":"Li","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1470-6998","authenticated-orcid":false,"given":"Zheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shenzhen Key Laboratory of Visual Object Detection and Recognition, Harbin Institute of Technology, Shenzhen, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2999-2088","authenticated-orcid":false,"given":"Heng Tao","family":"Shen","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Tongji University, Shanghai, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2018.2798607"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3637870"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/34.895972"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2017.2699960"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1873987"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.5591\/978-1-57735-516-8\/IJCAI11-230"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1126\/science.aaa8415"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654902"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.225"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3234150"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01752"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3220297"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/s11633-022-1369-5"},{"key":"ref14","article-title":"A comprehensive survey on cross-modal retrieval","author":"Wang","year":"2016","journal-title":"arXiv:1607.06215"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2017.2705068"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.cosrev.2020.100336"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/759"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2017.2742704"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240712"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3389547"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00402"},{"key":"ref22","first-page":"1","article-title":"Generative adversarial nets","volume-title":"Proc. NIPS","volume":"27","author":"Goodfellow"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2008.2005605"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2012.6467268"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3994"},{"key":"ref27","first-page":"1247","article-title":"Deep canonical correlation analysis","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Andrew"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-013-0658-4"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.12"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/860435.860460"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5540000"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126524"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390294"},{"key":"ref34","first-page":"3846","article-title":"Cross-media shared representation by hierarchical learning with multiple deep networks","volume-title":"Proc. Int. Joint Conf. Artif. Intell.","author":"Peng"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-015-0391-4"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240607"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/2671188.2749341"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.442"},{"key":"ref39","article-title":"VSE++: Improving visual-semantic embeddings with hard negatives","author":"Faghri","year":"2017","journal-title":"arXiv:1707.05612"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00805"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_13"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3351053"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01267"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2024.3407664"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3254530"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.767"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00419"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/124"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00359"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2024.3392619"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/720"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/iccv.2019.00475"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr42600.2020.01093"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i2.16209"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20059-5_40"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.01361"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1609.02907"},{"key":"ref58","article-title":"Graph attention networks","author":"Veli\u010dkovi\u0107","year":"2017","journal-title":"arXiv:1710.10903"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00208"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00831"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3182426"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3286710"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01455"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3321504"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_8"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_7"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6795"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01278"},{"key":"ref69","first-page":"4904","article-title":"Scaling up visual and vision-language representation learning with noisy text supervision","volume-title":"Proc. ICML","author":"Jia"},{"key":"ref70","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"139","author":"Radford"},{"key":"ref71","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","volume-title":"Proc. NAACL-HLT","author":"Kenton"},{"key":"ref72","first-page":"1","article-title":"An image is worth 16\u00d716 words: Transformers for image recognition at scale","volume-title":"Proc. ICLR","author":"Dosovitskiy"},{"key":"ref73","first-page":"26418","article-title":"Wukong: A 100 million large-scale Chinese cross-modal pre-training benchmark","volume-title":"Proc. NIPS","volume":"35","author":"Gu"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548107"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02485"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01524"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1145\/3539597.3570481"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00273"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3348297"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1145\/3543873.3584627"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00645"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475465"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.201"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00750"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.639"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6247923"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.466"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2015.2508146"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v27i1.8603"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2013.2276704"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2505311"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1145\/2775109"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080678"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654901"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1145\/2556195.2556238"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1145\/2502081.2502087"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2964336"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.424"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2852503"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2019.01064"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412081"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1109\/tip.2024.3374111"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123326"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1145\/3284750"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.2985540"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2022.3153962"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2023.106439"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240521"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3101642"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i3.16345"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548195"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3282894"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/454"},{"key":"ref115","article-title":"A comprehensive empirical study of vision-language pre-trained model for supervised cross-modal retrieval","author":"Zeng","year":"2022","journal-title":"arXiv:2201.02772"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2023.3280546"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.267"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1145\/2600428.2609610"},{"key":"ref119","first-page":"3890","article-title":"Semantic topic multimodal hashing for cross-media retrieval","volume-title":"Proc. Int. Conf. Artif. Intell.","author":"Wang"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2017.2723302"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2024.3350541"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1145\/2502081.2502107"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2013.2291214"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1145\/2463676.2465274"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2645565"},{"key":"ref126","first-page":"1328","article-title":"Predictable dual-view hashing","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Rastegari"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654906"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2890144"},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2024.3385986"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.219"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.224"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1145\/2911451.2911493"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123355"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/396"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2015.2455415"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3251395"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3243608"},{"key":"ref138","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11263"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.3301176"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2878970"},{"key":"ref141","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2922128"},{"key":"ref142","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00312"},{"key":"ref143","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401086"},{"key":"ref144","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i5.16592"},{"key":"ref145","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3053766"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2022.3218656"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i12.29280"},{"key":"ref148","first-page":"1035","article-title":"Hugs are better than handshakes: Unsupervised cross-modal transformer hashing with multi-granularity alignment","volume-title":"Proc. British Mach. Vis. Conf.","author":"Wang"},{"key":"ref149","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.101968"},{"key":"ref150","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00319"},{"key":"ref151","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475286"},{"key":"ref152","doi-asserted-by":"publisher","DOI":"10.1145\/3460426.3463626"},{"key":"ref153","doi-asserted-by":"publisher","DOI":"10.1109\/tcsvt.2022.3172716"},{"key":"ref154","doi-asserted-by":"publisher","DOI":"10.1109\/tcsvt.2024.3350695"},{"key":"ref155","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2018.00446"},{"key":"ref156","doi-asserted-by":"publisher","DOI":"10.1109\/tip.2016.2564638"},{"key":"ref157","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2017.2676345"},{"key":"ref158","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2940446"},{"key":"ref159","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2861000"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2020.2974825"},{"key":"ref161","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3287301"},{"key":"ref162","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2023.3241018"},{"key":"ref163","doi-asserted-by":"publisher","DOI":"10.1145\/2600428.2609563"},{"key":"ref164","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2967206"},{"key":"ref165","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2016.2592800"},{"key":"ref166","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2021.3102119"},{"key":"ref167","first-page":"3946","article-title":"Quantized correlation hashing for fast cross-modal search","volume-title":"Proc. 24th Int. Joint Conf. Artif. Intell.","author":"Wu"},{"key":"ref168","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3272169"},{"key":"ref169","doi-asserted-by":"publisher","DOI":"10.1145\/2983323.2983743"},{"key":"ref170","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240683"},{"key":"ref171","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3340102"},{"key":"ref172","doi-asserted-by":"publisher","DOI":"10.1145\/2487575.2487668"},{"key":"ref173","first-page":"1385","article-title":"Co-regularized hashing for multimodal data","volume-title":"Proc. NIPS","author":"Zhen"},{"key":"ref174","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v28i1.8995"},{"key":"ref175","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2610969"},{"key":"ref176","doi-asserted-by":"publisher","DOI":"10.1145\/2911996.2912000"},{"key":"ref177","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2655059"},{"key":"ref178","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3358104"},{"key":"ref179","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939812"},{"key":"ref180","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2020.2987312"},{"key":"ref181","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2997020"},{"key":"ref182","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2024.3385656"},{"key":"ref183","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.348"},{"key":"ref184","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10719"},{"key":"ref185","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3254199"},{"key":"ref186","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2821921"},{"key":"ref187","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014400"},{"key":"ref188","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3392763"},{"key":"ref189","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2963957"},{"key":"ref190","doi-asserted-by":"publisher","DOI":"10.1145\/3372278.3390711"},{"key":"ref191","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3177901"},{"key":"ref192","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3293104"},{"key":"ref193","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/138"},{"key":"ref194","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475346"},{"key":"ref195","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3174970"},{"key":"ref196","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2023.3338951"},{"key":"ref197","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548187"},{"key":"ref198","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3285266"},{"key":"ref199","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612411"},{"key":"ref200","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2024.3401050"},{"key":"ref201","doi-asserted-by":"publisher","DOI":"10.1109\/tmm.2018.2877122"},{"key":"ref202","doi-asserted-by":"publisher","DOI":"10.1109\/tcsvt.2020.3017344"},{"key":"ref203","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2021.3052490"},{"key":"ref204","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.109276"},{"key":"ref205","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.11218"},{"key":"ref206","doi-asserted-by":"publisher","DOI":"10.1109\/tmm.2021.3105824"},{"key":"ref207","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00536"},{"key":"ref208","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2022.3172216"},{"key":"ref209","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3286546"},{"key":"ref210","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547922"},{"key":"ref211","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3247939"},{"key":"ref212","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i2.27911"},{"key":"ref213","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02568"},{"key":"ref214","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.282"},{"key":"ref215","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3245400"},{"key":"ref216","doi-asserted-by":"publisher","DOI":"10.1145\/3323873.3325041"},{"key":"ref217","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462867"},{"key":"ref218","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413676"},{"key":"ref219","doi-asserted-by":"publisher","DOI":"10.1145\/3637442"},{"key":"ref220","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2024.3410388"},{"key":"ref221","doi-asserted-by":"publisher","DOI":"10.1145\/3206025.3206033"},{"key":"ref222","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2019.2928180"},{"key":"ref223","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3532028"},{"key":"ref224","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3190222"},{"key":"ref225","doi-asserted-by":"publisher","DOI":"10.1145\/3460426.3463632"},{"key":"ref226","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/292"},{"key":"ref227","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02034"},{"key":"ref228","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3351078"},{"key":"ref229","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3136330"},{"key":"ref230","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331213"},{"key":"ref231","first-page":"63529","article-title":"Achieving cross modal generalization with multimodal unified representation","volume-title":"Proc. AAAI","volume":"36","author":"Xia"},{"key":"ref232","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.9982"},{"key":"ref233","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401132"},{"key":"ref234","doi-asserted-by":"publisher","DOI":"10.1145\/3572032"},{"key":"ref235","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3330975"},{"key":"ref236","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645716"},{"key":"ref237","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2019.2953692"},{"key":"ref238","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01471"},{"key":"ref239","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2018.2879846"},{"key":"ref240","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2024.3374791"},{"key":"ref241","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462989"},{"key":"ref242","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3613837"},{"key":"ref243","doi-asserted-by":"publisher","DOI":"10.1145\/3674507"},{"key":"ref244","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681319"},{"key":"ref245","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475483"},{"key":"ref246","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.10.013"},{"key":"ref247","first-page":"46433","article-title":"COLA: A benchmark for compositional text-to-image retrieval","volume-title":"Proc. NIPS","volume":"36","author":"Ray"},{"key":"ref248","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2024.3417694"},{"key":"ref249","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3350974"},{"key":"ref250","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547809"},{"key":"ref251","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331229"},{"key":"ref252","doi-asserted-by":"publisher","DOI":"10.1109\/tmm.2022.3140656"},{"key":"ref253","first-page":"10791","article-title":"Cross-modal learning with adversarial samples","volume-title":"Proc. NIPS","volume":"32","author":"Li"},{"key":"ref254","doi-asserted-by":"publisher","DOI":"10.1109\/tcsvt.2023.3263054"},{"key":"ref255","doi-asserted-by":"publisher","DOI":"10.1109\/iccv48922.2021.00222"},{"key":"ref256","doi-asserted-by":"publisher","DOI":"10.1145\/3559758"},{"key":"ref257","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2024.3378918"},{"key":"ref258","doi-asserted-by":"publisher","DOI":"10.1145\/3650205"},{"key":"ref259","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462837"},{"key":"ref260","doi-asserted-by":"publisher","DOI":"10.1145\/3545799"},{"key":"ref261","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3613772"},{"key":"ref262","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3344097"},{"key":"ref263","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i2.27941"},{"key":"ref264","doi-asserted-by":"publisher","DOI":"10.1145\/3512527.3531381"},{"key":"ref265","first-page":"1","article-title":"NII-HITACHI-UIT at TRECVID 2016","volume-title":"Proc. TRECVID","author":"Le"},{"key":"ref266","first-page":"1","article-title":"ITI-CERTH participation in TRECVID 2016","volume-title":"Proc. TRECVID Workshop","author":"Foteini"},{"key":"ref267","first-page":"1","article-title":"Waseda_Meisei at TRECVID 2017: Ad-hoc video search","volume-title":"Proc. TRECVID","author":"Ueki"},{"key":"ref268","doi-asserted-by":"publisher","DOI":"10.1145\/3078971.3079041"},{"key":"ref269","article-title":"Learning language-visual embedding for movie understanding with natural-language","author":"Torabi","year":"2016","journal-title":"arXiv:1609.08124"},{"key":"ref270","doi-asserted-by":"publisher","DOI":"10.1145\/3206025.3206064"},{"key":"ref271","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00272"},{"key":"ref272","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.347"},{"key":"ref273","article-title":"Learning a text-video embedding from incomplete and heterogeneous data","author":"Miech","year":"2018","journal-title":"arXiv:1804.02516"},{"key":"ref274","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2018.2832602"},{"key":"ref275","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3235523"},{"key":"ref276","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01065"},{"key":"ref277","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.374"},{"key":"ref278","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462974"},{"key":"ref279","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3090595"},{"key":"ref280","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2024.3411298"},{"key":"ref281","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58548-8_13"},{"key":"ref282","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01170"},{"key":"ref283","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"issue":"8","key":"ref284","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI Blog"},{"key":"ref285","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.07.028"},{"key":"ref286","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00635"},{"key":"ref287","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01031"},{"key":"ref288","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i7.28475"},{"key":"ref289","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01622"},{"key":"ref290","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01566"},{"key":"ref291","doi-asserted-by":"publisher","DOI":"10.1145\/1460096.1460115"},{"key":"ref292","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9052990"},{"key":"ref293","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3149712"},{"key":"ref294","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746786"},{"key":"ref295","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-11115"},{"key":"ref296","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3592025"},{"key":"ref297","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096972"},{"key":"ref298","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2024.3358048"},{"key":"ref299","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10446380"},{"key":"ref300","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5540039"},{"key":"ref301","doi-asserted-by":"publisher","DOI":"10.1109\/tgrs.2024.3407857"},{"key":"ref302","doi-asserted-by":"publisher","DOI":"10.1109\/wacv56688.2023.00221"},{"key":"ref303","doi-asserted-by":"publisher","DOI":"10.1109\/tgrs.2023.3264006"},{"key":"ref304","doi-asserted-by":"publisher","DOI":"10.1109\/tgrs.2021.3060705"},{"key":"ref305","doi-asserted-by":"publisher","DOI":"10.1109\/tmm.2023.3323876"},{"key":"ref306","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2020.2979273"},{"key":"ref307","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3155283"},{"key":"ref308","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2023.3269300"},{"key":"ref309","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095807"},{"key":"ref310","doi-asserted-by":"publisher","DOI":"10.1109\/ICME52920.2022.9859647"},{"key":"ref311","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2024.3388949"},{"key":"ref312","doi-asserted-by":"publisher","DOI":"10.1145\/3564608"},{"key":"ref313","doi-asserted-by":"publisher","DOI":"10.1145\/3575658"},{"key":"ref314","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00316"},{"key":"ref315","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01121"},{"key":"ref316","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01117"},{"key":"ref317","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01610"},{"key":"ref318","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3412760"},{"key":"ref319","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i7.28566"},{"key":"ref320","article-title":"Riemann-based multi-scale attention reasoning network for text-3D retrieval","author":"Li","year":"2024","journal-title":"arXiv:2408.13712"},{"key":"ref321","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00870"},{"key":"ref322","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00199"},{"key":"ref323","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3326581"},{"key":"ref324","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3338058"},{"key":"ref325","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i1.25141"},{"key":"ref326","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2017.2667710"},{"key":"ref327","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6941"},{"key":"ref328","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02488"},{"key":"ref329","first-page":"1","article-title":"Learning to jointly understand visual and tactile signals","volume-title":"Proc. ICLR","author":"Li"},{"key":"ref330","doi-asserted-by":"publisher","DOI":"10.1145\/3543873.3587649"},{"key":"ref331","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109915"},{"key":"ref332","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681296"},{"key":"ref333","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-009-0275-4"},{"key":"ref334","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5540092"},{"key":"ref335","doi-asserted-by":"publisher","DOI":"10.1145\/1460096.1460104"},{"key":"ref336","first-page":"139","article-title":"Collecting image annotations using Amazon\u2019s mechanical Turk","volume-title":"Proc. Workshop Creating Speech Lang. Data With Amazon\u2019s Mech. Turk","author":"Rashtchian"},{"key":"ref337","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00166"},{"key":"ref338","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref339","doi-asserted-by":"publisher","DOI":"10.1145\/1646396.1646452"},{"key":"ref340","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2009.03.008"},{"key":"ref341","article-title":"Fashion-gen: The generative fashion dataset and challenge","author":"Rostamzadeh","year":"2018","journal-title":"arXiv:1806.08317"},{"key":"ref342","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.327"},{"key":"ref343","article-title":"The caltech-UCSD birds-200\u20132011 dataset","author":"Wah","year":"2011","journal-title":"CaltechDATA"},{"key":"ref344","article-title":"Caltech-256 object category dataset","author":"Griffin","year":"2007","journal-title":"CaltechDATA"},{"key":"ref345","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-007-0090-8"},{"key":"ref346","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0981-7"},{"key":"ref347","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1238"},{"key":"ref348","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.571"},{"key":"ref349","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.83"},{"key":"ref350","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0987-1"},{"key":"ref351","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00468"},{"key":"ref352","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1011"},{"key":"ref353","doi-asserted-by":"publisher","DOI":"10.1109\/cits.2016.7546397"},{"key":"ref354","doi-asserted-by":"publisher","DOI":"10.1109\/tgrs.2021.3078451"},{"key":"ref355","doi-asserted-by":"publisher","DOI":"10.1109\/tgrs.2017.2776321"},{"key":"ref356","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01216-8_16"},{"key":"ref357","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2018.00374"},{"key":"ref358","doi-asserted-by":"publisher","DOI":"10.1109\/SPLIM.2016.7528403"},{"key":"ref359","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298801"},{"key":"ref360","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00314"},{"key":"ref361","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00277"},{"key":"ref362","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548237"},{"key":"ref363","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2024.3381347"},{"key":"ref364","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00512"},{"key":"ref365","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401430"},{"key":"ref366","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210036"},{"key":"ref367","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240627"},{"key":"ref368","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2024.3384672"},{"key":"ref369","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_43"},{"key":"ref370","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00227"},{"key":"ref371","doi-asserted-by":"publisher","DOI":"10.1145\/3539597.3570428"},{"key":"ref372","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3068825"},{"key":"ref373","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02078"},{"key":"ref374","doi-asserted-by":"publisher","DOI":"10.1145\/3591106.3592236"},{"key":"ref375","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2024.3417421"},{"key":"ref376","doi-asserted-by":"publisher","DOI":"10.1007\/s11280-021-00881-8"},{"key":"ref377","article-title":"Deep supervised information bottleneck hashing for cross-modal retrieval based computer-aided diagnosis","author":"Shi","year":"2022","journal-title":"arXiv:2205.08365"},{"key":"ref378","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-34048-2_36"},{"key":"ref379","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02236"},{"key":"ref380","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01163"},{"key":"ref381","first-page":"15309","article-title":"Retrieval-augmented diffusion models","volume-title":"Proc. NIPS","volume":"35","author":"Blattmann"},{"key":"ref382","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00278"},{"key":"ref383","first-page":"22820","article-title":"Fine-grained late-interaction multi-modal retrieval for retrieval augmented visual question answering","volume-title":"Proc. NIPS","volume":"36","author":"Lin"},{"key":"ref384","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20059-5_14"},{"key":"ref385","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-vision-082114-035447"},{"key":"ref386","article-title":"Informed pre-training on prior knowledge","author":"von Rueden","year":"2022","journal-title":"arXiv:2205.11433"},{"key":"ref387","first-page":"7642","article-title":"Long-tail cross modal hashing","volume-title":"Proc. AAAI","author":"Gao"},{"key":"ref388","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2012.179"},{"key":"ref389","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"ref390","doi-asserted-by":"publisher","DOI":"10.5555\/3305381"},{"key":"ref391","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2009.191"},{"key":"ref392","doi-asserted-by":"publisher","DOI":"10.1186\/s40537-016-0043-6"},{"key":"ref393","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00283"},{"key":"ref394","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3195549"},{"issue":"1","key":"ref395","first-page":"3117","article-title":"A survey of algorithms and analysis for adaptive online learning","volume":"18","author":"McMahan","year":"2017","journal-title":"J. Mach. Learn. Res."},{"key":"ref396","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3183326"},{"key":"ref397","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01453-z"},{"key":"ref398","doi-asserted-by":"publisher","DOI":"10.1145\/1835449.1835511"},{"key":"ref399","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00307"},{"key":"ref400","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-10088-y"},{"key":"ref401","doi-asserted-by":"publisher","DOI":"10.1145\/3457607"},{"key":"ref402","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.343"},{"key":"ref403","article-title":"Fair diffusion: Instructing text-to-image generation models on fairness","author":"Friedrich","year":"2023","journal-title":"arXiv:2302.10893"},{"key":"ref404","doi-asserted-by":"publisher","DOI":"10.1002\/widm.1356"},{"key":"ref405","doi-asserted-by":"publisher","DOI":"10.1145\/3564284"},{"key":"ref406","doi-asserted-by":"publisher","DOI":"10.1145\/2523616.2523633"},{"key":"ref407","doi-asserted-by":"publisher","DOI":"10.1145\/2934664"},{"key":"ref408","article-title":"Federated learning: Strategies for improving communication efficiency","author":"Kone\u010dn\u00fd","year":"2016","journal-title":"arXiv:1610.05492"}],"container-title":["Proceedings of the IEEE"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/5\/10870449\/10843094.pdf?arnumber=10843094","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,5]],"date-time":"2025-02-05T18:55:33Z","timestamp":1738781733000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10843094\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11]]},"references-count":408,"journal-issue":{"issue":"11"},"URL":"https:\/\/doi.org\/10.1109\/jproc.2024.3525147","relation":{},"ISSN":["0018-9219","1558-2256"],"issn-type":[{"value":"0018-9219","type":"print"},{"value":"1558-2256","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11]]}}}