{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,19]],"date-time":"2026-01-19T05:19:08Z","timestamp":1768799948730,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":85,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"the State Key Program and the Foundation for Innovative Research Groups of the National Natural Science Foundation of China","award":["61836009"],"award-info":[{"award-number":["61836009"]}]},{"name":"the Key Scientific Technological Innovation Research Project by Ministry of Education"},{"name":"the Program for Cheung Kong Scholars and Innovative Research Team in University","award":["IRT 15R53"],"award-info":[{"award-number":["IRT 15R53"]}]},{"name":"the National Natural Science Foundation of China under Grant","award":["62271377"],"award-info":[{"award-number":["62271377"]}]},{"name":"the Key Research and Development Program of Shannxi","award":["2021ZDLGY01-06, 2022ZDLGY01-12, 2023YBGY244, 2023QCYLL28, 2024GX-ZDCYL-02-08, 2024GX-ZDCYL-02-17"],"award-info":[{"award-number":["2021ZDLGY01-06, 2022ZDLGY01-12, 2023YBGY244, 2023QCYLL28, 2024GX-ZDCYL-02-08, 2024GX-ZDCYL-02-17"]}]},{"name":"the Joint Funds of the National Natural Science Foundation of China","award":["U22B2054"],"award-info":[{"award-number":["U22B2054"]}]},{"name":"the National Key Research and Development Program of China under Grant","award":["2021ZD0110404,2021ZD0110400"],"award-info":[{"award-number":["2021ZD0110404,2021ZD0110400"]}]},{"name":"the Fund for Foreign Scholars in University Research and Teaching Program?s 111 Project","award":["B07048"],"award-info":[{"award-number":["B07048"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681280","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"9719-9728","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Accurate and Lightweight Learning for Specific Domain Image-Text Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3209-0456","authenticated-orcid":false,"given":"Rui","family":"Yang","sequence":"first","affiliation":[{"name":"Xidian University, Xi'an, Shaanxi, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4940-1211","authenticated-orcid":false,"given":"Shuang","family":"Wang","sequence":"additional","affiliation":[{"name":"Xidian University, Xi'an, Shaanxi, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3842-5725","authenticated-orcid":false,"given":"Jianwei","family":"Tao","sequence":"additional","affiliation":[{"name":"Xidian University, Xi'an, Shaanxi, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6283-9735","authenticated-orcid":false,"given":"Yingping","family":"Han","sequence":"additional","affiliation":[{"name":"Xidian University, Xi'an, Shaanxi, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-1642-5114","authenticated-orcid":false,"given":"Qiaoling","family":"Lin","sequence":"additional","affiliation":[{"name":"Xidian University, Xi'an, Shaanxi, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6144-5070","authenticated-orcid":false,"given":"YanHe","family":"Guo","sequence":"additional","affiliation":[{"name":"Xidian University, Xi'an, Shaanxi, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1996-186X","authenticated-orcid":false,"given":"Biao","family":"Hou","sequence":"additional","affiliation":[{"name":"Xidian University, Xi'an, Shaanxi, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3354-9617","authenticated-orcid":false,"given":"Licheng","family":"Jiao","sequence":"additional","affiliation":[{"name":"Xidian University, Xi'an, Shaanxi, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6229"},{"key":"e_1_3_2_1_2_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. Advances in neural information processing systems Vol. 33 (2020) 1877--1901."},{"key":"e_1_3_2_1_3_1","volume-title":"Proceedings of the 12th ACM SIGKDD international conference on Knowledge discovery and data mining. 535--541","author":"Caruana Rich","year":"2006","unstructured":"Rich Caruana, and Alexandru Niculescu-Mizil. 2006. Model compression. In Proceedings of the 12th ACM SIGKDD international conference on Knowledge discovery and data mining. 535--541."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01452"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01267"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2018.00208"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2024.3477943"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2021.3070872"},{"key":"e_1_3_2_1_9_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_10_1","volume-title":"Semantically self-aligned network for text-to-image part-aware person re-identification. arXiv preprint arXiv:2107.12666","author":"Ding Zefeng","year":"2021","unstructured":"Zefeng Ding, Changxing Ding, Zhiyin Shao, and Dacheng Tao. 2021. Semantically self-aligned network for text-to-image part-aware person re-identification. arXiv preprint arXiv:2107.12666 (2021)."},{"key":"e_1_3_2_1_11_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_12_1","volume-title":"Jamie Ryan Kiros, and Sanja Fidler","author":"Faghri Fartash","year":"2017","unstructured":"Fartash Faghri, David J Fleet, Jamie Ryan Kiros, and Sanja Fidler. 2017. Vse: Improving visual-semantic embeddings with hard negatives. arXiv preprint arXiv:1707.05612 (2017)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3273719"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aiopen.2021.08.002"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00067"},{"key":"e_1_3_2_1_17_1","volume-title":"Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531","author":"Hinton Geoffrey","year":"2015","unstructured":"Geoffrey Hinton, Oriol Vinyals, and Jeff Dean. 2015. Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)."},{"key":"e_1_3_2_1_18_1","volume-title":"International Conference on Machine Learning. PMLR, 4904--4916","author":"Jia Chao","year":"2021","unstructured":"Chao Jia, Yinfei Yang, Ye Xia, Yi-Ting Chen, Zarana Parekh, Hieu Pham, Quoc Le, Yun-Hsuan Sung, Zhen Li, and Tom Duerig. 2021. Scaling up visual and vision-language representation learning with noisy text supervision. In International Conference on Machine Learning. PMLR, 4904--4916."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19827-4_41"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00273"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6777"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"e_1_3_2_1_23_1","volume-title":"Deep fragment embeddings for bidirectional image sentence mapping. Advances in neural information processing systems","author":"Karpathy Andrej","year":"2014","unstructured":"Andrej Karpathy, Armand Joulin, and Li F Fei-Fei. 2014. Deep fragment embeddings for bidirectional image sentence mapping. Advances in neural information processing systems, Vol. 27 (2014)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01832"},{"key":"e_1_3_2_1_25_1","volume-title":"International Conference on Machine Learning. PMLR, 5583--5594","author":"Kim Wonjae","year":"2021","unstructured":"Wonjae Kim, Bokyung Son, and Ildoo Kim. 2021. Vilt: Vision-and-language transformer without convolution or region supervision. In International Conference on Machine Learning. PMLR, 5583--5594."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_13"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6795"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.209"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.551"},{"key":"e_1_3_2_1_30_1","volume-title":"Proceedings, Part XXX 16","author":"Li Xiujun","year":"2020","unstructured":"Xiujun Li, Xi Yin, Chunyuan Li, Pengchuan Zhang, Xiaowei Hu, Lei Zhang, Lijuan Wang, Houdong Hu, Li Dong, Furu Wei, et al. 2020. Oscar: Object-semantics aligned pre-training for vision-language tasks. In Computer Vision--ECCV 2020: 16th European Conference, Glasgow, UK, August 23--28, 2020, Proceedings, Part XXX 16. Springer, 121--137."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/IGARSS52108.2023.10281578"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_33_1","volume-title":"Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. Advances in neural information processing systems","author":"Lu Jiasen","year":"2019","unstructured":"Jiasen Lu, Dhruv Batra, Devi Parikh, and Stefan Lee. 2019. Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2017.2776321"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2021.104168"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00970"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746251"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547753"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2984883"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612374"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01820"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CITS.2016.7546397"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413961"},{"key":"e_1_3_2_1_44_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_45_1","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans and Ilya Sutskever. [n. d.] a. Improving Language Understanding by Generative Pre-Training. ( [n. d.])."},{"key":"e_1_3_2_1_46_1","unstructured":"Alec Radford Jeffrey Wu Rewon Child David Luan Dario Amodei Ilya Sutskever et al. [n. d.] b. Language models are unsupervised multitask learners. ( [n. d.])."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2022.3215803"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00591"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01026"},{"key":"e_1_3_2_1_50_1","volume-title":"European Conference on Computer Vision. Springer, 624--641","author":"Shu Xiujun","year":"2022","unstructured":"Xiujun Shu, Wei Wen, Haoqian Wu, Keyu Chen, Yiran Song, Ruizhi Qiao, Bo Ren, and Xiao Wang. 2022. See finer, see more: Implicit modality alignment for text-based person retrieval. In European Conference on Computer Vision. Springer, 624--641."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","unstructured":"Hao Tan and Mohit Bansal. 2019. LXMERT: Learning Cross-Modality Encoder Representations from Transformers. In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP). Association for Computational Linguistics Hong Kong China 5100--5111. https:\/\/doi.org\/10.18653\/v1\/D19--1514","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3591106.3592236"},{"key":"e_1_3_2_1_53_1","volume-title":"Proceedings, Part XXIV 16","author":"Wang Xiaobo","year":"2020","unstructured":"Xiaobo Wang, Tianyu Fu, Shengcai Liao, Shuo Wang, Zhen Lei, and Tao Mei. 2020. Exclusivity-consistency regularized knowledge distillation for face recognition. In Computer Vision--ECCV 2020: 16th European Conference, Glasgow, UK, August 23--28, 2020, Proceedings, Part XXIV 16. Springer, 325--342."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00586"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2022.108891"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548166"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1117\/1.JEI.29.4.043028"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02008"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-019-01286-x"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3251104"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611832"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3327924"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3310118"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2024.110273"},{"key":"e_1_3_2_1_65_1","volume-title":"Transcending Fusion: A Multi-Scale Alignment Method for Remote Sensing Image-Text Retrieval. arXiv preprint arXiv:2405.18959","author":"Yang Rui","year":"2024","unstructured":"Rui Yang, Shuang Wang, Yingping Han, Yuanheng Li, Dong Zhao, Dou Quan, Yanhe Guo, and Licheng Jiao. 2024. Transcending Fusion: A Multi-Scale Alignment Method for Remote Sensing Image-Text Retrieval. arXiv preprint arXiv:2405.18959 (2024)."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2022.3194076"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612207"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2022.3231851"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2023.3308969"},{"key":"e_1_3_2_1_70_1","first-page":"1","article-title":"Exploring a Fine-Grained Multiscale Method for Cross-Modal Remote Sensing Image Retrieval","volume":"60","author":"Yuan Zhiqiang","year":"2021","unstructured":"Zhiqiang Yuan, Wenkai Zhang, Kun Fu, Xuan Li, Chubo Deng, Hongqi Wang, and Xian Sun. 2021. Exploring a Fine-Grained Multiscale Method for Cross-Modal Remote Sensing Image Retrieval. IEEE Transactions on Geoscience and Remote Sensing, Vol. 60 (2021), 1--19.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_2_1_71_1","first-page":"1","article-title":"A lightweight multi-scale crossmodal text-image retrieval method in remote sensing","volume":"60","author":"Yuan Zhiqiang","year":"2021","unstructured":"Zhiqiang Yuan, Wenkai Zhang, Xuee Rong, Xuan Li, Jialiang Chen, Hongqi Wang, Kun Fu, and Xian Sun. 2021. A lightweight multi-scale crossmodal text-image retrieval method in remote sensing. IEEE Transactions on Geoscience and Remote Sensing, Vol. 60 (2021), 1--19.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jag.2022.103071"},{"key":"e_1_3_2_1_73_1","first-page":"1","article-title":"Remote Sensing Cross-Modal Text-Image Retrieval Based on Global and Local Information","volume":"60","author":"Yuan Zhiqiang","year":"2022","unstructured":"Zhiqiang Yuan, Wenkai Zhang, Changyuan Tian, Xuee Rong, Zhengyuan Zhang, Hongqi Wang, Kun Fu, and Xian Sun. 2022. Remote Sensing Cross-Modal Text-Image Retrieval Based on Global and Local Information. IEEE Transactions on Geoscience and Remote Sensing, Vol. 60 (2022), 1--16.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_2_1_74_1","first-page":"1","article-title":"Remote Sensing Cross-Modal Text-Image Retrieval Based on Global and Local Information","volume":"60","author":"Yuan Zhiqiang","year":"2022","unstructured":"Zhiqiang Yuan, Wenkai Zhang, Changyuan Tian, Xuee Rong, Zhengyuan Zhang, Hongqi Wang, Kun Fu, and Xian Sun. 2022. Remote Sensing Cross-Modal Text-Image Retrieval Based on Global and Local Information. IEEE Transactions on Geoscience and Remote Sensing, Vol. 60 (2022), 1--16.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_2_1_75_1","volume-title":"Multi-Grained Vision Language Pre-Training: Aligning Texts with Visual Concepts. In International Conference on Machine Learning. PMLR, 25994--26009","author":"Zeng Yan","year":"2022","unstructured":"Yan Zeng, Xinsong Zhang, and Hang Li. 2022. Multi-Grained Vision Language Pre-Training: Aligning Texts with Visual Concepts. In International Conference on Machine Learning. PMLR, 25994--26009."},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TGRS.2023.3333375","article-title":"Exploring Uni-Modal Feature Learning on Entities and Relations for Remote Sensing Cross-Modal Text-Image Retrieval","volume":"61","author":"Zhang Shun","year":"2023","unstructured":"Shun Zhang, Yupeng Li, and Shaohui Mei. 2023. Exploring Uni-Modal Feature Learning on Entities and Relations for Remote Sensing Cross-Modal Text-Image Retrieval. IEEE Transactions on Geoscience and Remote Sensing, Vol. 61 (2023), 1--17.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2023.3318227"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_42"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01129"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01758"},{"key":"e_1_3_2_1_81_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Zhao Long","unstructured":"Long Zhao, Xi Peng, Yuxiao Chen, Mubbasir Kapadia, and Dimitris N. Metaxas. 2020. Knowledge As Priors: Cross-Modal Knowledge Generalization for Datasets Without Superior Knowledge. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413864"},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383184"},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01653-1"},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475369"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681280","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681280","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:43Z","timestamp":1750295863000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681280"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":85,"alternative-id":["10.1145\/3664647.3681280","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681280","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}