{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T13:53:49Z","timestamp":1774965229172,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,10]],"date-time":"2024-07-10T00:00:00Z","timestamp":1720569600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,10]]},"DOI":"10.1145\/3626772.3657756","type":"proceedings-article","created":{"date-parts":[[2024,7,11]],"date-time":"2024-07-11T12:40:05Z","timestamp":1720701605000},"page":"872-881","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Semi-supervised Prototype Semantic Association Learning for Robust Cross-modal Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1605-1219","authenticated-orcid":false,"given":"Junsheng","family":"Wang","sequence":"first","affiliation":[{"name":"Nanjing University of Science and Technology, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8142-3782","authenticated-orcid":false,"given":"Tiantian","family":"Gong","sequence":"additional","affiliation":[{"name":"Nanjing University of Aeronautics and Astronautics, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7618-119X","authenticated-orcid":false,"given":"Yan","family":"Yan","sequence":"additional","affiliation":[{"name":"Illinois Institute of Technology, Chicago, IL, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,7,11]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2021.10.013"},{"key":"e_1_3_2_1_2_1","volume-title":"Remixmatch: Semi-supervised learning with distribution alignment and augmentation anchoring. arXiv preprint arXiv:1911.09785","author":"Berthelot David","year":"2019","unstructured":"David Berthelot, Nicholas Carlini, Ekin D Cubuk, Alex Kurakin, Kihyuk Sohn, Han Zhang, and Colin Raffel. 2019. Remixmatch: Semi-supervised learning with distribution alignment and augmentation anchoring. arXiv preprint arXiv:1911.09785 (2019)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210036"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1646396.1646452"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654902"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3613802"},{"key":"e_1_3_2_1_7_1","volume-title":"ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","author":"Gong Tiantian","unstructured":"Tiantian Gong, Junsheng Wang, and Liyan Zhang. 2023. Rethink Pair-Wise Self-Supervised Cross-Modal Retrieval From A Contrastive Learning Perspective. In ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 1--5."},{"key":"e_1_3_2_1_8_1","volume-title":"Cross-modal retrieval augmentation for multi-modal classification. arXiv preprint arXiv:2104.08108","author":"Gur Shir","year":"2021","unstructured":"Shir Gur, Natalia Neverova, Chris Stauffer, Ser-Nam Lim, Douwe Kiela, and Austin Reiter. 2021. Cross-modal retrieval augmentation for multi-modal classification. arXiv preprint arXiv:2104.08108 (2021)."},{"key":"e_1_3_2_1_9_1","volume-title":"Canonical correlation analysis: An overview with application to learning methods. Neural computation 16, 12","author":"Hardoon David R","year":"2004","unstructured":"David R Hardoon, Sandor Szedmak, and John Shawe-Taylor. 2004. Canonical correlation analysis: An overview with application to learning methods. Neural computation 16, 12 (2004), 2639--2664."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_11_1","volume-title":"Gaussian error linear units (gelus). arXiv preprint arXiv:1606.08415","author":"Hendrycks Dan","year":"2016","unstructured":"Dan Hendrycks and Kevin Gimpel. 2016. Gaussian error linear units (gelus). arXiv preprint arXiv:1606.08415 (2016)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331213"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i01.5339"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00921"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01849"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413676"},{"key":"e_1_3_2_1_17_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00934"},{"key":"e_1_3_2_1_19_1","volume-title":"Self-supervised correlation learning for cross-modal retrieval","author":"Liu Yaxin","year":"2022","unstructured":"Yaxin Liu, JianlongWu, Leigang Qu, Tian Gan, Jianhua Yin, and Liqiang Nie. 2022. Self-supervised correlation learning for cross-modal retrieval. IEEE Transactions on Multimedia (2022)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP40778.2020.9190722"},{"key":"e_1_3_2_1_21_1","volume-title":"International Joint Conference on Artificial Intelligence. 3846--3853","author":"Peng Yuxin","year":"2016","unstructured":"Yuxin Peng, Xin Huang, and Jinwei Qi. 2016. Cross-media shared representation by hierarchical learning with multiple deep networks. In International Joint Conference on Artificial Intelligence. 3846--3853."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3284750"},{"key":"e_1_3_2_1_23_1","volume-title":"CCL: Cross-modal correlation learning with multigrained fusion by hierarchical network","author":"Peng Yuxin","year":"2017","unstructured":"Yuxin Peng, Jinwei Qi, Xin Huang, and Yuxin Yuan. 2017. CCL: Cross-modal correlation learning with multigrained fusion by hierarchical network. IEEE Transactions on Multimedia (2017), 405--420."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2852503"},{"key":"e_1_3_2_1_25_1","volume-title":"International Conference on Machine Learning. 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International Conference on Machine Learning. 8748--8763."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.5555\/1866696.1866717"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1873987"},{"key":"e_1_3_2_1_28_1","volume-title":"Neural machine translation of rare words with subword units. arXiv preprint arXiv:1508.07909","author":"Sennrich Rico","year":"2015","unstructured":"Rico Sennrich, Barry Haddow, and Alexandra Birch. 2015. Neural machine translation of rare words with subword units. arXiv preprint arXiv:1508.07909 (2015)."},{"key":"e_1_3_2_1_29_1","volume-title":"Incomplete Cross-Modal Retrieval with Deep Correlation Transfer. ACM Transactions on Multimedia Computing, Communications and Applications","author":"Shi Dan","year":"2023","unstructured":"Dan Shi, Lei Zhu, Jingjing Li, Guohua Dong, and Huaxiang Zhang. 2023. Incomplete Cross-Modal Retrieval with Deep Correlation Transfer. ACM Transactions on Multimedia Computing, Communications and Applications (2023)."},{"key":"e_1_3_2_1_30_1","article-title":"Visualizing data using t-SNE","volume":"9","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research 9, 11 (2008).","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_31_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123326"},{"key":"e_1_3_2_1_33_1","volume-title":"Proceedings of the 30th ACM International Conference on Multimedia. 4300--4308","author":"Wang Junsheng","year":"2022","unstructured":"Junsheng Wang, Tiantian Gong, Zhixiong Zeng, Changchang Sun, and Yan Yan. 2022. C3CMR: Cross-Modality Cross-Instance Contrastive Learning for Cross-Media Retrieval. In Proceedings of the 30th ACM International Conference on Multimedia. 4300--4308."},{"key":"e_1_3_2_1_34_1","volume-title":"Joint feature selection and subspace learning for cross-modal retrieval","author":"He Ran","year":"2015","unstructured":"KaiyeWang, Ran He, LiangWang,WeiWang, and Tieniu Tan. 2015. Joint feature selection and subspace learning for cross-modal retrieval. IEEE Transactions on Pattern Analysis and Machine Intelligence (2015), 2010--2023."},{"key":"e_1_3_2_1_35_1","volume-title":"Large-scale approximate kernel canonical correlation analysis. arXiv preprint arXiv:1511.04773","author":"Karen Livescu WeiranWang","year":"2015","unstructured":"WeiranWang and Karen Livescu. 2015. Large-scale approximate kernel canonical correlation analysis. arXiv preprint arXiv:1511.04773 (2015)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080678"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00366"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01422"},{"key":"e_1_3_2_1_39_1","volume-title":"A comprehensive empirical study of visionlanguage pre-trained model for supervised cross-modal retrieval. arXiv preprint arXiv:2201.02772","author":"Zeng Zhixiong","year":"2022","unstructured":"Zhixiong Zeng andWenji Mao. 2022. A comprehensive empirical study of visionlanguage pre-trained model for supervised cross-modal retrieval. arXiv preprint arXiv:2201.02772 (2022)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462867"},{"key":"e_1_3_2_1_41_1","volume-title":"Learning cross-media joint representation with sparse and semisupervised regularization","author":"Zhai Xiaohua","year":"2013","unstructured":"Xiaohua Zhai, Yuxin Peng, and Jianguo Xiao. 2013. Learning cross-media joint representation with sparse and semisupervised regularization. IEEE Transactions on Circuits and Systems for Video Technology (2013), 965--978."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01157"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2017.2723841"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3042086"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01064"}],"event":{"name":"SIGIR 2024: The 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","location":"Washington DC USA","acronym":"SIGIR 2024","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657756","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3626772.3657756","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T05:32:20Z","timestamp":1755840740000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657756"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,10]]},"references-count":45,"alternative-id":["10.1145\/3626772.3657756","10.1145\/3626772"],"URL":"https:\/\/doi.org\/10.1145\/3626772.3657756","relation":{},"subject":[],"published":{"date-parts":[[2024,7,10]]},"assertion":[{"value":"2024-07-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}