{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T08:57:32Z","timestamp":1770800252615,"version":"3.50.0"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,1,3]],"date-time":"2026-01-03T00:00:00Z","timestamp":1767398400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,3]],"date-time":"2026-01-03T00:00:00Z","timestamp":1767398400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Open Project Program of State Key Laboratory of CNS\/ATM","award":["2024A01"],"award-info":[{"award-number":["2024A01"]}]},{"name":"Beijing Natural Science Foundation","award":["4254103"],"award-info":[{"award-number":["4254103"]}]},{"DOI":"10.13039\/501100021171","name":"GuangDong Basic and Applied Basic Research Foundation","doi-asserted-by":"crossref","award":["2023A1515110082"],"award-info":[{"award-number":["2023A1515110082"]}],"id":[{"id":"10.13039\/501100021171","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1007\/s00530-025-02121-9","type":"journal-article","created":{"date-parts":[[2026,1,4]],"date-time":"2026-01-04T02:32:58Z","timestamp":1767493978000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Multi-model cooperative denoising for robust cross-modal retrieval with noisy labels"],"prefix":"10.1007","volume":"32","author":[{"given":"Man","family":"Wu","sequence":"first","affiliation":[]},{"given":"Hengmiao","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Jing","family":"Fang","sequence":"additional","affiliation":[]},{"given":"Yang","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Xiong","family":"Luo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,3]]},"reference":[{"key":"2121_CR1","doi-asserted-by":"crossref","unstructured":"Wang, B., Yang, Y., Xu, X., Hanjalic, A., Shen, H.T.: Adversarial cross-modal retrieval. ACM MM, pp. 154\u2013162 (2017)","DOI":"10.1145\/3123266.3123326"},{"key":"2121_CR2","doi-asserted-by":"crossref","unstructured":"Ranjan, V., Rasiwasia, N., Jawahar, C.: Multi-label cross-modal retrieval. Proceedings of the IEEE international conference on computer vision, pp. 4094\u20134102 (2015)","DOI":"10.1109\/ICCV.2015.466"},{"issue":"3","key":"2121_CR3","doi-asserted-by":"publisher","first-page":"2440","DOI":"10.1609\/aaai.v35i3.16345","volume":"35","author":"S Qian","year":"2021","unstructured":"Qian, S., Xue, D., Zhang, H., Fang, Q., Xu, C.: Dual adversarial graph neural networks for multi-label cross-modal retrieval. Proc. AAAI Conf. Artif. Intell. 35(3), 2440\u20132448 (2021). https:\/\/doi.org\/10.1609\/aaai.v35i3.16345","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"issue":"4","key":"2121_CR4","doi-asserted-by":"publisher","first-page":"4794","DOI":"10.1109\/TPAMI.2022.3188547","volume":"45","author":"S Qian","year":"2023","unstructured":"Qian, S., Xue, D., Fang, Q., Xu, C.: Integrating multi-label contrastive learning with dual adversarial graph neural networks for cross-modal retrieval. IEEE Trans. Pattern Anal. Mach. Intell. 45(4), 4794\u20134811 (2023). https:\/\/doi.org\/10.1109\/TPAMI.2022.3188547","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2121_CR5","doi-asserted-by":"publisher","first-page":"3520","DOI":"10.1109\/TMM.2021.3101642","volume":"24","author":"S Qian","year":"2022","unstructured":"Qian, S., Xue, D., Fang, Q., Xu, C.: Adaptive label-aware graph convolutional networks for cross-modal retrieval. IEEE Trans. Multimed. 24, 3520\u20133532 (2022). https:\/\/doi.org\/10.1109\/TMM.2021.3101642","journal-title":"IEEE Trans. Multimed."},{"key":"2121_CR6","unstructured":"Liu, Z., Zhang, Y., Li, P., Liu, Y., Yang, D.: Dynamic llm-agent network: An llm-agent collaboration framework with agent team optimization. arXiv preprint arXiv:2310.02170 (2023)"},{"issue":"11","key":"2121_CR7","doi-asserted-by":"publisher","first-page":"5292","DOI":"10.1109\/TNNLS.2018.2793863","volume":"29","author":"E Yang","year":"2018","unstructured":"Yang, E., Deng, C., Li, C., Liu, W., Li, J., Tao, D.: Shared predictive cross-modal deep quantization. IEEE Trans. Neural. Netw. Learn. Syst. 29(11), 5292\u20135303 (2018)","journal-title":"IEEE Trans. Neural. Netw. Learn. Syst."},{"key":"2121_CR8","doi-asserted-by":"crossref","unstructured":"Ranjan, V., Rasiwasia, N., Jawahar, C.V.: Multi-label cross-modal retrieval. ICCV, pp. 4094\u20134102 (2015)","DOI":"10.1109\/ICCV.2015.466"},{"key":"2121_CR9","doi-asserted-by":"crossref","unstructured":"Zhen, L., Hu, P., Wang, X., Peng, D.: Deep supervised cross-modal retrieval. CVPR, pp. 10394\u201310403 (2019)","DOI":"10.1109\/CVPR.2019.01064"},{"key":"2121_CR10","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R.: Momentum contrast for unsupervised visual representation learning. Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 9729\u20139738 (2020)","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"2121_CR11","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.: A simple framework for contrastive learning of visual representations. International conference on machine learning, pp. 1597\u20131607 (2020). PmLR"},{"key":"2121_CR12","doi-asserted-by":"publisher","first-page":"162","DOI":"10.1007\/978-1-4612-4380-9_14","volume-title":"Breakthroughs in statistics: methodology and distribution","author":"H Hotelling","year":"1992","unstructured":"Hotelling, H.: Relations between two sets of variates. In: Breakthroughs in statistics: methodology and distribution, pp. 162\u2013190. Springer (1992)"},{"issue":"3s","key":"2121_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3575658","volume":"19","author":"J Zhang","year":"2023","unstructured":"Zhang, J., Yu, Y., Tang, S., Wu, J., Li, W.: Variational autoencoder with cca for audio-visual cross-modal retrieval. ACM Trans. Multimed. Comput. Commun. Appl. 19(3s), 1\u201321 (2023)","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"issue":"2s","key":"2121_CR14","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3564608","volume":"19","author":"D Zeng","year":"2023","unstructured":"Zeng, D., Wu, J., Hattori, G., Xu, R., Yu, Y.: Learning explicit and implicit dual common subspaces for audio-visual cross-modal retrieval. ACM Trans. Multimed. Comput. Commun. Appl. 19(2s), 1\u201323 (2023)","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"issue":"2","key":"2121_CR15","doi-asserted-by":"publisher","first-page":"233","DOI":"10.1109\/TMM.2015.2510329","volume":"18","author":"S Qian","year":"2015","unstructured":"Qian, S., Zhang, T., Xu, C., Shao, J.: Multi-modal event topic model for social event analysis. IEEE Trans. Multimed. 18(2), 233\u2013246 (2015)","journal-title":"IEEE Trans. Multimed."},{"key":"2121_CR16","doi-asserted-by":"crossref","unstructured":"Qian, S., Zhang, T., Xu, C.: Multi-modal multi-view topic-opinion mining for social event analysis. Proceedings of the 24th ACM international conference on multimedia, pp. 2\u201311 (2016)","DOI":"10.1145\/2964284.2964294"},{"issue":"3","key":"2121_CR17","first-page":"321","volume":"28","author":"H Harold","year":"1936","unstructured":"Harold, H.: Relations between two sets of variables. Biometrika 28(3), 321\u2013377 (1936)","journal-title":"Biometrika"},{"issue":"6","key":"2121_CR18","doi-asserted-by":"publisher","first-page":"965","DOI":"10.1109\/TCSVT.2013.2276704","volume":"24","author":"X Zhai","year":"2013","unstructured":"Zhai, X., Peng, Y., Xiao, J.: Learning cross-media joint representation with sparse and semisupervised regularization. IEEE Trans. Circuits Syst. Video Technol. 24(6), 965\u2013978 (2013)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"2121_CR19","doi-asserted-by":"crossref","unstructured":"Li, D., Dimitrova, N., Li, M., Sethi, I.K.: Multimedia content processing through cross-modal association. ACM MM, pp. 604\u2013611 (2003)","DOI":"10.1145\/957013.957143"},{"key":"2121_CR20","unstructured":"Andrew, G., Arora, R., Bilmes, J., Livescu, K.: Deep canonical correlation analysis. ICML, pp. 1247\u20131255 (2013)"},{"key":"2121_CR21","doi-asserted-by":"crossref","unstructured":"Hu, P., Peng, X., Zhu, H., Zhen, L., Lin, J.: Learning cross-modal retrieval with noisy labels. Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 5403\u20135413 (2021)","DOI":"10.1109\/CVPR46437.2021.00536"},{"key":"2121_CR22","doi-asserted-by":"crossref","unstructured":"Duan, Y., Gu, Z., Ying, Z., Qi, L., Meng, C., Shi, Y.: Pc2: Pseudo-classification based pseudo-captioning for noisy correspondence learning in cross-modal retrieval. Proceedings of the 32nd ACM international conference on multimedia, pp. 9397\u20139406 (2024)","DOI":"10.1145\/3664647.3680860"},{"key":"2121_CR23","doi-asserted-by":"publisher","first-page":"2587","DOI":"10.1109\/TIP.2024.3374221","volume":"33","author":"X Ma","year":"2024","unstructured":"Ma, X., Yang, M., Li, Y., Hu, P., Lv, J., Peng, X.: Cross-modal retrieval with noisy correspondence via consistency refining and mining. IEEE Trans. Image Process. 33, 2587\u20132598 (2024)","journal-title":"IEEE Trans. Image Process."},{"key":"2121_CR24","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1109\/TMM.2023.3267882","volume":"26","author":"Z Yuan","year":"2023","unstructured":"Yuan, Z., Liu, Y., Xu, H., Gao, K.: Noise imitation based adversarial training for robust multimodal sentiment analysis. IEEE Trans. Multimed. 26, 529\u2013539 (2023)","journal-title":"IEEE Trans. Multimed."},{"key":"2121_CR25","unstructured":"Wu, R., Wang, H., Chen, H.-T., Carneiro, G.: Deep multimodal learning with missing modality: a survey. arXiv preprint arXiv:2409.07825 (2024)"},{"key":"2121_CR26","first-page":"38","volume-title":"European conference on computer vision","author":"S Wei","year":"2024","unstructured":"Wei, S., Luo, Y., Wang, Y., Luo, C.: Robust multimodal learning via representation decoupling. In: European conference on computer vision, pp. 38\u201354. Springer (2024)"},{"key":"2121_CR27","doi-asserted-by":"crossref","unstructured":"Reza, M.K., Prater-Bennette, A., Asif, M.S.: Robust multimodal learning with missing modalities via parameter-efficient adaptation. IEEE transactions on pattern analysis and machine intelligence (2024)","DOI":"10.1109\/TPAMI.2024.3476487"},{"key":"2121_CR28","doi-asserted-by":"crossref","unstructured":"Tian, Y., Krishnan, D., Isola, P.: Contrastive multiview coding. European conference on computer vision, pp. 776\u2013794 (2020). Springer","DOI":"10.1007\/978-3-030-58621-8_45"},{"key":"2121_CR29","first-page":"18661","volume":"33","author":"P Khosla","year":"2020","unstructured":"Khosla, P., Teterwak, P., Wang, C., Sarna, A., Tian, Y., Isola, P., Maschinot, A., Liu, C., Krishnan, D.: Supervised contrastive learning. Adv. Neural. Inf. Process. Syst. 33, 18661\u201318673 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2121_CR30","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. ICLR (2015)"},{"issue":"45\u201376","key":"2121_CR31","first-page":"26","volume":"1","author":"DE Rumelhart","year":"1986","unstructured":"Rumelhart, D.E., Hinton, G.E., McClelland, J.L.: A general framework for parallel distributed processing. Parallel. Distrib. Process: Explor. Microstruct. Cognit. 1(45\u201376), 26 (1986)","journal-title":"Parallel. Distrib. Process: Explor. Microstruct. Cognit."},{"key":"2121_CR32","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., Li, F.-F.: Imagenet: A large-scale hierarchical image database. CVPR, pp. 248\u2013255 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"2121_CR33","unstructured":"Oord, A.v.d., Li, Y., Vinyals, O.: Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748 (2018)"},{"key":"2121_CR34","doi-asserted-by":"crossref","unstructured":"Huiskes, M.J., Thomee, B., Lew, M.S.: The mir flickr retrieval evaluation initiative. ICMIR, pp. 39\u201343 (2008)","DOI":"10.1145\/1460096.1460104"},{"key":"2121_CR35","first-page":"740","volume-title":"European conference on computer vision","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft coco: common objects in context. In: European conference on computer vision, pp. 740\u2013755. Springer (2014)"},{"key":"2121_CR36","doi-asserted-by":"crossref","unstructured":"Chua, T.-S., Tang, J., Hong, R., Li, H., Luo, Z., Zheng, Y.: Nus-wide: a real-world web image database from national university of singapore. ICIVR, pp. 48\u201356 (2009)","DOI":"10.1145\/1646396.1646452"},{"key":"2121_CR37","doi-asserted-by":"crossref","unstructured":"Su, S., Zhong, Z., Zhang, C.: Deep joint-semantics reconstructing hashing for large-scale unsupervised cross-modal retrieval. ICCV, pp. 3027\u20133035 (2019)","DOI":"10.1109\/ICCV.2019.00312"},{"issue":"6","key":"2121_CR38","doi-asserted-by":"publisher","first-page":"965","DOI":"10.1109\/TCSVT.2013.2276704","volume":"24","author":"X Zhai","year":"2014","unstructured":"Zhai, X., Peng, Y., Xiao, J.: Learning cross-media joint representation with sparse and semisupervised regularization. IEEE Trans. Circuits Syst. Video Technol. 24(6), 965\u2013978 (2014)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"3\u20134","key":"2121_CR39","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1093\/biomet\/28.3-4.321","volume":"28","author":"H Hotelling","year":"1936","unstructured":"Hotelling, H.: Relations between two sets of variates. Biometrika 28(3\u20134), 321\u2013377 (1936)","journal-title":"Biometrika"},{"key":"2121_CR40","unstructured":"Srivastava, N., Salakhutdinov, R.: Learning representations for multimodal data with deep belief nets. ICML, pp. 1\u20138 (2012)"},{"key":"2121_CR41","doi-asserted-by":"crossref","unstructured":"Feng, F., Wang, X., Li, R.: Cross-modal retrieval with correspondence autoencoder. ACM MM, pp. 7\u201316 (2014)","DOI":"10.1145\/2647868.2654902"},{"key":"2121_CR42","unstructured":"Wang, Z., Zhang, Z., Luo, Y., Huang, Z., Shen, H.T.: Deep collaborative discrete hashing with semantic-invariant structure construction. IEEE transactions on multimedia, 1\u20131 (2020)"},{"key":"2121_CR43","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. ICLR (2015)"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02121-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-02121-9","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02121-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T04:20:45Z","timestamp":1770783645000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-02121-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,3]]},"references-count":43,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,2]]}},"alternative-id":["2121"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-02121-9","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1,3]]},"assertion":[{"value":"6 November 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 November 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 January 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"The research does not involve human participants and\/or animals. Consent for data used has already been fully informed.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}}],"article-number":"57"}}