{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,25]],"date-time":"2026-02-25T16:37:24Z","timestamp":1772037444108,"version":"3.50.1"},"reference-count":64,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62306178"],"award-info":[{"award-number":["62306178"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003399","name":"STCSM","doi-asserted-by":"publisher","award":["22DZ2229005"],"award-info":[{"award-number":["22DZ2229005"]}],"id":[{"id":"10.13039\/501100003399","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2022ZD0160702"],"award-info":[{"award-number":["2022ZD0160702"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100013314","name":"Higher Education Discipline Innovation Project","doi-asserted-by":"publisher","award":["BP0719010"],"award-info":[{"award-number":["BP0719010"]}],"id":[{"id":"10.13039\/501100013314","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1016\/j.neucom.2026.132733","type":"journal-article","created":{"date-parts":[[2026,1,17]],"date-time":"2026-01-17T00:34:04Z","timestamp":1768610044000},"page":"132733","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Privileged information assisted learning from noisy correspondence"],"prefix":"10.1016","volume":"672","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-2915-6893","authenticated-orcid":false,"given":"Zihua","family":"Zhao","sequence":"first","affiliation":[]},{"given":"Tianjie","family":"Dai","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6790-6490","authenticated-orcid":false,"given":"Mengxi","family":"Chen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6115-5194","authenticated-orcid":false,"given":"Jiangchao","family":"Yao","sequence":"additional","affiliation":[]},{"given":"Bo","family":"Han","sequence":"additional","affiliation":[]},{"given":"Ya","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yanfeng","family":"Wang","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.neucom.2026.132733_bib0005","series-title":"European Conference on Computer Vision","first-page":"348","article-title":"MultiMAE: multi-modal multi-task masked autoencoders","author":"Bachmann","year":"2022"},{"key":"10.1016\/j.neucom.2026.132733_bib0010","series-title":"International Conference on Machine Learning","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","author":"Chen","year":"2020"},{"key":"10.1016\/j.neucom.2026.132733_bib0015","series-title":"Proceedings of the ACM International Conference on Image and Video Retrieval","first-page":"1","article-title":"NUS-WIDE: a real-world web image database from national university of Singapore","author":"Chua","year":"2009"},{"key":"10.1016\/j.neucom.2026.132733_bib0020","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"16670","article-title":"Robust contrastive learning against noisy views","author":"Chuang","year":"2022"},{"key":"10.1016\/j.neucom.2026.132733_bib0025","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"1218","article-title":"Similarity reasoning and filtration for image-text matching","author":"Diao","year":"2021"},{"key":"10.1016\/j.neucom.2026.132733_bib0030","author":"Faghri"},{"key":"10.1016\/j.neucom.2026.132733_bib0035","doi-asserted-by":"crossref","first-page":"6","DOI":"10.1080\/1045988X.1993.9944611","article-title":"Co-teaching: an overview of the past, a glimpse at the present, and considerations for the future","volume":"37","author":"Friend","year":"1993","journal-title":"Prev. Sch. Fail."},{"key":"10.1016\/j.neucom.2026.132733_bib0040","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"15159","article-title":"Learning semantic relationship among instances for image-text matching","author":"Fu","year":"2023"},{"key":"10.1016\/j.neucom.2026.132733_bib0045","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"11093","article-title":"Improving zero-shot generalization and robustness of multi-modal models","author":"Ge","year":"2023"},{"key":"10.1016\/j.neucom.2026.132733_bib0050","author":"Han"},{"key":"10.1016\/j.neucom.2026.132733_bib0055","article-title":"Co-teaching: robust training of deep neural networks with extremely noisy labels","volume":"31","author":"Han","year":"2018","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.132733_bib0060","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"7517","article-title":"Noisy correspondence learning with meta similarity correction","author":"Han","year":"2023"},{"key":"10.1016\/j.neucom.2026.132733_bib0065","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"9729","article-title":"Momentum contrast for unsupervised visual representation learning","author":"He","year":"2020"},{"key":"10.1016\/j.neucom.2026.132733_bib0070","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"5403","article-title":"Learning cross-modal retrieval with noisy labels","author":"Hu","year":"2021"},{"key":"10.1016\/j.neucom.2026.132733_bib0075","series-title":"International Conference on Machine Learning","first-page":"9226","article-title":"Modality competition: what makes joint training of multi-modal network fail in deep learning?(provably)","author":"Huang","year":"2022"},{"key":"10.1016\/j.neucom.2026.132733_bib0080","first-page":"7892","article-title":"MACK: multimodal aligned conceptual knowledge for unpaired image-text matching","volume":"35","author":"Huang","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.132733_bib0085","series-title":"International Conference on Machine Learning","first-page":"13774","article-title":"Model-aware contrastive learning: towards escaping the dilemmas","author":"Huang","year":"2023"},{"key":"10.1016\/j.neucom.2026.132733_bib0090","first-page":"29406","article-title":"Learning with noisy correspondence for cross-modal matching","volume":"34","author":"Huang","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.132733_bib0095","series-title":"International Conference on Machine Learning","first-page":"4904","article-title":"Scaling up visual and vision-language representation learning with noisy text supervision","author":"Jia","year":"2021"},{"key":"10.1016\/j.neucom.2026.132733_bib0100","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"3232","article-title":"Deep cross-modal hashing","author":"Jiang","year":"2017"},{"key":"10.1016\/j.neucom.2026.132733_bib0105","doi-asserted-by":"crossref","first-page":"317","DOI":"10.1038\/s41597-019-0322-0","article-title":"MIMIC-CXR, a de-identified publicly available database of chest radiographs with free-text reports","volume":"6","author":"Johnson","year":"2019","journal-title":"Sci. Data"},{"key":"10.1016\/j.neucom.2026.132733_bib0110","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"9676","article-title":"UNICON: combating label noise through uniform selection and contrastive learning","author":"Karim","year":"2022"},{"key":"10.1016\/j.neucom.2026.132733_bib0115","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"3128","article-title":"Deep visual-semantic alignments for generating image descriptions","author":"Karpathy","year":"2015"},{"key":"10.1016\/j.neucom.2026.132733_bib0120","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","article-title":"Efficient large-scale multi-modal classification","author":"Kiela","year":"2018"},{"key":"10.1016\/j.neucom.2026.132733_bib0125","author":"Kingma"},{"key":"10.1016\/j.neucom.2026.132733_bib0130","series-title":"Proceedings of the European Conference on Computer Vision (ECCV)","first-page":"201","article-title":"Stacked cross attention for image-text matching","author":"Lee","year":"2018"},{"key":"10.1016\/j.neucom.2026.132733_bib0135","series-title":"International Conference on Machine Learning","first-page":"19730","article-title":"BLIP-2: bootstrapping language-image pre-training with frozen image encoders and large language models","author":"Li","year":"2023"},{"key":"10.1016\/j.neucom.2026.132733_bib0140","series-title":"International Conference on Machine Learning","first-page":"12888","article-title":"BLIP: bootstrapping language-image pre-training for unified vision-language understanding and generation","author":"Li","year":"2022"},{"key":"10.1016\/j.neucom.2026.132733_bib0145","author":"Li"},{"key":"10.1016\/j.neucom.2026.132733_bib0150","series-title":"Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part v 13","first-page":"740","article-title":"Microsoft COCO: common objects in context","author":"Lin","year":"2014"},{"key":"10.1016\/j.neucom.2026.132733_bib0155","first-page":"20331","article-title":"Early-learning regularization prevents memorization of noisy labels","volume":"33","author":"Liu","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.132733_bib0160","series-title":"International Conference on Machine Learning","first-page":"14153","article-title":"Robust training under label noise by over-parameterization","author":"Liu","year":"2022"},{"key":"10.1016\/j.neucom.2026.132733_bib0165","doi-asserted-by":"crossref","first-page":"1332","DOI":"10.1109\/LSP.2022.3178899","article-title":"Regularizing visual semantic embedding with contrastive learning for image-text matching","volume":"29","author":"Liu","year":"2022","journal-title":"IEEE Signal Process. Lett."},{"key":"10.1016\/j.neucom.2026.132733_bib0170","article-title":"ViLBERT: pretraining task-agnostic visiolinguistic representations for vision-and-language tasks","volume":"32","author":"Lu","year":"2019","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.132733_bib0175","doi-asserted-by":"crossref","first-page":"9082","DOI":"10.1109\/TMM.2023.3245400","article-title":"Adaptive marginalized semantic hashing for unpaired cross-modal retrieval","volume":"25","author":"Luo","year":"2023","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.neucom.2026.132733_bib0180","doi-asserted-by":"crossref","first-page":"2587","DOI":"10.1109\/TIP.2024.3374221","article-title":"Cross-modal retrieval with noisy correspondence via consistency refining and mining","volume":"33","author":"Ma","year":"2024","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.neucom.2026.132733_bib0185","doi-asserted-by":"crossref","first-page":"187","DOI":"10.1109\/TPAMI.2019.2927476","article-title":"Recipe1M+: a dataset for learning cross-modal embeddings for cooking recipes and food images","volume":"43","author":"Mar\u0131n","year":"2021","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.132733_bib0190","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"122","article-title":"Multi-modal domain adaptation for fine-grained action recognition","author":"Munro","year":"2020"},{"key":"10.1016\/j.neucom.2026.132733_bib0195","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"19275","article-title":"Fine-grained image-text matching by cross-modal hard aligning network","author":"Pan","year":"2023"},{"key":"10.1016\/j.neucom.2026.132733_bib0200","series-title":"Proceedings of the 30th ACM International Conference on Multimedia","first-page":"4948","article-title":"Deep evidential learning with noisy correspondence for cross-modal retrieval","author":"Qin","year":"2022"},{"key":"10.1016\/j.neucom.2026.132733_bib0205","series-title":"International Conference on Machine Learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"10.1016\/j.neucom.2026.132733_bib0210","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"3558","article-title":"Conceptual 12m: pushing web-scale image-text pre-training to recognize long-tail visual concepts","author":"Changpinyo","year":"2021"},{"key":"10.1016\/j.neucom.2026.132733_bib0215","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"3677","article-title":"On the adversarial robustness of multi-modal foundation models","author":"Schlarmann","year":"2023"},{"key":"10.1016\/j.neucom.2026.132733_bib0220","series-title":"Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","first-page":"2556","article-title":"Conceptual captions: a cleaned, hypernymed, image alt-text dataset for automatic image captioning","author":"Sharma","year":"2018"},{"key":"10.1016\/j.neucom.2026.132733_bib0225","doi-asserted-by":"crossref","first-page":"8135","DOI":"10.1109\/TNNLS.2022.3152527","article-title":"Learning from noisy labels with deep neural networks: a survey","volume":"34","author":"Song","year":"2023","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.neucom.2026.132733_bib0230","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"3147","article-title":"Too large; data reduction for vision-language pre-training","author":"Wang","year":"2023"},{"key":"10.1016\/j.neucom.2026.132733_bib0235","series-title":"International Conference on Learning Representations","article-title":"Robust early-learning: hindering the memorization of noisy labels","author":"Xia","year":"2020"},{"key":"10.1016\/j.neucom.2026.132733_bib0240","series-title":"Proceedings of the 30th ACM International Conference on Multimedia","first-page":"629","article-title":"Early-learning regularized contrastive learning for cross-modal retrieval with noisy labels","author":"Xu","year":"2022"},{"key":"10.1016\/j.neucom.2026.132733_bib0245","doi-asserted-by":"crossref","first-page":"7833","DOI":"10.1109\/TPAMI.2024.3394552","article-title":"Searching to exploit memorization effect in deep learning with noisy labels","volume":"46","author":"Yang","year":"2024","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.132733_bib0250","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"19883","article-title":"BiCro: noisy correspondence rectification for multi-modality data via bi-directional cross-modal similarity consistency","author":"Yang","year":"2023"},{"key":"10.1016\/j.neucom.2026.132733_bib0255","doi-asserted-by":"crossref","first-page":"1478","DOI":"10.1109\/TIP.2025.3574918","article-title":"UCPM: uncertainty-guided cross-modal retrieval with partially mismatched pairs","volume":"34","author":"Zha","year":"2025","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.neucom.2026.132733_bib0260","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"27381","article-title":"Mitigating noisy correspondence by geometrical structure consistency learning","author":"Zhao","year":"2024"},{"key":"10.1016\/j.neucom.2026.132733_bib0265","author":"Zhao"},{"key":"10.1016\/j.neucom.2026.132733_bib0270","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"10394","article-title":"Deep supervised cross-modal retrieval","author":"Zhen","year":"2019"},{"key":"10.1016\/j.neucom.2026.132733_bib0275","series-title":"International Conference on Machine Learning","first-page":"7164","article-title":"How does disagreement help generalization against label corruption?","author":"Yu","year":"2019"},{"key":"10.1016\/j.neucom.2026.132733_bib0280","series-title":"Advances in Neural Information Processing Systems","article-title":"Masking: a new perspective of noisy supervision","volume":"vol. 31","author":"Han","year":"2018"},{"key":"10.1016\/j.neucom.2026.132733_bib0285","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"9103","article-title":"Safeguarded dynamic label regression for noisy supervision","volume":"vol. 33","author":"Yao","year":"2019"},{"key":"10.1016\/j.neucom.2026.132733_bib0290","doi-asserted-by":"crossref","first-page":"9964","DOI":"10.1109\/TPAMI.2023.3247629","article-title":"Latent class-conditional noise model","volume":"45","author":"Yao","year":"2023","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.132733_bib0295","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"30763","article-title":"Multi-modal medical diagnosis via large-small model collaboration","author":"Chen","year":"2025"},{"key":"10.1016\/j.neucom.2026.132733_bib0300","author":"Zhou"},{"key":"10.1016\/j.neucom.2026.132733_bib0305","series-title":"Advances in Neural Information Processing Systems","first-page":"36218","article-title":"Probabilistic conformal distillation for enhancing missing modality robustness","volume":"vol. 37","author":"Chen","year":"2024"},{"key":"10.1016\/j.neucom.2026.132733_bib0310","author":"Hong"},{"key":"10.1016\/j.neucom.2026.132733_bib0315","doi-asserted-by":"crossref","first-page":"106","DOI":"10.1007\/s10994-025-06745-w","article-title":"Uncover the balanced geometry in long-tailed contrastive language-image pretraining","volume":"114","author":"Zhou","year":"2025","journal-title":"Mach. Learn."},{"key":"10.1016\/j.neucom.2026.132733_bib0320","author":"Hong"}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S092523122600130X?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S092523122600130X?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,2,25]],"date-time":"2026-02-25T15:44:56Z","timestamp":1772034296000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S092523122600130X"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4]]},"references-count":64,"alternative-id":["S092523122600130X"],"URL":"https:\/\/doi.org\/10.1016\/j.neucom.2026.132733","relation":{},"ISSN":["0925-2312"],"issn-type":[{"value":"0925-2312","type":"print"}],"subject":[],"published":{"date-parts":[[2026,4]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Privileged information assisted learning from noisy correspondence","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neucom.2026.132733","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"132733"}}