{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T12:39:47Z","timestamp":1776775187681,"version":"3.51.2"},"reference-count":55,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T00:00:00Z","timestamp":1775088000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.neucom.2026.133450","type":"journal-article","created":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T18:11:55Z","timestamp":1774462315000},"page":"133450","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Cross-modal progressive modeling for neuro-visual representation learning"],"prefix":"10.1016","volume":"683","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-3040-4097","authenticated-orcid":false,"given":"Yueming","family":"Sun","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiyao","family":"Pu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kaili","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2076-7597","authenticated-orcid":false,"given":"Zeyu","family":"Fu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haoran","family":"Duan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2445-6112","authenticated-orcid":false,"given":"Yang","family":"Long","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.neucom.2026.133450_bib0005","author":"Liu"},{"key":"10.1016\/j.neucom.2026.133450_bib0010","unstructured":"A. Radford, J.W. Kim, C. Hallacy, A. Ramesh, G. Goh, S. Agarwal, G. Sastry, A. Askell, P. Mishkin, J. Clark, G. Krueger, I. Sutskever, Learning transferable visual models from natural language supervision, CoRR abs\/2103.00020, arXiv:2103.00020 (2021) https:\/\/arxiv.org\/abs\/2103.00020"},{"key":"10.1016\/j.neucom.2026.133450_bib0015","series-title":"International Conference on Learning Representations","article-title":"Decoding natural images from EEG for object recognition","author":"Song","year":"2024"},{"key":"10.1016\/j.neucom.2026.133450_bib0020","author":"Chen"},{"key":"10.1016\/j.neucom.2026.133450_bib0025","author":"Li"},{"issue":"9","key":"10.1016\/j.neucom.2026.133450_bib0030","doi-asserted-by":"crossref","first-page":"10760","DOI":"10.1109\/TPAMI.2023.3263181","article-title":"Decoding visual neural representations by multimodal learning of brain-visual-linguistic features","volume":"45","author":"Du","year":"2023","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.133450_bib0035","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","article-title":"Bridging the vision-brain gap with an uncertainty-aware blur prior","author":"Wu","year":"2025"},{"key":"10.1016\/j.neucom.2026.133450_bib0040","doi-asserted-by":"crossref","first-page":"155","DOI":"10.1080\/2326263X.2017.1297192","article-title":"Riemannian geometry for EEG-based brain-computer interfaces; a primer and a review","volume":"4","author":"Congedo","year":"2017","journal-title":"Brain-Comput. Interfaces"},{"key":"10.1016\/j.neucom.2026.133450_bib0045","doi-asserted-by":"crossref","first-page":"3786","DOI":"10.3390\/s21113786","article-title":"A review of EEG signal features and their application in driver drowsiness detection systems","volume":"21","author":"Stancin","year":"2021","journal-title":"Sensors"},{"key":"10.1016\/j.neucom.2026.133450_bib0050","doi-asserted-by":"crossref","first-page":"201","DOI":"10.3390\/brainsci9080201","article-title":"EEG signals feature extraction based on DWT and EMD combined with approximate entropy","volume":"9","author":"Ji","year":"2019","journal-title":"Brain Sci."},{"key":"10.1016\/j.neucom.2026.133450_bib0055","author":"Fu"},{"key":"10.1016\/j.neucom.2026.133450_bib0060","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2025.114199","article-title":"MCAN: cross-domain self-supervised attention network based on multiscale EEG feature learning for epileptic seizure detection","author":"Hong","year":"2025","journal-title":"Knowl.-Based Syst."},{"key":"10.1016\/j.neucom.2026.133450_bib0065","series-title":"Forty-First International Conference on Machine Learning","article-title":"Vector quantization pretraining for EEG time series with random projection and phase alignment","author":"GUI","year":"2024"},{"key":"10.1016\/j.neucom.2026.133450_bib0070","series-title":"International Conference on Machine Learning","first-page":"2777","article-title":"Sliced-Wasserstein on symmetric positive definite matrices for M\/EEG signals","author":"Bonet","year":"2023"},{"key":"10.1016\/j.neucom.2026.133450_bib0075","author":"Choi"},{"key":"10.1016\/j.neucom.2026.133450_bib0080","author":"Guo"},{"key":"10.1016\/j.neucom.2026.133450_bib0085","doi-asserted-by":"crossref","DOI":"10.1016\/j.compbiomed.2024.108701","article-title":"Decoding visual brain representations from electroencephalography through knowledge distillation and latent diffusion models","volume":"178","author":"Ferrante","year":"2024","journal-title":"Comput. Biol. Med."},{"key":"10.1016\/j.neucom.2026.133450_bib0090","doi-asserted-by":"crossref","DOI":"10.1038\/s41598-024-66228-1","article-title":"Image classification and reconstruction from low-density EEG","volume":"14","author":"Guenther","year":"2024","journal-title":"Sci. Rep."},{"key":"10.1016\/j.neucom.2026.133450_bib0095","series-title":"Computer Vision \u2013 ECCV 2024, Lecture Notes in Computer Science","first-page":"472","article-title":"DreamDiffusion: high-quality EEG-to-image generation with temporal masked signal modeling and clip alignment","author":"Bai","year":"2024"},{"key":"10.1016\/j.neucom.2026.133450_bib0100","series-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision","first-page":"7553","article-title":"Learning robust deep visual representations from EEG brain recordings","author":"Singh","year":"2024"},{"key":"10.1016\/j.neucom.2026.133450_bib0105","doi-asserted-by":"crossref","first-page":"478","DOI":"10.3390\/brainsci14050478","article-title":"A new framework combining diffusion models and the convolution classifier for generating images from EEG signals","volume":"14","author":"Yang","year":"2024","journal-title":"Brain Sci."},{"key":"10.1016\/j.neucom.2026.133450_bib0110","doi-asserted-by":"crossref","first-page":"203","DOI":"10.1016\/j.tics.2014.01.002","article-title":"Characterizing the dynamics of mental representations: the temporal generalization method","volume":"18","author":"King","year":"2014","journal-title":"Trends Cogn. Sci."},{"key":"10.1016\/j.neucom.2026.133450_bib0115","first-page":"241","article-title":"Decoding dynamic brain patterns from evoked responses: a tutorial on multivariate pattern analysis applied to time series neuroimaging data","volume":"11","author":"Grootswagers","year":"2017","journal-title":"Front. Neurosci."},{"key":"10.1016\/j.neucom.2026.133450_bib0120","doi-asserted-by":"crossref","first-page":"1097","DOI":"10.1038\/s42256-023-00714-5","article-title":"Decoding speech perception from non-invasive brain recordings","volume":"5","author":"D\u00e9fossez","year":"2023","journal-title":"Nat. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.133450_bib0125","article-title":"Across-subject offline decoding of motor imagery from MEG and EEG","volume":"8","author":"Halme","year":"2018","journal-title":"Sci. Rep."},{"key":"10.1016\/j.neucom.2026.133450_bib0130","doi-asserted-by":"crossref","first-page":"3324","DOI":"10.1002\/hbm.26284","article-title":"Mapping and decoding cortical engagement during motor imagery, mental arithmetic, and silent word generation using MEG","volume":"44","author":"Youssofzadeh","year":"2023","journal-title":"Hum. Brain Mapp."},{"key":"10.1016\/j.neucom.2026.133450_bib0135","doi-asserted-by":"crossref","first-page":"7627","DOI":"10.1109\/TNNLS.2024.3387577","article-title":"Align while fusion: a generalized nonaligned multiview multilabel classification method","volume":"36","author":"Zhong","year":"2025","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.neucom.2026.133450_bib0140","doi-asserted-by":"crossref","first-page":"6159","DOI":"10.1109\/TKDE.2025.3590482","article-title":"Simplified graph contrastive learning model without augmentation","volume":"37","author":"Lin","year":"2025","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"10.1016\/j.neucom.2026.133450_bib0145","author":"Liang"},{"key":"10.1016\/j.neucom.2026.133450_bib0150","article-title":"Tuple perturbation-based contrastive learning framework for multimodal remote sensing image semantic segmentation","volume":"63","author":"Ye","year":"2025","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.neucom.2026.133450_bib0155","author":"Oquab"},{"key":"10.1016\/j.neucom.2026.133450_bib0160","series-title":"Advances in Neural Information Processing Systems","first-page":"29996","article-title":"SimMTM: a simple pre-training framework for masked time-series modeling","volume":"vol. 36","author":"Dong","year":"2023"},{"key":"10.1016\/j.neucom.2026.133450_bib0165","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"14507","article-title":"AdaMAE: adaptive masking for efficient spatiotemporal learning with masked autoencoders","author":"Bandara","year":"2023"},{"key":"10.1016\/j.neucom.2026.133450_bib0170","author":"Hendrycks"},{"key":"10.1016\/j.neucom.2026.133450_bib0175","series-title":"Proceedings of the 13th International Conference on Artificial Intelligence and Statistics (AISTATS)","first-page":"297","article-title":"Noise-contrastive estimation: a new estimation principle for unnormalized statistical models","author":"Gutmann","year":"2010"},{"key":"10.1016\/j.neucom.2026.133450_bib0180","doi-asserted-by":"crossref","DOI":"10.1016\/j.neuroimage.2022.119754","article-title":"A large and rich EEG dataset for modeling human visual object recognition","volume":"264","author":"Gifford","year":"2022","journal-title":"NeuroImage"},{"key":"10.1016\/j.neucom.2026.133450_bib0185","doi-asserted-by":"crossref","first-page":"2633","DOI":"10.1167\/jov.21.9.2633","article-title":"THINGS-fMRI\/MEG: a large-scale multimodal neuroimaging dataset of responses to natural object images","volume":"21","author":"Contier","year":"2021","journal-title":"J. Vis."},{"key":"10.1016\/j.neucom.2026.133450_bib0190","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"6809","article-title":"Deep learning human mind for automated visual classification","author":"Spampinato","year":"2017"},{"key":"10.1016\/j.neucom.2026.133450_bib0195","doi-asserted-by":"crossref","first-page":"668","DOI":"10.1016\/j.neuroimage.2018.12.046","article-title":"The representational dynamics of visual objects in rapid serial visual processing streams","volume":"188","author":"Grootswagers","year":"2019","journal-title":"NeuroImage"},{"key":"10.1016\/j.neucom.2026.133450_bib0200","doi-asserted-by":"crossref","first-page":"90","DOI":"10.1162\/089892901564199","article-title":"The speed of sight","volume":"13","author":"Keysers","year":"2001","journal-title":"J. Cogn. Neurosci."},{"key":"10.1016\/j.neucom.2026.133450_bib0205","first-page":"604","article-title":"Rapid conceptual identification of sequentially presented pictures","volume":"7","author":"Intraub","year":"1981","journal-title":"J. Exp. Psychol.: Hum. Percept. Perform."},{"key":"10.1016\/j.neucom.2026.133450_bib0210","unstructured":"G. Ilharco, M. Wortsman, R. Wightman, C. Gordon, N. Carlini, R. Taori, A. Dave, V. Shankar, H. Namkoong, J. Miller, et al., OpenCLIP, If you use this software, please cite it as below 7, (2021)."},{"key":"10.1016\/j.neucom.2026.133450_bib0215","series-title":"International Conference on Learning Representations (ICLR)","article-title":"Adam: a method for stochastic optimization","author":"Kingma","year":"2015"},{"key":"10.1016\/j.neucom.2026.133450_bib0220","series-title":"Proceedings of the 32nd ACM International Conference on Multimedia","first-page":"8992","article-title":"MB2C: multimodal bidirectional cycle consistency for learning robust visual neural representations","author":"Wei","year":"2024"},{"key":"10.1016\/j.neucom.2026.133450_bib0225","doi-asserted-by":"crossref","first-page":"3833","DOI":"10.1109\/TPAMI.2020.2995909","article-title":"Decoding brain representations by multimodal learning of neural activity and visual features","volume":"43","author":"Palazzo","year":"2020","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.133450_bib0230","doi-asserted-by":"crossref","first-page":"6761","DOI":"10.1109\/TPAMI.2024.3386927","article-title":"DeepNet: scaling transformers to 1, 000 layers","volume":"46","author":"Wang","year":"2024","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.133450_bib0235","doi-asserted-by":"crossref","DOI":"10.1088\/1741-2552\/aace8c","article-title":"EEGNet: a compact convolutional neural network for EEG-based brain\u2013computer interfaces","volume":"15","author":"Lawhern","year":"2018","journal-title":"J. Neural Eng."},{"key":"10.1016\/j.neucom.2026.133450_bib0240","series-title":"International Conference on Neural Information Processing","first-page":"633","article-title":"ShallowNet: an efficient lightweight text detection network based on instance count-aware supervision information","author":"Hu","year":"2021"},{"key":"10.1016\/j.neucom.2026.133450_bib0245","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He","year":"2016"},{"key":"10.1016\/j.neucom.2026.133450_bib0250","author":"Dosovitskiy"},{"key":"10.1016\/j.neucom.2026.133450_bib0255","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"12104","article-title":"Scaling vision transformers","author":"Zhai","year":"2022"},{"key":"10.1016\/j.neucom.2026.133450_bib0260","series-title":"Proceedings of the IEEE International Conference on Computer Vision (ICCV)","first-page":"618","article-title":"Grad-CAM: visual explanations from deep networks via gradient-based localization","author":"Selvaraju","year":"2017"},{"key":"10.1016\/j.neucom.2026.133450_bib0265","doi-asserted-by":"crossref","first-page":"446","DOI":"10.1016\/j.neuroimage.2013.10.027","article-title":"MNE software for processing MEG and EEG data","volume":"86","author":"Gramfort","year":"2014","journal-title":"neuroimage"},{"key":"10.1016\/j.neucom.2026.133450_bib0270","doi-asserted-by":"crossref","first-page":"772","DOI":"10.1016\/j.neuron.2020.07.001","article-title":"AM\/EEG-fMRI fusion primer: resolving human brain responses in space and time","volume":"107","author":"Cichy","year":"2020","journal-title":"Neuron"},{"key":"10.1016\/j.neucom.2026.133450_bib0275","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"Van der Maaten","year":"2008","journal-title":"J. Mach. Learn. Res."}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226008477?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226008477?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T11:44:43Z","timestamp":1776771883000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0925231226008477"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":55,"alternative-id":["S0925231226008477"],"URL":"https:\/\/doi.org\/10.1016\/j.neucom.2026.133450","relation":{},"ISSN":["0925-2312"],"issn-type":[{"value":"0925-2312","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Cross-modal progressive modeling for neuro-visual representation learning","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neucom.2026.133450","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Authors. Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"133450"}}