{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:54:12Z","timestamp":1781538852017,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"Strategic Priority Research Program of the Chinese Academy of Sciences","award":["XDB0930000"],"award-info":[{"award-number":["XDB0930000"]}]},{"name":"Guangdong Provincial Key Laboratory of Multimodality Non-Invasive Brain-Computer Interfaces","award":["2024B1212010010"],"award-info":[{"award-number":["2024B1212010010"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810885","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"108-117","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["NeuroAlign: Dynamic Dual-Stream Alignment of Perception and Cognition for Zero-Shot Brain-Image Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-8265-5152","authenticated-orcid":false,"given":"Yixing","family":"Ke","sequence":"first","affiliation":[{"name":"Southern University of Science and Technology, Shenzhen, China; Shenzhen University of Advanced Technology, Shenzhen, China and Shenzhen Institute of Advanced Technology\uff0cChinese Academy of Sciences, Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1358-9777","authenticated-orcid":false,"given":"Dong","family":"Liang","sequence":"additional","affiliation":[{"name":"Shenzhen Institute of Advanced Technology\uff0cChinese Academy of Sciences, Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3826-1102","authenticated-orcid":false,"given":"Kun","family":"Shang","sequence":"additional","affiliation":[{"name":"Shenzhen Institute of Advanced Technology\uff0cChinese Academy of Sciences, Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_1_2_2","volume-title":"ECCV (31)","author":"Bai Yunpeng","year":"2024","unstructured":"Yunpeng Bai, Xintao Wang, Yan-Pei Cao, Yixiao Ge, Chun Yuan, and Ying Shan. 2024. DreamDiffusion: High-Quality EEG-to-Image Generation with Temporal Masked Signal Modeling and CLIP Alignment. In ECCV (31)."},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"crossref","unstructured":"Moshe Bar Karim\u00a0S Kassam Avniel\u00a0Singh Ghuman Jasmine Boshyan Annette\u00a0M Schmid Anders\u00a0M Dale Matti\u00a0S H\u00e4m\u00e4l\u00e4inen Ksenija Marinkovic Daniel\u00a0L Schacter Bruce\u00a0R Rosen et\u00a0al. 2006. Top-down facilitation of visual recognition. Proceedings of the national academy of sciences 103 2 (2006) 449\u2013454.","DOI":"10.1073\/pnas.0507062103"},{"key":"e_1_3_3_1_4_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Benchetrit Yohann","year":"2024","unstructured":"Yohann Benchetrit, Hubert Banville, and Jean-Remi King. 2024. Brain decoding: toward real-time reconstruction of visual perception. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"e_1_3_3_1_6_2","unstructured":"Hongzhou Chen Lianghua He Yihang Liu and Longzhen Yang. 2024. Visual neural decoding via improved visual-EEG semantic consistency. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.06788 (2024)."},{"key":"e_1_3_3_1_7_2","first-page":"794","volume-title":"International conference on machine learning","author":"Chen Zhao","year":"2018","unstructured":"Zhao Chen, Vijay Badrinarayanan, Chen-Yu Lee, and Andrew Rabinovich. 2018. Gradnorm: Gradient normalization for adaptive loss balancing in deep multitask networks. In International conference on machine learning. PMLR, 794\u2013803."},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Rocco Chiou and Matthew A\u00a0Lambon Ralph. 2016. The anterior temporal cortex is a primary semantic source of top-down influences on object recognition. Cortex 79 (2016) 75\u201386.","DOI":"10.1016\/j.cortex.2016.03.007"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","unstructured":"Minkyu Choi Kuan Han Xiaokai Wang Yizhen Zhang and Zhongming Liu. 2023. A dual-stream neural network explains the functional segregation of dorsal and ventral visual pathways in human brains. Advances in Neural Information Processing Systems 36 (2023) 50408\u201350428.","DOI":"10.52202\/075280-2193"},{"key":"e_1_3_3_1_10_2","unstructured":"Minsuk Choi and Hiroshi Ishikawa. 2024. BrainDecoder: Style-based visual decoding of EEG signals. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.05279 (2024)."},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Radoslaw\u00a0Martin Cichy Aditya Khosla Dimitrios Pantazis Antonio Torralba and Aude Oliva. 2016. Comparison of deep neural networks to spatio-temporal cortical dynamics of human visual object recognition reveals hierarchical correspondence. Scientific reports 6 1 (2016) 27755.","DOI":"10.1038\/srep27755"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"Radoslaw\u00a0Martin Cichy Dimitrios Pantazis and Aude Oliva. 2014. Resolving human object recognition in space and time. Nature neuroscience 17 3 (2014) 455\u2013462.","DOI":"10.1038\/nn.3635"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"crossref","unstructured":"Maurizio Corbetta and Gordon\u00a0L Shulman. 2002. Control of goal-directed and stimulus-driven attention in the brain. Nature reviews neuroscience 3 3 (2002) 201\u2013215.","DOI":"10.1038\/nrn755"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"crossref","unstructured":"James\u00a0J DiCarlo and David\u00a0D Cox. 2007. Untangling invariant object recognition. Trends in cognitive sciences 11 8 (2007) 333\u2013341.","DOI":"10.1016\/j.tics.2007.06.010"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Changde Du Kaicheng Fu Jinpeng Li and Huiguang He. 2023. Decoding visual neural representations by multimodal learning of brain-visual-linguistic features. IEEE Transactions on Pattern Analysis and Machine Intelligence 45 9 (2023) 10760\u201310777.","DOI":"10.1109\/TPAMI.2023.3263181"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"crossref","unstructured":"Guy Gaziv Roman Beliy Niv Granot Assaf Hoogi Francesca Strappini Tal Golan and Michal Irani. 2022. Self-supervised natural image reconstruction and large-scale semantic classification from brain activity. NeuroImage 254 (2022) 119121.","DOI":"10.1016\/j.neuroimage.2022.119121"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"crossref","unstructured":"Kuntal Ghosh Sandip Sarkar and Kamales Bhaumik. 2005. A possible mechanism of zero-crossing detection using the concept of the extended classical receptive field of retinal ganglion cells. Biological Cybernetics 93 1 (2005) 1\u20135.","DOI":"10.1007\/s00422-005-0580-0"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"crossref","unstructured":"Alessandro\u00a0T Gifford Kshitij Dwivedi Gemma Roig and Radoslaw\u00a0M Cichy. 2022. A large and rich EEG dataset for modeling human visual object recognition. NeuroImage 264 (2022) 119754.","DOI":"10.1016\/j.neuroimage.2022.119754"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"crossref","unstructured":"Tijl Grootswagers Amanda\u00a0K Robinson and Thomas\u00a0A Carlson. 2019. The representational dynamics of visual objects in rapid serial visual processing streams. NeuroImage 188 (2019) 668\u2013679.","DOI":"10.1016\/j.neuroimage.2018.12.046"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"crossref","unstructured":"Martin\u00a0N Hebart Oliver Contier Lina Teichmann Adam\u00a0H Rockter Charles\u00a0Y Zheng Alexis Kidder Anna Corriveau Maryam Vaziri-Pashkam and Chris\u00a0I Baker. 2023. THINGS-data a multimodal collection of large-scale datasets for investigating object representations in human brain and behavior. Elife 12 (2023) e82580.","DOI":"10.7554\/eLife.82580"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"crossref","unstructured":"David\u00a0H Hubel and Torsten\u00a0N Wiesel. 1968. Receptive fields and functional architecture of monkey striate cortex. The Journal of physiology 195 1 (1968) 215\u2013243.","DOI":"10.1113\/jphysiol.1968.sp008455"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"crossref","unstructured":"Alexander\u00a0G Huth Wendy\u00a0A De\u00a0Heer Thomas\u00a0L Griffiths Fr\u00e9d\u00e9ric\u00a0E Theunissen and Jack\u00a0L Gallant. 2016. Natural speech reveals the semantic maps that tile human cerebral cortex. Nature 532 7600 (2016) 453\u2013458.","DOI":"10.1038\/nature17637"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"crossref","unstructured":"Helene Intraub. 1981. Rapid conceptual identification of sequentially presented pictures. Journal of Experimental Psychology: Human Perception and Performance 7 3 (1981) 604.","DOI":"10.1037\/0096-1523.7.3.604"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"crossref","unstructured":"Laurent Itti and Christof Koch. 2000. A saliency-based search mechanism for overt and covert shifts of visual attention. Vision research 40 10-12 (2000) 1489\u20131506.","DOI":"10.1016\/S0042-6989(99)00163-7"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"crossref","unstructured":"Kohitij Kar Jonas Kubilius Kailyn Schmidt Elias\u00a0B Issa and James\u00a0J DiCarlo. 2019. Evidence that recurrent circuits are critical to the ventral stream\u2019s execution of core object recognition behavior. Nature neuroscience 22 6 (2019) 974\u2013983.","DOI":"10.1038\/s41593-019-0392-5"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"crossref","unstructured":"Christian Keysers D-K Xiao Peter F\u00f6ldi\u00e1k and David\u00a0I Perrett. 2001. The speed of sight. Journal of cognitive neuroscience 13 1 (2001) 90\u2013101.","DOI":"10.1162\/089892901564199"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW69036.2025.00506"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"crossref","unstructured":"Solomon Kullback and Richard\u00a0A Leibler. 1951. On information and sufficiency. The annals of mathematical statistics 22 1 (1951) 79\u201386.","DOI":"10.1214\/aoms\/1177729694"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"crossref","unstructured":"Dongyang Li Chen Wei Shiying Li Jiachen Zou and Quanying Liu. 2024. Visual Decoding and Reconstruction via EEG Embeddings with Guided Diffusion. Advances in Neural Information Processing Systems 37 (2024) 102822\u2013102864.","DOI":"10.52202\/079017-3266"},{"key":"e_1_3_3_1_31_2","first-page":"19730","volume-title":"International conference on machine learning","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International conference on machine learning. PMLR, 19730\u201319742."},{"key":"e_1_3_3_1_32_2","unstructured":"Yueyang Li Zijian Kang Shengyu Gong Wenhao Dong Weiming Zeng Hongjie Yan Wai\u00a0Ting Siok and Nizhuan Wang. 2024. Neural-mcrl: Neural multimodal contrastive representation learning for eeg-based visual decoding. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.17337 (2024)."},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICME59968.2025.11210130"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"crossref","unstructured":"Victor\u00a0Weixin Liang Yuhui Zhang Yongchan Kwon Serena Yeung and James\u00a0Y Zou. 2022. Mind the gap: Understanding the modality gap in multi-modal contrastive representation learning. Advances in Neural Information Processing Systems 35 (2022) 17612\u201317625.","DOI":"10.52202\/068431-1280"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"crossref","unstructured":"Sikun Lin Thomas Sprague and Ambuj\u00a0K Singh. 2022. Mind reader: Reconstructing complex images from brain activities. Advances in Neural Information Processing Systems 35 (2022) 29624\u201329636.","DOI":"10.52202\/068431-2148"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"crossref","unstructured":"Dongjun Liu Weichen Dai Hangkui Zhang Xuanyu Jin Jianting Cao and Wanzeng Kong. 2023. Brain-machine coupled learning method for facial emotion recognition. IEEE Transactions on Pattern Analysis and Machine Intelligence 45 9 (2023) 10703\u201310717.","DOI":"10.1109\/TPAMI.2023.3257846"},{"key":"e_1_3_3_1_37_2","volume-title":"ICML 2024 AI for Science Workshop","author":"Liu Hanwen","year":"2024","unstructured":"Hanwen Liu, Daniel Hajialigol, Benny Antony, Aiguo Han, and Xuan Wang. 2024. EEG2TEXT: Open Vocabulary EEG-to-Text Decoding with EEG Pre-Training and Multi-View Transformer. In ICML 2024 AI for Science Workshop."},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"crossref","unstructured":"Xiu-Yun Liu Wen-Long Wang Miao Liu Ming-Yi Chen T\u00e2nia Pereira Desta\u00a0Yakob Doda Yu-Feng Ke Shou-Yan Wang Dong Wen Xiao-Guang Tong et\u00a0al. 2025. Recent applications of EEG-based brain-computer-interface in the medical field. Military Medical Research 12 1 (2025) 14.","DOI":"10.1186\/s40779-025-00598-z"},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"crossref","unstructured":"David Marr and Ellen Hildreth. 1980. Theory of edge detection. Proceedings of the Royal Society of London. Series B. Biological Sciences 207 1167 (1980) 187\u2013217.","DOI":"10.1098\/rspb.1980.0020"},{"key":"e_1_3_3_1_41_2","volume-title":"The Thirteenth International Conference on Learning Representations","author":"Mistretta Marco","year":"2025","unstructured":"Marco Mistretta, Alberto Baldrati, Lorenzo Agnolucci, Marco Bertini, and Andrew\u00a0D Bagdanov. 2025. Cross the Gap: Exposing the Intra-modal Misalignment in CLIP via Modality Inversion. In The Thirteenth International Conference on Learning Representations."},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19809-0_30"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"crossref","unstructured":"Simone Palazzo Concetto Spampinato Isaak Kavasidis Daniela Giordano Joseph Schmidt and Mubarak Shah. 2020. Decoding brain representations by multimodal learning of neural activity and visual features. IEEE Transactions on Pattern Analysis and Machine Intelligence 43 11 (2020) 3833\u20133849.","DOI":"10.1109\/TPAMI.2020.2995909"},{"key":"e_1_3_3_1_44_2","first-page":"8748","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748\u20138763."},{"key":"e_1_3_3_1_45_2","doi-asserted-by":"crossref","unstructured":"Martin Schrimpf Idan\u00a0Asher Blank Greta Tuckute Carina Kauf Eghbal\u00a0A Hosseini Nancy Kanwisher Joshua\u00a0B Tenenbaum and Evelina Fedorenko. 2021. The neural architecture of language: Integrative modeling converges on predictive processing. Proceedings of the National Academy of Sciences 118 45 (2021) e2105646118.","DOI":"10.1073\/pnas.2105646118"},{"key":"e_1_3_3_1_46_2","first-page":"3145","volume-title":"International conference on machine learning","author":"Shrikumar Avanti","year":"2017","unstructured":"Avanti Shrikumar, Peyton Greenside, and Anshul Kundaje. 2017. Learning important features through propagating activation differences. In International conference on machine learning. PMlR, 3145\u20133153."},{"key":"e_1_3_3_1_47_2","volume-title":"International Conference on Learning Representations","author":"Song Yonghao","year":"2024","unstructured":"Yonghao Song, Bingchuan Liu, Xiang Li, Nanlin Shi, Yijun Wang, and Xiaorong Gao. 2024. Decoding Natural Images from EEG for Object Recognition. In International Conference on Learning Representations."},{"key":"e_1_3_3_1_48_2","doi-asserted-by":"crossref","unstructured":"Yonghao Song Yijun Wang Huiguang He and Xiaorong Gao. 2025. Recognizing Natural Images From EEG With Language-Guided Contrastive Learning. IEEE Transactions on Neural Networks and Learning Systems (2025).","DOI":"10.1109\/TNNLS.2025.3562743"},{"key":"e_1_3_3_1_49_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.479"},{"key":"e_1_3_3_1_50_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01389"},{"key":"e_1_3_3_1_51_2","doi-asserted-by":"crossref","unstructured":"Simon Thorpe Denis Fize and Catherine Marlot. 1996. Speed of processing in the human visual system. nature 381 6582 (1996) 520\u2013522.","DOI":"10.1038\/381520a0"},{"key":"e_1_3_3_1_52_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01381"},{"key":"e_1_3_3_1_53_2","unstructured":"Peng Wang Shuai Bai Sinan Tan Shijie Wang Zhihao Fan Jinze Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge et\u00a0al. 2024. Qwen2-vl: Enhancing vision-language model\u2019s perception of the world at any resolution. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.12191 (2024)."},{"key":"e_1_3_3_1_54_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00215"},{"key":"e_1_3_3_1_55_2","doi-asserted-by":"crossref","unstructured":"Guangyu Yang and Jinguo Liu. 2024. A new framework combining diffusion models and the convolution classifier for generating images from EEG signals. Brain Sciences 14 5 (2024) 478.","DOI":"10.3390\/brainsci14050478"},{"key":"e_1_3_3_1_56_2","doi-asserted-by":"crossref","unstructured":"Zesheng Ye Lina Yao Yu Zhang and Sylvia Gustin. 2024. Self-supervised cross-modal visual retrieval from brain activities. Pattern Recognition 145 (2024) 109915.","DOI":"10.1016\/j.patcog.2023.109915"},{"key":"e_1_3_3_1_57_2","doi-asserted-by":"crossref","unstructured":"Hong Zeng Nianzhang Xia Dongguan Qian Motonobu Hattori Chu Wang and Wanzeng Kong. 2023. DM-RE2I: A framework based on diffusion model for the reconstruction from EEG to image. Biomedical Signal Processing and Control 86 (2023) 105125.","DOI":"10.1016\/j.bspc.2023.105125"},{"key":"e_1_3_3_1_58_2","unstructured":"Wenjiang Zhang Sifeng Wang Yuwei Su Xinyu Li Chen Zhang and Suyu Zhong. 2025. NeuroBridge: Bio-Inspired Self-Supervised EEG-to-Image Decoding via Cognitive Priors and Bidirectional Semantic Alignment. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2511.06836 (2025)."}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:00:08Z","timestamp":1781535608000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810885"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":57,"alternative-id":["10.1145\/3805622.3810885","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810885","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}