{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,8]],"date-time":"2026-07-08T15:54:25Z","timestamp":1783526065957,"version":"3.55.0"},"publisher-location":"Cham","reference-count":40,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031730092","type":"print"},{"value":"9783031730108","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,10]],"date-time":"2024-11-10T00:00:00Z","timestamp":1731196800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,10]],"date-time":"2024-11-10T00:00:00Z","timestamp":1731196800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73010-8_21","type":"book-chapter","created":{"date-parts":[[2024,11,9]],"date-time":"2024-11-09T13:11:17Z","timestamp":1731157877000},"page":"353-369","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Enhancing Cross-Subject fMRI-to-Video Decoding with\u00a0Global-Local Functional Alignment"],"prefix":"10.1007","author":[{"given":"Chong","family":"Li","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xuelin","family":"Qian","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yun","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jingyang","family":"Huo","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiangyang","family":"Xue","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yanwei","family":"Fu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jianfeng","family":"Feng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,11,10]]},"reference":[{"key":"21_CR1","doi-asserted-by":"crossref","unstructured":"Bain, M., Nagrani, A., Varol, G., Zisserman, A.: Frozen in time: a joint video and image encoder for end-to-end retrieval. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1728\u20131738 (2021)","DOI":"10.1109\/ICCV48922.2021.00175"},{"key":"21_CR2","doi-asserted-by":"publisher","unstructured":"Bazeille, T., DuPre, E., Richard, H., Poline, J.B., Thirion, B.: An empirical evaluation of functional alignment using inter-subject decoding. Neuroimage 245, 118683 (2021). https:\/\/doi.org\/10.1016\/j.neuroimage.2021.118683, https:\/\/www.sciencedirect.com\/science\/article\/pii\/S1053811921009563","DOI":"10.1016\/j.neuroimage.2021.118683"},{"key":"21_CR3","doi-asserted-by":"crossref","unstructured":"Bazeille, T., Richard, H., Janati, H., Thirion, B.: Local optimal transport for functional brain template estimation. In: Information Processing in Medical Imaging (2019). https:\/\/api.semanticscholar.org\/CorpusID:162169103","DOI":"10.1007\/978-3-030-20351-1_18"},{"key":"21_CR4","doi-asserted-by":"publisher","unstructured":"Chau, W., McIntosh, A.R.: The talairach coordinate of a point in the mni space: how to interpret it. Neuroimage 25(2), 408\u2013416 (2005). https:\/\/doi.org\/10.1016\/j.neuroimage.2004.12.007, https:\/\/www.sciencedirect.com\/science\/article\/pii\/S1053811904007554","DOI":"10.1016\/j.neuroimage.2004.12.007"},{"key":"21_CR5","doi-asserted-by":"crossref","unstructured":"Chen, Z., Qing, J., Xiang, T., Yue, W.L., Zhou, J.H.: Seeing beyond the brain: conditional diffusion model with sparse masked modeling for vision decoding. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22710\u201322720 (2023)","DOI":"10.1109\/CVPR52729.2023.02175"},{"key":"21_CR6","unstructured":"Chen, Z., Qing, J., Zhou, J.H.: Cinematic mindscapes: high-quality video reconstruction from brain activity. arXiv preprint arXiv:2305.11675 (2023)"},{"key":"21_CR7","unstructured":"Conroy, B., Singer, B., Haxby, J., Ramadge, P.J.: fMRI-based inter-subject cortical alignment using functional connectivity. In: Advances in neural information processing systems, vol. 22 (2009)"},{"key":"21_CR8","doi-asserted-by":"crossref","unstructured":"Esser, P., Rombach, R., Ommer, B.: Taming transformers for high-resolution image synthesis. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 12873\u201312883 (2021)","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"21_CR9","doi-asserted-by":"publisher","first-page":"23","DOI":"10.3389\/fninf.2015.00023","volume":"9","author":"JS Gao","year":"2015","unstructured":"Gao, J.S., Huth, A.G., Lescroart, M.D., Gallant, J.L.: Pycortex: an interactive surface visualizer for fMRI. Front. Neuroinform. 9, 23 (2015)","journal-title":"Front. Neuroinform."},{"key":"21_CR10","doi-asserted-by":"crossref","unstructured":"Gao, J., Fu, Y., Wang, Y., Qian, X., Feng, J., Fu, Y.: Mind-3D: reconstruct high-quality 3D objects in human brain. arXiv preprint arXiv:2312.07485 (2023)","DOI":"10.1007\/978-3-031-72970-6_18"},{"issue":"6","key":"21_CR11","doi-asserted-by":"publisher","first-page":"1677","DOI":"10.1109\/JBHI.2019.2940695","volume":"24","author":"Y Gao","year":"2020","unstructured":"Gao, Y., Zhang, Y., Cao, Z., Guo, X., Zhang, J.: Decoding brain states from fMRI signals by using unsupervised domain adaptation. IEEE J. Biomed. Health Inform. 24(6), 1677\u20131685 (2020). https:\/\/doi.org\/10.1109\/JBHI.2019.2940695","journal-title":"IEEE J. Biomed. Health Inform."},{"key":"21_CR12","doi-asserted-by":"crossref","unstructured":"Glasser, M.F., et al.: A multi-modal parcellation of human cerebral cortex. Nature 536, 171\u2013178 (2016). https:\/\/api.semanticscholar.org\/CorpusID:205249949","DOI":"10.1038\/nature18933"},{"key":"21_CR13","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1016\/j.neuroimage.2013.04.127","volume":"80","author":"MF Glasser","year":"2013","unstructured":"Glasser, M.F., et al.: The minimal preprocessing pipelines for the human connectome project. Neuroimage 80, 105\u2013124 (2013)","journal-title":"Neuroimage"},{"key":"21_CR14","unstructured":"Gong, Z., et al.: MindTuner: cross-subject visual decoding with visual fingerprint and semantic correction. arXiv preprint arXiv:2404.12630 (2024)"},{"key":"21_CR15","doi-asserted-by":"publisher","unstructured":"Han, K., et al.: Variational autoencoder: an unsupervised model for encoding and decoding fmri activity in visual cortex. Neuroimage 198, 125\u2013136 (2019). https:\/\/doi.org\/10.1016\/j.neuroimage.2019.05.039, https:\/\/www.sciencedirect.com\/science\/article\/pii\/S1053811919304318","DOI":"10.1016\/j.neuroimage.2019.05.039"},{"key":"21_CR16","doi-asserted-by":"publisher","unstructured":"Haxby, J., et al.: A common, high-dimensional model of the representational space in human ventral temporal cortex. Neuron 72(2), 404\u2013416 (2011). https:\/\/doi.org\/10.1016\/j.neuron.2011.08.026, https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0896627311007811","DOI":"10.1016\/j.neuron.2011.08.026"},{"issue":"2","key":"21_CR17","doi-asserted-by":"publisher","first-page":"170","DOI":"10.1007\/s11633-020-1263-y","volume":"18","author":"S Huang","year":"2021","unstructured":"Huang, S., Shao, W., Wang, M.L., Zhang, D.Q.: fMRI-based decoding of visual information from human brain activity: A brief review. Int. J. Autom. Comput. 18(2), 170\u2013184 (2021)","journal-title":"Int. J. Autom. Comput."},{"issue":"2","key":"21_CR18","doi-asserted-by":"publisher","first-page":"352","DOI":"10.1109\/TPAMI.2017.2670560","volume":"40","author":"YG Jiang","year":"2018","unstructured":"Jiang, Y.G., Wu, Z., Wang, J., Xue, X., Chang, S.F.: Exploiting feature and class relationships in video categorization with regularized deep neural networks. IEEE Trans. Pattern Anal. Mach. Intell. 40(2), 352\u2013364 (2018). https:\/\/doi.org\/10.1109\/TPAMI.2017.2670560","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"21_CR19","unstructured":"Li, J., Li, D., Savarese, S., Hoi, S.C.H.: BLIP-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In: International Conference on Machine Learning (2023). https:\/\/api.semanticscholar.org\/CorpusID:256390509"},{"key":"21_CR20","doi-asserted-by":"crossref","unstructured":"Li, W., Liu, M., Chen, F., Zhang, D.: Graph-based decoding model for functional alignment of unaligned fMRI data. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a034, pp. 2653\u20132660 (2020)","DOI":"10.1609\/aaai.v34i03.5650"},{"key":"21_CR21","unstructured":"Lin, S., Sprague, T., Singh, A.K.: Mind reader: Reconstructing complex images from brain activities. In: Advances in Neural Information Processing Systems, vol. 35, pp. 29624\u201329636 (2022)"},{"key":"21_CR22","unstructured":"Liu, Y., Ma, Y., Zhou, W., Zhu, G., Zheng, N.: BrainCLIP: bridging brain and visual-linguistic representation via clip for generic natural visual stimulus decoding from fMRI. arXiv preprint arXiv:2302.12971 (2023)"},{"key":"21_CR23","doi-asserted-by":"crossref","unstructured":"Miller, K.L., et al.: Multimodal population brain imaging in the UK biobank prospective epidemiological study. Nat. Neurosci. 19, 1523\u20131536 (2016). https:\/\/api.semanticscholar.org\/CorpusID:1018393","DOI":"10.1038\/nn.4393"},{"key":"21_CR24","doi-asserted-by":"crossref","unstructured":"Ozcelik, F., VanRullen, R.: Brain-diffuser: natural scene reconstruction from fMRI signals using generative latent diffusion. arXiv preprint arXiv:2303.05334 (2023)","DOI":"10.1038\/s41598-023-42891-8"},{"key":"21_CR25","unstructured":"Qian, X., Wang, Y., Fu, Y., Sun, X., Xue, X., Feng, J.: Joint fMRI decoding and encoding with latent embedding alignment (2023). https:\/\/api.semanticscholar.org\/CorpusID:259076476"},{"key":"21_CR26","unstructured":"Qian, X., Wang, Y., Huo, J., Feng, J., Fu, Y.: fMRI-PTE: a large-scale fMRI pretrained transformer encoder for multi-subject brain activity decoding. arXiv preprint arXiv:2311.00342 (2023)"},{"key":"21_CR27","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"21_CR28","doi-asserted-by":"publisher","unstructured":"Ram\u00edrez, F.M., Revsine, C., Merriam, E.P.: What do across-subject analyses really tell us about neural coding? Neuropsychologia 143, 107489 (2020). https:\/\/doi.org\/10.1016\/j.neuropsychologia.2020.107489, https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0028393220301603","DOI":"10.1016\/j.neuropsychologia.2020.107489"},{"key":"21_CR29","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"21_CR30","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/978-3-319-24574-4_28","volume-title":"Medical Image Computing and Computer-Assisted Intervention \u2013 MICCAI 2015","author":"O Ronneberger","year":"2015","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: convolutional networks for biomedical image segmentation. In: Navab, N., Hornegger, J., Wells, W.M., Frangi, A.F. (eds.) MICCAI 2015. LNCS, vol. 9351, pp. 234\u2013241. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-24574-4_28"},{"issue":"1","key":"21_CR31","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1093\/cercor\/bhp085","volume":"20","author":"MR Sabuncu","year":"2009","unstructured":"Sabuncu, M.R., Singer, B.D., Conroy, B., Bryan, R.E., Ramadge, P.J., Haxby, J.V.: Function-based intersubject alignment of human cortical anatomy. Cerebral Cortex 20(1), 130\u2013140 (2009). https:\/\/doi.org\/10.1093\/cercor\/bhp085","journal-title":"Cerebral Cortex"},{"key":"21_CR32","doi-asserted-by":"publisher","first-page":"483","DOI":"10.1146\/annurev-psych-120710-100412","volume":"63","author":"F Tong","year":"2012","unstructured":"Tong, F., Pratte, M.S.: Decoding patterns of human brain activity. Annu. Rev. Psychol. 63, 483\u2013509 (2012)","journal-title":"Annu. Rev. Psychol."},{"key":"21_CR33","unstructured":"Tong, Z., Song, Y., Wang, J., Wang, L.: VideoMAE: masked autoencoders are data-efficient learners for self-supervised video pre-training. In: Advances in Neural Information Processing Systems (2022)"},{"issue":"20","key":"21_CR34","doi-asserted-by":"publisher","first-page":"4502","DOI":"10.1093\/cercor\/bhab498","volume":"32","author":"C Wang","year":"2022","unstructured":"Wang, C., et al.: Reconstructing rapid natural vision with fMRI-conditional video generative adversarial network. Cerebral Cortex 32(20), 4502\u20134511 (2022). https:\/\/doi.org\/10.1093\/cercor\/bhab498","journal-title":"Cerebral Cortex"},{"key":"21_CR35","unstructured":"Wang, J., Yuan, H., Chen, D., Zhang, Y., Wang, X., Zhang, S.: Modelscope text-to-video technical report. arXiv preprint arXiv:2308.06571 (2023)"},{"key":"21_CR36","doi-asserted-by":"crossref","unstructured":"Wang, S., Liu, S., Tan, Z., Wang, X.: MindBridge: a cross-subject brain decoding framework. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11333\u201311342 (2024)","DOI":"10.1109\/CVPR52733.2024.01077"},{"issue":"4","key":"21_CR37","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A., Sheikh, H., Simoncelli, E.: Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13(4), 600\u2013612 (2004). https:\/\/doi.org\/10.1109\/TIP.2003.819861","journal-title":"IEEE Trans. Image Process."},{"issue":"2","key":"21_CR38","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1093\/cercor\/3.2.79","volume":"3","author":"JDG Watson","year":"1993","unstructured":"Watson, J.D.G., et al.: Area V5 of the human brain: evidence from a combined study using positron emission tomography and magnetic resonance imaging. Cerebral Cortex 3(2), 79\u201394 (1993). https:\/\/doi.org\/10.1093\/cercor\/3.2.79","journal-title":"Cerebral Cortex"},{"key":"21_CR39","doi-asserted-by":"publisher","unstructured":"Wen, H., Shi, J., Zhang, Y., Lu, K.H., Cao, J., Liu, Z.: Neural encoding and decoding with deep learning for dynamic natural vision. Cerebral Cortex 28(12), 4136\u20134160 (2017). https:\/\/doi.org\/10.1093\/cercor\/bhx268","DOI":"10.1093\/cercor\/bhx268"},{"key":"21_CR40","doi-asserted-by":"crossref","unstructured":"Wu, J.Z., et al.: Tune-a-video: one-shot tuning of image diffusion models for text-to-video generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7623\u20137633 (2023)","DOI":"10.1109\/ICCV51070.2023.00701"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73010-8_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T02:24:39Z","timestamp":1733019879000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73010-8_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,10]]},"ISBN":["9783031730092","9783031730108"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73010-8_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,10]]},"assertion":[{"value":"10 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}