{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T06:10:32Z","timestamp":1743055832131,"version":"3.40.3"},"publisher-location":"Cham","reference-count":49,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031732225"},{"type":"electronic","value":"9783031732232"}],"license":[{"start":{"date-parts":[[2024,11,8]],"date-time":"2024-11-08T00:00:00Z","timestamp":1731024000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,8]],"date-time":"2024-11-08T00:00:00Z","timestamp":1731024000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73223-2_14","type":"book-chapter","created":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T18:48:49Z","timestamp":1731005329000},"page":"238-254","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Aligning Neuronal Coding of\u00a0Dynamic Visual Scenes with\u00a0Foundation Vision Models"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8834-9316","authenticated-orcid":false,"given":"Rining","family":"Wu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4939-9393","authenticated-orcid":false,"given":"Feixiang","family":"Zhou","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8430-443X","authenticated-orcid":false,"given":"Ziwei","family":"Yin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5391-7213","authenticated-orcid":false,"given":"K. 
Jian","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,8]]},"reference":[{"key":"14_CR1","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1007\/978-3-031-25069-9_3","volume-title":"ECCV 2022","author":"S Amir","year":"2023","unstructured":"Amir, S., Gandelsman, Y., Bagon, S., Dekel, T.: On the effectiveness of ViT features as local semantic descriptors. In: Karlinsky, L., Michaeli, T., Nishino, K. (eds.) ECCV 2022. LNCS, vol. 13804, pp. 39\u201355. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-25069-9_3"},{"key":"14_CR2","doi-asserted-by":"publisher","unstructured":"Anand, D., et al.: One-shot localization and segmentation of medical images with foundation models (2023). https:\/\/doi.org\/10.48550\/arXiv.2310.18642","DOI":"10.48550\/arXiv.2310.18642"},{"issue":"20","key":"14_CR3","doi-asserted-by":"publisher","first-page":"2470","DOI":"10.3390\/electronics10202470","volume":"10","author":"D Bhatt","year":"2021","unstructured":"Bhatt, D., et al.: CNN variants for computer vision: history, architecture, application, challenges and future scope. Electronics 10(20), 2470 (2021). https:\/\/doi.org\/10.3390\/electronics10202470","journal-title":"Electronics"},{"key":"14_CR4","doi-asserted-by":"publisher","unstructured":"Caron, M., et al.: Emerging properties in self-supervised vision transformers (2021). https:\/\/doi.org\/10.48550\/arXiv.2104.14294","DOI":"10.48550\/arXiv.2104.14294"},{"key":"14_CR5","doi-asserted-by":"publisher","unstructured":"Carreira, J., Zisserman, A.: Quo Vadis, Action Recognition? A New Model and the Kinetics Dataset (2018). https:\/\/doi.org\/10.48550\/arXiv.1705.07750","DOI":"10.48550\/arXiv.1705.07750"},{"key":"14_CR6","doi-asserted-by":"publisher","unstructured":"Chen, Y., et al.: SecondPose: SE(3)-consistent dual-stream feature fusion for category-level pose estimation (2023). 
https:\/\/doi.org\/10.48550\/arXiv.2311.11125","DOI":"10.48550\/arXiv.2311.11125"},{"key":"14_CR7","doi-asserted-by":"crossref","unstructured":"Chichilnisky, E.J.: A simple white noise analysis of neuronal light responses 12(2), 199\u2013213","DOI":"10.1080\/net.12.2.199.213"},{"key":"14_CR8","doi-asserted-by":"publisher","unstructured":"Consortium, M., et al.: Functional connectomics spanning multiple areas of mouse visual cortex (2021). https:\/\/doi.org\/10.1101\/2021.07.28.454025","DOI":"10.1101\/2021.07.28.454025"},{"key":"14_CR9","doi-asserted-by":"publisher","unstructured":"Cuturi, M., Blondel, M.: Soft-DTW: a differentiable loss function for time-series (2018). https:\/\/doi.org\/10.48550\/arXiv.1703.01541","DOI":"10.48550\/arXiv.1703.01541"},{"key":"14_CR10","doi-asserted-by":"publisher","unstructured":"Darcet, T., Oquab, M., Mairal, J., Bojanowski, P.: Vision Transformers Need Registers (2023). https:\/\/doi.org\/10.48550\/arXiv.2309.16588","DOI":"10.48550\/arXiv.2309.16588"},{"key":"14_CR11","doi-asserted-by":"publisher","first-page":"263","DOI":"10.1146\/annurev-vision-082114-035334","volume":"1","author":"JB Demb","year":"2015","unstructured":"Demb, J.B., Singer, J.H.: Functional circuitry of the retina. Ann. Rev. Vis. Sci. 1, 263\u2013289 (2015)","journal-title":"Ann. Rev. Vis. Sci."},{"key":"14_CR12","doi-asserted-by":"publisher","unstructured":"Ding, X., Lee, D., Melander, J.B., Sivulka, G., Ganguli, S., Baccus, S.A.: Information geometry of the retinal representation manifold (2023). https:\/\/doi.org\/10.1101\/2023.05.17.541206","DOI":"10.1101\/2023.05.17.541206"},{"key":"14_CR13","doi-asserted-by":"publisher","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale (2021). 
https:\/\/doi.org\/10.48550\/arXiv.2010.11929","DOI":"10.48550\/arXiv.2010.11929"},{"issue":"8","key":"14_CR14","doi-asserted-by":"publisher","first-page":"507","DOI":"10.1038\/nrn3783","volume":"15","author":"T Euler","year":"2014","unstructured":"Euler, T., Haverkamp, S., Schubert, T., Baden, T.: Retinal bipolar cells: elementary building blocks of vision. Nat. Rev. Neurosci. 15(8), 507\u2013519 (2014). https:\/\/doi.org\/10.1038\/nrn3783","journal-title":"Nat. Rev. Neurosci."},{"key":"14_CR15","doi-asserted-by":"publisher","unstructured":"Farha, Y.A., Gall, J.: MS-TCN: multi-stage temporal convolutional network for action segmentation (2019). https:\/\/doi.org\/10.48550\/arXiv.1903.01945","DOI":"10.48550\/arXiv.1903.01945"},{"key":"14_CR16","doi-asserted-by":"crossref","unstructured":"Gauthier, J.L., et al.: Receptive fields in primate retina are coordinated to sample visual space more uniformly 7(4), e1000063","DOI":"10.1371\/journal.pbio.1000063"},{"key":"14_CR17","doi-asserted-by":"crossref","unstructured":"Gollisch, T., Meister, M.: Eye smarter than scientists believed: neural computations in circuits of the retina 65(2), 150\u2013164","DOI":"10.1016\/j.neuron.2009.12.009"},{"key":"14_CR18","doi-asserted-by":"publisher","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift (2015). https:\/\/doi.org\/10.48550\/arXiv.1502.03167","DOI":"10.48550\/arXiv.1502.03167"},{"key":"14_CR19","doi-asserted-by":"publisher","unstructured":"Izmailov, P., Podoprikhin, D., Garipov, T., Vetrov, D., Wilson, A.G.: Averaging Weights Leads to Wider Optima and Better Generalization (2019). 
https:\/\/doi.org\/10.48550\/arXiv.1803.05407","DOI":"10.48550\/arXiv.1803.05407"},{"issue":"1","key":"14_CR20","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","volume":"35","author":"S Ji","year":"2013","unstructured":"Ji, S., Xu, W., Yang, M., Yu, K.: 3D convolutional neural networks for human action recognition. IEEE Trans. Pattern Anal. Mach. Intell. 35(1), 221\u2013231 (2013). https:\/\/doi.org\/10.1109\/TPAMI.2012.59","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"14_CR21","doi-asserted-by":"publisher","unstructured":"Karaev, N., Rocco, I., Graham, B., Neverova, N., Vedaldi, A., Rupprecht, C.: CoTracker: it is better to track together (2023). https:\/\/doi.org\/10.48550\/arXiv.2307.07635","DOI":"10.48550\/arXiv.2307.07635"},{"key":"14_CR22","doi-asserted-by":"publisher","unstructured":"Karamanlis, D., Schreyer, H.M., Gollisch, T.: Retinal encoding of natural scenes 8(1), 171\u2013193. https:\/\/doi.org\/10.1146\/annurev-vision-100820-114239","DOI":"10.1146\/annurev-vision-100820-114239"},{"issue":"2","key":"14_CR23","doi-asserted-by":"publisher","first-page":"648","DOI":"10.1109\/TPAMI.2021.3107160","volume":"44","author":"P Koniusz","year":"2022","unstructured":"Koniusz, P., Wang, L., Cherian, A.: Tensor representations for action recognition. IEEE Trans. Pattern Anal. Mach. Intell. 44(2), 648\u2013665 (2022). https:\/\/doi.org\/10.1109\/TPAMI.2021.3107160","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"11","key":"14_CR24","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y Lecun","year":"1998","unstructured":"Lecun, Y., Bottou, L., Bengio, Y., Haffner, P.: Gradient-based learning applied to document recognition. Proc. IEEE 86(11), 2278\u20132324 (1998). https:\/\/doi.org\/10.1109\/5.726791","journal-title":"Proc. 
IEEE"},{"key":"14_CR25","doi-asserted-by":"publisher","unstructured":"Li, W., Joseph\u00a0Raj, A.N., Tjahjadi, T., Zhuang, Z.: Fusion of ANNs as decoder of retinal spike trains for scene reconstruction 52(13), 15164\u201315176. https:\/\/doi.org\/10.1007\/s10489-022-03402-w","DOI":"10.1007\/s10489-022-03402-w"},{"key":"14_CR26","doi-asserted-by":"publisher","unstructured":"Liu, J.K., Karamanlis, D., Gollisch, T.: Simple model for encoding natural images by retinal ganglion cells with nonlinear spatial integration 18(3), e1009925. https:\/\/doi.org\/10.1371\/journal.pcbi.1009925","DOI":"10.1371\/journal.pcbi.1009925"},{"key":"14_CR27","doi-asserted-by":"crossref","unstructured":"Liu, J.K., et al.: Inference of neuronal functional circuitry with spike-triggered non-negative matrix factorization 8(1), 149","DOI":"10.1038\/s41467-017-00156-9"},{"key":"14_CR28","doi-asserted-by":"publisher","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization (2019). https:\/\/doi.org\/10.48550\/arXiv.1711.05101","DOI":"10.48550\/arXiv.1711.05101"},{"key":"14_CR29","doi-asserted-by":"publisher","unstructured":"Ma, G., Jiang, R., Yan, R., Tang, H.: Temporal conditioning spiking latent variable models of the neural response to natural visual scenes. arXiv (2023). https:\/\/doi.org\/10.48550\/arXiv.2306.12045, arXiv:2306.12045 [cs, q-bio]","DOI":"10.48550\/arXiv.2306.12045"},{"issue":"11","key":"14_CR30","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pcbi.1005189","volume":"12","author":"A Onken","year":"2016","unstructured":"Onken, A., Liu, J.K., Karunasekara, P.P.C.R., Delis, I., Gollisch, T., Panzeri, S.: Using Matrix and tensor factorizations for the single-trial analysis of population spike trains. PLoS Comput. Biol. 12(11), e1005189 (2016). https:\/\/doi.org\/10.1371\/journal.pcbi.1005189","journal-title":"PLoS Comput. 
Biol."},{"key":"14_CR31","unstructured":"van\u00a0den Oord, A., et al.: WaveNet: a generative model for raw audio (2016)"},{"key":"14_CR32","doi-asserted-by":"publisher","unstructured":"Oquab, M., et al.: DINOv2: learning robust visual features without supervision (2023). https:\/\/doi.org\/10.48550\/arXiv.2304.07193","DOI":"10.48550\/arXiv.2304.07193"},{"key":"14_CR33","doi-asserted-by":"publisher","unstructured":"Peebles, W., Xie, S.: Scalable diffusion models with transformers (2023). https:\/\/doi.org\/10.48550\/arXiv.2212.09748, arXiv:2212.09748 [cs]","DOI":"10.48550\/arXiv.2212.09748"},{"key":"14_CR34","doi-asserted-by":"publisher","unstructured":"Pereda, A.E., Curti, S., Hoge, G., Cachope, R., Flores, C.E., Rash, J.E.: Gap junction-mediated electrical transmission: regulatory mechanisms and plasticity. Biochimica et Biophysica Acta (BBA) - Biomembranes 1828(1), 134\u2013146 (2013). https:\/\/doi.org\/10.1016\/j.bbamem.2012.05.026","DOI":"10.1016\/j.bbamem.2012.05.026"},{"key":"14_CR35","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"140","DOI":"10.1007\/978-3-642-15567-3_11","volume-title":"Computer Vision \u2013 ECCV 2010","author":"GW Taylor","year":"2010","unstructured":"Taylor, G.W., Fergus, R., LeCun, Y., Bregler, C.: Convolutional learning of spatio-temporal features. In: Daniilidis, K., Maragos, P., Paragios, N. (eds.) ECCV 2010. LNCS, vol. 6316, pp. 140\u2013153. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-15567-3_11"},{"key":"14_CR36","doi-asserted-by":"publisher","unstructured":"Turishcheva, P., et al.: The dynamic sensorium competition for predicting large-scale mouse visual cortex activity from videos (2023). 
https:\/\/doi.org\/10.48550\/arXiv.2305.19654, arXiv:2305.19654 [q-bio]","DOI":"10.48550\/arXiv.2305.19654"},{"key":"14_CR37","unstructured":"Turishcheva, P., et al.: The dynamic sensorium competition for predicting large-scale mouse visual cortex activity from videos (2023)"},{"key":"14_CR38","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Guyon, I., et al. (eds.) Advances in Neural Information Processing Systems. vol.\u00a030. Curran Associates, Inc. (2017)"},{"key":"14_CR39","doi-asserted-by":"publisher","unstructured":"Wang, C., Fang, C., Zou, Y., Yang, J., Sawan, M.: SpikeSEE: an energy-efficient dynamic scenes processing framework for retinal prostheses 164, 357\u2013368. https:\/\/doi.org\/10.1016\/j.neunet.2023.05.002","DOI":"10.1016\/j.neunet.2023.05.002"},{"key":"14_CR40","doi-asserted-by":"publisher","unstructured":"Wang, E.Y., et al.: Towards a foundation model of the mouse visual cortex. bioRxiv (2023). https:\/\/doi.org\/10.1101\/2023.03.21.533548","DOI":"10.1101\/2023.03.21.533548"},{"key":"14_CR41","doi-asserted-by":"publisher","DOI":"10.1101\/2023.03.21.533548","author":"EY Wang","year":"2023","unstructured":"Wang, E.Y., et al.: Towards a foundation model of the mouse visual. Cortex (2023). https:\/\/doi.org\/10.1101\/2023.03.21.533548","journal-title":"Cortex"},{"key":"14_CR42","doi-asserted-by":"publisher","unstructured":"Wang, L., Koniusz, P.: Self-supervising action recognition by statistical moment and subspace descriptors. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 4324\u20134333 (2021). https:\/\/doi.org\/10.1145\/3474085.3475572","DOI":"10.1145\/3474085.3475572"},{"key":"14_CR43","doi-asserted-by":"publisher","first-page":"E003","DOI":"10.1017\/S0952523823000019","volume":"40","author":"J Wu","year":"2023","unstructured":"Wu, J., Kim, Y.J., Dacey, D.M., Troy, J.B., Smith, R.G.: Two mechanisms for direction selectivity in a model of the primate starburst amacrine cell. Vis. Neurosci. 
40, E003 (2023). https:\/\/doi.org\/10.1017\/S0952523823000019","journal-title":"Vis. Neurosci."},{"issue":"1","key":"14_CR44","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1109\/tcyb.2020.2972983","volume":"52","author":"Q Yan","year":"2022","unstructured":"Yan, Q., et al.: Revealing fine structures of the retinal receptive field by deep-learning networks. IEEE Trans. Cybernet. 52(1), 39\u201350 (2022). https:\/\/doi.org\/10.1109\/tcyb.2020.2972983","journal-title":"IEEE Trans. Cybernet."},{"issue":"6","key":"14_CR45","doi-asserted-by":"publisher","first-page":"430","DOI":"10.1016\/j.tins.2022.03.005","volume":"45","author":"SJ Zapp","year":"2022","unstructured":"Zapp, S.J., Nitsche, S., Gollisch, T.: Retinal receptive-field substructure: scaffolding for coding and computation. Trends Neurosci. 45(6), 430\u2013445 (2022). https:\/\/doi.org\/10.1016\/j.tins.2022.03.005","journal-title":"Trends Neurosci."},{"key":"14_CR46","doi-asserted-by":"publisher","unstructured":"Zhang, T., et al.: DVIS++: improved decoupled framework for universal video segmentation (2023). https:\/\/doi.org\/10.48550\/arXiv.2312.13305","DOI":"10.48550\/arXiv.2312.13305"},{"key":"14_CR47","doi-asserted-by":"publisher","unstructured":"Zhang, Y., et al.: Reconstruction of natural visual scenes from neural spikes with deep neural networks 125, 19\u201330. https:\/\/doi.org\/10.1016\/j.neunet.2020.01.033","DOI":"10.1016\/j.neunet.2020.01.033"},{"key":"14_CR48","doi-asserted-by":"publisher","unstructured":"Zhao, Y., Ma, H., Kong, S., Fowlkes, C.: Instance tracking in 3D scenes from egocentric videos (2023). 
https:\/\/doi.org\/10.48550\/arXiv.2312.04117","DOI":"10.48550\/arXiv.2312.04117"},{"issue":"10","key":"14_CR49","doi-asserted-by":"publisher","DOI":"10.1016\/j.patter.2021.100350","volume":"2","author":"Y Zheng","year":"2021","unstructured":"Zheng, Y., Jia, S., Yu, Z., Liu, J.K., Huang, T.: Unraveling neural coding of dynamic natural visual scenes via convolutional recurrent neural networks. Patterns 2(10), 100350 (2021). https:\/\/doi.org\/10.1016\/j.patter.2021.100350","journal-title":"Patterns"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73223-2_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T19:05:43Z","timestamp":1731006343000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73223-2_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,8]]},"ISBN":["9783031732225","9783031732232"],"references-count":49,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73223-2_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,8]]},"assertion":[{"value":"8 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference 
City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}