{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T01:15:03Z","timestamp":1742951703925,"version":"3.40.3"},"publisher-location":"Cham","reference-count":49,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031729034"},{"type":"electronic","value":"9783031729041"}],"license":[{"start":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T00:00:00Z","timestamp":1732147200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T00:00:00Z","timestamp":1732147200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72904-1_11","type":"book-chapter","created":{"date-parts":[[2024,11,20]],"date-time":"2024-11-20T13:29:47Z","timestamp":1732109387000},"page":"179-195","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Efficient Neural Video Representation with\u00a0Temporally Coherent Modulation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0891-0861","authenticated-orcid":false,"given":"Seungjun","family":"Shin","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9286-0552","authenticated-orcid":false,"given":"Suji","family":"Kim","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9139-8699","authenticated-orcid":false,"given":"Dokwan","family":"Oh","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,21]]},"reference":[{"issue":"2","key":"11_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3406183","volume":"3","author":"P Andersson","year":"2020","unstructured":"Andersson, P., Nilsson, J., Akenine-M\u00f6ller, T., Oskarsson, M., \u00c5str\u00f6m, K., Fairchild, M.D.: FLIP: a difference evaluator for alternating images. Proc. ACM Comput. Graph. Interact. Tech. 3(2), 1\u201315 (2020)","journal-title":"Proc. ACM Comput. Graph. Interact. Tech."},{"key":"11_CR2","doi-asserted-by":"crossref","unstructured":"Bai, Y., Dong, C., Wang, C., Yuan, C.: PS-NeRV: patch-wise stylized neural representations for videos. In: 2023 IEEE International Conference on Image Processing (ICIP), pp. 41\u201345. IEEE (2023)","DOI":"10.1109\/ICIP49359.2023.10222144"},{"key":"11_CR3","unstructured":"Bauer, M., Dupont, E., Brock, A., Rosenbaum, D., Schwarz, J.R., Kim, H.: Spatial functa: Scaling functa to ImageNet classification and generation. arXiv preprint arXiv: 2302.03130 (2023)"},{"key":"11_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"608","DOI":"10.1007\/978-3-030-58526-6_36","volume-title":"Computer Vision \u2013 ECCV 2020","author":"R Chabra","year":"2020","unstructured":"Chabra, R., et al.: Deep Local shapes: learning local SDF priors for detailed 3D reconstruction. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12374, pp. 608\u2013625. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58526-6_36"},{"key":"11_CR5","doi-asserted-by":"crossref","unstructured":"Chen, H., Gwilliam, M., Lim, S.N., Shrivastava, A.: HNeRV: a hybrid neural representation for videos. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.00990"},{"key":"11_CR6","unstructured":"Chen, H., He, B., Wang, H., Ren, Y., Lim, S.N., Shrivastava, A.: NeRV: neural representations for videos. In: Advances in Neural Information Processing System, vol. 34, pp. 21557\u201321568 (2021)"},{"key":"11_CR7","doi-asserted-by":"crossref","unstructured":"Chen, Y., Liu, S., Wang, X.: Learning continuous image representation with local implicit image function. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8628\u20138638 (2021)","DOI":"10.1109\/CVPR46437.2021.00852"},{"key":"11_CR8","doi-asserted-by":"crossref","unstructured":"Chen, Z., et al.: VideoINR: learning video implicit neural representation for continuous space-time super-resolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2047\u20132057 (2022)","DOI":"10.1109\/CVPR52688.2022.00209"},{"key":"11_CR9","doi-asserted-by":"crossref","unstructured":"Chibane, J., Alldieck, T., Pons-Moll, G.: Implicit functions in feature space for 3D shape reconstruction and completion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6970\u20136981 (2020)","DOI":"10.1109\/CVPR42600.2020.00700"},{"issue":"4","key":"11_CR10","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3386569.3392457","volume":"39","author":"M Chu","year":"2020","unstructured":"Chu, M., Xie, Y., Mayer, J., Leal-Taix\u00e9, L., Thuerey, N.: Learning temporal coherence via self-supervision for GAN-based video generation. ACM Trans. Graph. (TOG) 39(4), 1\u201375 (2020)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"11_CR11","doi-asserted-by":"crossref","unstructured":"Deng, C.L., Tartaglione, E.: Compressing explicit voxel grid representations: fast nerfs become also small. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 1236\u20131245 (2023)","DOI":"10.1109\/WACV56688.2023.00129"},{"key":"11_CR12","doi-asserted-by":"crossref","unstructured":"Dong, Z., Guo, C., Song, J., Chen, X., Geiger, A., Hilliges, O.: PINA: learning a personalized implicit neural avatar from a single RGB-D video sequence. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20470\u201320480 (2022)","DOI":"10.1109\/CVPR52688.2022.01982"},{"key":"11_CR13","unstructured":"Dupont, E., Loya, H., Alizadeh, M., Golinski, A., Teh, Y., Doucet, A.: COIN++: neural compression across modalities. Trans. Mach. Learn. Res. 2022(11) (2022)"},{"key":"11_CR14","unstructured":"Dupont, E., Golinski, A., Alizadeh, M., Teh, Y.W., Doucet, A.: COIN: compression with implicit neural representations. In: Neural Compression: From Information Theory to Applications\u2013Workshop@ ICLR 2021 (2021)"},{"key":"11_CR15","unstructured":"Dupont, E., Kim, H., Eslami, S.A., Rezende, D.J., Rosenbaum, D.: From data to functa: your data point is a function and you can treat it like one. In: International Conference on Machine Learning, pp. 5694\u20135725. PMLR (2022)"},{"key":"11_CR16","doi-asserted-by":"crossref","unstructured":"Fang, S., Xu, W., Wang, H., Yang, Y., Wang, Y., Zhou, S.: One is all: bridging the gap between neural radiance fields architectures with progressive volume distillation. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a037, pp. 597\u2013605 (2023)","DOI":"10.1609\/aaai.v37i1.25135"},{"key":"11_CR17","doi-asserted-by":"crossref","unstructured":"Figueir\u00eado, P., Paliwal, A., Kalantari, N.K.: Frame interpolation for dynamic scenes with implicit flow encoding. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 218\u2013228 (2023)","DOI":"10.1109\/WACV56688.2023.00030"},{"key":"11_CR18","doi-asserted-by":"crossref","unstructured":"Fridovich-Keil, S., Yu, A., Tancik, M., Chen, Q., Recht, B., Kanazawa, A.: Plenoxels: Radiance fields without neural networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5501\u20135510 (2022)","DOI":"10.1109\/CVPR52688.2022.00542"},{"key":"11_CR19","doi-asserted-by":"crossref","unstructured":"Genova, K., Cole, F., Sud, A., Sarna, A., Funkhouser, T.: Local deep implicit functions for 3D shape. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4857\u20134866 (2020)","DOI":"10.1109\/CVPR42600.2020.00491"},{"key":"11_CR20","doi-asserted-by":"crossref","unstructured":"Girish, S., Shrivastava, A., Gupta, K.: SHACIRA: Scalable hash-grid compression for implicit neural representations. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 17513\u201317524 (2023)","DOI":"10.1109\/ICCV51070.2023.01606"},{"key":"11_CR21","doi-asserted-by":"crossref","unstructured":"Gomes, C., Azevedo, R., Schroers, C.: Video compression with entropy-constrained neural representations. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18497\u201318506 (2023)","DOI":"10.1109\/CVPR52729.2023.01774"},{"key":"11_CR22","doi-asserted-by":"crossref","unstructured":"He, B., et al.: Towards scalable neural representation for diverse videos. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6132\u20136142 (2023)","DOI":"10.1109\/CVPR52729.2023.00594"},{"issue":"37","key":"11_CR23","first-page":"5","volume":"910","author":"T Installations","year":"1999","unstructured":"Installations, T., Line, L.: Subjective video quality assessment methods for multimedia applications. Networks 910(37), 5 (1999)","journal-title":"Networks"},{"key":"11_CR24","doi-asserted-by":"crossref","unstructured":"Jiang, C., et al.: Local implicit grid representations for 3D scenes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6001\u20136010 (2020)","DOI":"10.1109\/CVPR42600.2020.00604"},{"key":"11_CR25","unstructured":"Kim, S., Yu, S., Lee, J., Shin, J.: Scalable neural video representations with learnable positional features. arXiv preprint arXiv:2210.06823 (2022)"},{"key":"11_CR26","doi-asserted-by":"crossref","unstructured":"Lee, J.C., Rho, D., Ko, J.H., Park, E.: FFNeRV: flow-guided frame-wise neural representations for videos. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 7859\u20137870 (2023)","DOI":"10.1145\/3581783.3612444"},{"key":"11_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1007\/978-3-031-19833-5_16","volume-title":"Computer Vision-ECCV 2022","author":"Z Li","year":"2022","unstructured":"Li, Z., Wang, M., Pi, H., Xu, K., Mei, J., Liu, Y.: E-NeRV: expedite neural video representation with disentangled spatial-temporal context. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022, Part XXXV. LNCS, vol. 13695, pp. 267\u2013284. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19833-5_16"},{"key":"11_CR28","unstructured":"Liu, L., Gu, J., Zaw Lin, K., Chua, T.S., Theobalt, C.: Neural sparse voxel fields. In: Advances in Neural Information Processing Systems, vol. 33, pp. 15651\u201315663 (2020)"},{"key":"11_CR29","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)"},{"key":"11_CR30","doi-asserted-by":"crossref","unstructured":"Maiya, S.R., et al.: Nirvana: neural implicit representations of videos with adaptive networks and autoregressive patch-wise modeling. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.01382"},{"key":"11_CR31","doi-asserted-by":"crossref","unstructured":"Mehta, I., Gharbi, M., Barnes, C., Shechtman, E., Ramamoorthi, R., Chandraker, M.: Modulated periodic activations for generalizable local functional representations. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 14214\u201314223 (2021)","DOI":"10.1109\/ICCV48922.2021.01395"},{"key":"11_CR32","doi-asserted-by":"crossref","unstructured":"Mercat, A., Viitanen, M., Vanne, J.: UVG dataset: 50\/120FPS 4K sequences for video codec analysis and development. In: Proceedings of the 11th ACM Multimedia Systems Conference, pp. 297\u2013302 (2020)","DOI":"10.1145\/3339825.3394937"},{"key":"11_CR33","doi-asserted-by":"crossref","unstructured":"Mildenhall, B., Srinivasan, P., Tancik, M., Barron, J., Ramamoorthi, R., Ng, R.: NeRF: representing scenes as neural radiance fields for view synthesis. In: European Conference on Computer Vision (2020)","DOI":"10.1007\/978-3-030-58452-8_24"},{"key":"11_CR34","unstructured":"M\u00fcller, T.: tiny-cuda-nn (2021). https:\/\/github.com\/NVlabs\/tiny-cuda-nn"},{"issue":"4","key":"11_CR35","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3528223.3530127","volume":"41","author":"T M\u00fcller","year":"2022","unstructured":"M\u00fcller, T., Evans, A., Schied, C., Keller, A.: Instant neural graphics primitives with a multiresolution hash encoding. ACM Trans. Graph. (ToG) 41(4), 1\u201315 (2022)","journal-title":"ACM Trans. Graph. (ToG)"},{"key":"11_CR36","unstructured":"Pont-Tuset, J., Perazzi, F., Caelles, S., Arbel\u00e1ez, P., Sorkine-Hornung, A., Van Gool, L.: The 2017 Davis challenge on video object segmentation. arXiv:1704.00675 (2017)"},{"key":"11_CR37","doi-asserted-by":"crossref","unstructured":"Rho, D., Cho, J., Ko, J.H., Park, E.: Neural residual flow fields for efficient video representations. In: Proceedings of the Asian Conference on Computer Vision, pp. 3447\u20133463 (2022)","DOI":"10.1007\/978-3-031-26284-5_28"},{"key":"11_CR38","unstructured":"Sitzmann, V., Martel, J., Bergman, A., Lindell, D., Wetzstein, G.: Implicit neural representations with periodic activation functions. In: Advances in Neural Information Processing Systems, vol. 33, pp. 7462\u20137473 (2020)"},{"key":"11_CR39","unstructured":"Su, K., Chen, M., Shlizerman, E.: INRAS: implicit neural representation for audio scenes. Advances in Neural Information Processing Systems, vol. 35, pp. 8144\u20138158 (2022)"},{"issue":"12","key":"11_CR40","doi-asserted-by":"publisher","first-page":"1649","DOI":"10.1109\/TCSVT.2012.2221191","volume":"22","author":"GJ Sullivan","year":"2012","unstructured":"Sullivan, G.J., Ohm, J.R., Han, W.J., Wiegand, T.: Overview of the high efficiency video coding (HEVC) standard. IEEE Trans. Circuits Syst. Video Technol. 22(12), 1649\u20131668 (2012)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"11_CR41","doi-asserted-by":"crossref","unstructured":"Szatkowski, F., Piczak, K.J., Spurek, P., Tabor, J., Trzci\u0144ski, T.: Hypersound: generating implicit neural representations of audio signals with hypernetworks. arXiv preprint arXiv:2211.01839 (2022)","DOI":"10.1007\/978-3-031-43421-1_39"},{"key":"11_CR42","doi-asserted-by":"crossref","unstructured":"Takikawa, T., et al.: Neural geometric level of detail: real-time rendering with implicit 3d shapes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11358\u201311367 (2021)","DOI":"10.1109\/CVPR46437.2021.01120"},{"key":"11_CR43","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"402","DOI":"10.1007\/978-3-030-58536-5_24","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Z Teed","year":"2020","unstructured":"Teed, Z., Deng, J.: RAFT: recurrent all-pairs field transforms for optical flow. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12347, pp. 402\u2013419. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58536-5_24"},{"issue":"146","key":"11_CR44","first-page":"10","volume":"2006","author":"S Tomar","year":"2006","unstructured":"Tomar, S.: Converting video formats with FFMPEG. Linux J. 2006(146), 10 (2006)","journal-title":"Linux J."},{"key":"11_CR45","doi-asserted-by":"crossref","unstructured":"Wang, H., et al.: MCL-JCV: a JND-based h. 264\/AVC video quality assessment dataset. In: 2016 IEEE International Conference on Image Processing (ICIP), pp. 1509\u20131513. IEEE (2016)","DOI":"10.1109\/ICIP.2016.7532610"},{"key":"11_CR46","doi-asserted-by":"crossref","unstructured":"Wiegand, T., Sullivan, G.J., Bjontegaard, G., Luthra, A.: Overview of the h. 264\/AVC video coding standard. IEEE Trans. Circ. Syst. Video Technol. 13(7), 560\u2013576 (2003)","DOI":"10.1109\/TCSVT.2003.815165"},{"key":"11_CR47","doi-asserted-by":"crossref","unstructured":"Yang, J., Zheng, W.S., Yang, Q., Chen, Y.C., Tian, Q.: Spatial-temporal graph convolutional network for video-based person re-identification. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3289\u20133299 (2020)","DOI":"10.1109\/CVPR42600.2020.00335"},{"key":"11_CR48","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 586\u2013595 (2018)","DOI":"10.1109\/CVPR.2018.00068"},{"key":"11_CR49","doi-asserted-by":"crossref","unstructured":"Zhao, Q., Asif, M.S., Ma, Z.: DNeRV: modeling inherent dynamics via difference neural representation for videos. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2031\u20132040 (2023)","DOI":"10.1109\/CVPR52729.2023.00202"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72904-1_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,15]],"date-time":"2025-03-15T19:50:53Z","timestamp":1742068253000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72904-1_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,21]]},"ISBN":["9783031729034","9783031729041"],"references-count":49,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72904-1_11","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,21]]},"assertion":[{"value":"21 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}