{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:33:54Z","timestamp":1777656834150,"version":"3.51.4"},"publisher-location":"Cham","reference-count":72,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031728471","type":"print"},{"value":"9783031728488","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72848-8_18","type":"book-chapter","created":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T13:37:01Z","timestamp":1732801021000},"page":"305-322","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Long-Term Temporal Context Gathering for\u00a0Neural Video Compression"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-4278-9400","authenticated-orcid":false,"given":"Linfeng","family":"Qi","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8814-9691","authenticated-orcid":false,"given":"Zhaoyang","family":"Jia","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9881-2124","authenticated-orcid":false,"given":"Jiahao","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5635-2916","authenticated-orcid":false,"given":"Bin","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2188-3028","authenticated-orcid":false,"given":"Houqiang","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5383-6424","authenticated-orcid":false,"given":"Yan","family":"Lu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,29]]},"reference":[{"key":"18_CR1","unstructured":"ECM. https:\/\/vcgit.hhi.fraunhofer.de\/ecm\/ECM\/"},{"key":"18_CR2","unstructured":"HM. https:\/\/vcgit.hhi.fraunhofer.de\/jvet\/HM\/"},{"key":"18_CR3","unstructured":"Original vimeo links. https:\/\/github.com\/anchen1011\/toflow\/blob\/master\/data\/original_vimeo_links.txt"},{"key":"18_CR4","unstructured":"VTM. https:\/\/vcgit.hhi.fraunhofer.de\/jvet\/VVCSoftware_VTM\/"},{"issue":"6","key":"18_CR5","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1109\/79.733497","volume":"15","author":"GJ Sullivan","year":"1998","unstructured":"Sullivan, G.J., Wiegand, T.: Rate-distortion optimization for video compression. IEEE Sig. Process. Mag. 15(6), 74\u201390 (1998)","journal-title":"IEEE Sig. Process. Mag."},{"key":"18_CR6","doi-asserted-by":"crossref","unstructured":"Agustsson, E., Minnen, D., Johnston, N., Balle, J., Hwang, S.J., Toderici, G.: Scale-space flow for end-to-end optimized video compression. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8503\u20138512 (2020)","DOI":"10.1109\/CVPR42600.2020.00853"},{"key":"18_CR7","doi-asserted-by":"crossref","unstructured":"Agustsson, E., Tschannen, M., Mentzer, F., Timofte, R., Gool, L.V.: Generative adversarial networks for extreme learned image compression. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 221\u2013231 (2019)","DOI":"10.1109\/ICCV.2019.00031"},{"key":"18_CR8","doi-asserted-by":"crossref","unstructured":"Arnab, A., Dehghani, M., Heigold, G., Sun, C., Lu\u010di\u0107, M., Schmid, C.: ViViT: a video vision transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6836\u20136846 (2021)","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"18_CR9","unstructured":"Bjontegaard, G.: Calculation of average PSNR differences between RD-curves. VCEG-M33 (2001)"},{"key":"18_CR10","unstructured":"Bossen, F., et\u00a0al.: Common test conditions and software reference configurations. JCTVC-L1100 12(7), 1 (2013)"},{"issue":"10","key":"18_CR11","doi-asserted-by":"publisher","first-page":"3736","DOI":"10.1109\/TCSVT.2021.3101953","volume":"31","author":"B Bross","year":"2021","unstructured":"Bross, B., et al.: Overview of the versatile video coding (VVC) standard and its applications. IEEE Trans. Circ. Syst. Video Technol. 31(10), 3736\u20133764 (2021)","journal-title":"IEEE Trans. Circ. Syst. Video Technol."},{"key":"18_CR12","doi-asserted-by":"publisher","first-page":"1697","DOI":"10.1109\/TIP.2022.3140608","volume":"31","author":"Z Chen","year":"2022","unstructured":"Chen, Z., Gu, S., Lu, G., Xu, D.: Exploiting intra-slice and inter-slice redundancy for learning-based lossless volumetric image compression. IEEE Trans. Image Process. 31, 1697\u20131707 (2022)","journal-title":"IEEE Trans. Image Process."},{"key":"18_CR13","doi-asserted-by":"crossref","unstructured":"Chen, Z., et al.: Neural video compression with spatio-temporal cross-covariance transformers. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 8543\u20138551 (2023)","DOI":"10.1145\/3581783.3611960"},{"key":"18_CR14","doi-asserted-by":"crossref","unstructured":"Dai, J., et al.: Deformable convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 764\u2013773 (2017)","DOI":"10.1109\/ICCV.2017.89"},{"key":"18_CR15","doi-asserted-by":"crossref","unstructured":"Dey, R., Salem, F.M.: Gate-variants of gated recurrent unit (GRU) neural networks. In: 2017 IEEE 60th International Midwest Symposium on Circuits and Systems (MWSCAS), pp. 1597\u20131600. IEEE (2017)","DOI":"10.1109\/MWSCAS.2017.8053243"},{"key":"18_CR16","doi-asserted-by":"crossref","unstructured":"Djelouah, A., Campos, J., Schaub-Meyer, S., Schroers, C.: Neural inter-frame compression for video coding. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6421\u20136429 (2019)","DOI":"10.1109\/ICCV.2019.00652"},{"key":"18_CR17","doi-asserted-by":"crossref","unstructured":"Gao, Y., Li, J., Chu, L., Lu, Y.: Implicit motion function. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19278\u201319289 (2024)","DOI":"10.1109\/CVPR52733.2024.01824"},{"key":"18_CR18","doi-asserted-by":"crossref","unstructured":"Ho, Y.H., Chang, C.P., Chen, P.Y., Gnutti, A., Peng, W.H.: CANF-VC: conditional augmented normalizing flows for video compression. arXiv preprint arXiv:2207.05315 (2022)","DOI":"10.1007\/978-3-031-19787-1_12"},{"issue":"8","key":"18_CR19","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"18_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1007\/978-3-030-58536-5_12","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Z Hu","year":"2020","unstructured":"Hu, Z., Chen, Z., Xu, D., Lu, G., Ouyang, W., Gu, S.: Improving deep video compression by\u00a0resolution-adaptive flow coding. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12347, pp. 193\u2013209. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58536-5_12"},{"key":"18_CR21","doi-asserted-by":"crossref","unstructured":"Hu, Z., Lu, G., Guo, J., Liu, S., Jiang, W., Xu, D.: Coarse-to-fine deep video coding with hyperprior-guided mode prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5921\u20135930 (2022)","DOI":"10.1109\/CVPR52688.2022.00583"},{"key":"18_CR22","doi-asserted-by":"crossref","unstructured":"Hu, Z., Lu, G., Xu, D.: FVC: a new framework towards deep video compression in feature space. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1502\u20131511 (2021)","DOI":"10.1109\/CVPR46437.2021.00155"},{"key":"18_CR23","doi-asserted-by":"crossref","unstructured":"Huang, C., Li, J., Chu, L., Liu, D., Lu, Y.: Disentangle propagation and restoration for efficient video recovery. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 8336\u20138345 (2023)","DOI":"10.1145\/3581783.3611922"},{"key":"18_CR24","doi-asserted-by":"crossref","unstructured":"Huang, C., Li, J., Chu, L., Liu, D., Lu, Y.: Arbitrary-scale video super-resolution guided by dynamic context. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a038, pp. 2294\u20132302 (2024)","DOI":"10.1609\/aaai.v38i3.28003"},{"key":"18_CR25","doi-asserted-by":"crossref","unstructured":"Huang, C., Li, J., Li, B., Liu, D., Lu, Y.: Neural compression-based feature learning for video restoration. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5872\u20135881 (2022)","DOI":"10.1109\/CVPR52688.2022.00578"},{"key":"18_CR26","doi-asserted-by":"crossref","unstructured":"Jampani, V., Sun, D., Liu, M.Y., Yang, M.H., Kautz, J.: Superpixel sampling networks. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 352\u2013368 (2018)","DOI":"10.1007\/978-3-030-01234-2_22"},{"key":"18_CR27","doi-asserted-by":"crossref","unstructured":"Jia, Z., Li, J., Li, B., Li, H., Lu, Y.: Generative latent coding for ultra-low bitrate image compression. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 26088\u201326098 (2024)","DOI":"10.1109\/CVPR52733.2024.02465"},{"key":"18_CR28","doi-asserted-by":"crossref","unstructured":"Kim, J.H., Heo, B., Lee, J.S.: Joint global and local hierarchical priors for learned image compression. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5992\u20136001 (2022)","DOI":"10.1109\/CVPR52688.2022.00590"},{"key":"18_CR29","doi-asserted-by":"crossref","unstructured":"Ladune, T., Philippe, P., Hamidouche, W., Zhang, L., D\u00e9forges, O.: Optical flow and mode selection for learning-based video coding. In: 2020 IEEE 22nd International Workshop on Multimedia Signal Processing (MMSP), pp.\u00a01\u20136. IEEE (2020)","DOI":"10.1109\/MMSP48831.2020.9287049"},{"key":"18_CR30","unstructured":"Ladune, T., Philippe, P., Hamidouche, W., Zhang, L., D\u00e9forges, O.: Conditional coding and variable bitrate for practical learned video coding. arXiv preprint arXiv:2104.09103 (2021)"},{"key":"18_CR31","unstructured":"Ladune, T., Philippe, P., Hamidouche, W., Zhang, L., D\u00e9forges, O.: Conditional coding for flexible learned video compression. arXiv preprint arXiv:2104.07930 (2021)"},{"key":"18_CR32","unstructured":"Li, J., Li, B., Lu, Y.: Deep contextual video compression. In: Advances in Neural Information Processing Systems 34 (2021)"},{"key":"18_CR33","doi-asserted-by":"crossref","unstructured":"Li, J., Li, B., Lu, Y.: Hybrid spatial-temporal entropy modelling for neural video compression. In: Proceedings of the 30th ACM International Conference on Multimedia (2022)","DOI":"10.1145\/3503161.3547845"},{"key":"18_CR34","doi-asserted-by":"crossref","unstructured":"Li, J., Li, B., Lu, Y.: Neural video compression with diverse contexts. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22616\u201322626 (2023)","DOI":"10.1109\/CVPR52729.2023.02166"},{"key":"18_CR35","doi-asserted-by":"crossref","unstructured":"Li, J., Li, B., Lu, Y.: Neural video compression with feature modulation. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2024, Seattle, WA, USA, 17\u201321 June 2024 (2024)","DOI":"10.1109\/CVPR52733.2024.02466"},{"issue":"6","key":"18_CR36","doi-asserted-by":"publisher","first-page":"1369","DOI":"10.1109\/TCSVT.2017.2657758","volume":"28","author":"J Li","year":"2017","unstructured":"Li, J., Li, B., Xu, J., Xiong, R.: Diversity-based reference picture management for low delay screen content coding. IEEE Trans. Circ. Syst. Video Technol. 28(6), 1369\u20131378 (2017)","journal-title":"IEEE Trans. Circ. Syst. Video Technol."},{"key":"18_CR37","doi-asserted-by":"crossref","unstructured":"Lin, J., Liu, D., Li, H., Wu, F.: M-LVC: multiple frames prediction for learned video compression. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3546\u20133554 (2020)","DOI":"10.1109\/CVPR42600.2020.00360"},{"key":"18_CR38","doi-asserted-by":"crossref","unstructured":"Liu, B., Chen, Y., Machineni, R.C., Liu, S., Kim, H.S.: MMVC: learned multi-mode video compression with block-based prediction mode selection and density-adaptive entropy coding. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18487\u201318496 (2023)","DOI":"10.1109\/CVPR52729.2023.01773"},{"issue":"3","key":"18_CR39","doi-asserted-by":"publisher","first-page":"325","DOI":"10.1109\/TCSVT.2009.2031442","volume":"20","author":"D Liu","year":"2009","unstructured":"Liu, D., Zhao, D., Ji, X., Gao, W.: Dual frame motion compensation with optimal long-term reference frame selection and bit allocation. IEEE Trans. Circ. Syst. Video Technol. 20(3), 325\u2013339 (2009)","journal-title":"IEEE Trans. Circ. Syst. Video Technol."},{"issue":"8","key":"18_CR40","doi-asserted-by":"publisher","first-page":"3182","DOI":"10.1109\/TCSVT.2020.3035680","volume":"31","author":"H Liu","year":"2020","unstructured":"Liu, H., et al.: Neural video coding using multiscale motion compensation and spatiotemporal context model. IEEE Trans. Circ. Syst. Video Technol. 31(8), 3182\u20133196 (2020)","journal-title":"IEEE Trans. Circ. Syst. Video Technol."},{"key":"18_CR41","doi-asserted-by":"crossref","unstructured":"Liu, H., Shen, H., Huang, L., Lu, M., Chen, T., Ma, Z.: Learned video compression via joint spatial-temporal correlation exploration. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a034, pp. 11580\u201311587 (2020)","DOI":"10.1609\/aaai.v34i07.6825"},{"key":"18_CR42","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"456","DOI":"10.1007\/978-3-030-58536-5_27","volume-title":"Computer Vision \u2013 ECCV 2020","author":"G Lu","year":"2020","unstructured":"Lu, G., et al.: Content adaptive and error propagation aware deep video compression. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12347, pp. 456\u2013472. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58536-5_27"},{"key":"18_CR43","doi-asserted-by":"crossref","unstructured":"Lu, G., Ouyang, W., Xu, D., Zhang, X., Cai, C., Gao, Z.: DVC: an end-to-end deep video compression framework. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11006\u201311015 (2019)","DOI":"10.1109\/CVPR.2019.01126"},{"issue":"10","key":"18_CR44","doi-asserted-by":"publisher","first-page":"3292","DOI":"10.1109\/TPAMI.2020.2988453","volume":"43","author":"G Lu","year":"2020","unstructured":"Lu, G., Zhang, X., Ouyang, W., Chen, L., Gao, Z., Xu, D.: An end-to-end learning framework for video compression. IEEE Trans. Pattern Anal. Mach. Intell. 43(10), 3292\u20133308 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"18_CR45","doi-asserted-by":"publisher","first-page":"7863","DOI":"10.1109\/TMM.2024.3372352","volume":"26","author":"W Ma","year":"2024","unstructured":"Ma, W., Li, J., Li, B., Lu, Y.: Uncertainty-aware deep video compression with ensembles. IEEE Trans. Multimedia 26, 7863\u20137872 (2024)","journal-title":"IEEE Trans. Multimedia"},{"key":"18_CR46","unstructured":"Ma, X., et al.: Image as set of points. In: The Eleventh International Conference on Learning Representations (2023)"},{"key":"18_CR47","unstructured":"Mentzer, F., et al.: VCT: a video compression transformer. arXiv preprint arXiv:2206.07307 (2022)"},{"key":"18_CR48","unstructured":"Mentzer, F., Toderici, G.D., Tschannen, M., Agustsson, E.: High-fidelity generative image compression. In: Advances in Neural Information Processing Systems 33, pp. 11913\u201311924 (2020)"},{"key":"18_CR49","doi-asserted-by":"crossref","unstructured":"Mercat, A., Viitanen, M., Vanne, J.: UVG dataset: 50\/120fps 4K sequences for video codec analysis and development. In: Proceedings of the 11th ACM Multimedia Systems Conference, pp. 297\u2013302 (2020)","DOI":"10.1145\/3339825.3394937"},{"key":"18_CR50","doi-asserted-by":"crossref","unstructured":"Neimark, D., Bar, O., Zohar, M., Asselmann, D.: Video transformer network. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3163\u20133172 (2021)","DOI":"10.1109\/ICCVW54120.2021.00355"},{"issue":"10","key":"18_CR51","doi-asserted-by":"publisher","first-page":"1729","DOI":"10.1109\/TCSVT.2014.2302555","volume":"24","author":"M Paul","year":"2014","unstructured":"Paul, M., Lin, W., Lau, C.T., Lee, B.S.: A long-term reference frame for hierarchical B-picture-based video coding. IEEE Trans. Circ. Syst. Video Technol. 24(10), 1729\u20131742 (2014)","journal-title":"IEEE Trans. Circ. Syst. Video Technol."},{"key":"18_CR52","doi-asserted-by":"crossref","unstructured":"Qi, L., Li, J., Li, B., Li, H., Lu, Y.: Motion information propagation for neural video compression. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6111\u20136120 (2023)","DOI":"10.1109\/CVPR52729.2023.00592"},{"key":"18_CR53","doi-asserted-by":"crossref","unstructured":"Rippel, O., Anderson, A.G., Tatwawadi, K., Nair, S., Lytle, C., Bourdev, L.: ELF-VC: efficient learned flexible-rate video coding. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 14479\u201314488 (2021)","DOI":"10.1109\/ICCV48922.2021.01421"},{"key":"18_CR54","doi-asserted-by":"crossref","unstructured":"Rippel, O., Nair, S., Lew, C., Branson, S., Anderson, A.G., Bourdev, L.: Learned video compression. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3454\u20133463 (2019)","DOI":"10.1109\/ICCV.2019.00355"},{"key":"18_CR55","doi-asserted-by":"publisher","first-page":"7311","DOI":"10.1109\/TMM.2022.3220421","volume":"25","author":"X Sheng","year":"2022","unstructured":"Sheng, X., Li, J., Li, B., Li, L., Liu, D., Lu, Y.: Temporal context mining for learned video compression. IEEE Trans. Multimedia 25, 7311\u20137322 (2022)","journal-title":"IEEE Trans. Multimedia"},{"issue":"12","key":"18_CR56","doi-asserted-by":"publisher","first-page":"1649","DOI":"10.1109\/TCSVT.2012.2221191","volume":"22","author":"GJ Sullivan","year":"2012","unstructured":"Sullivan, G.J., Ohm, J.R., Han, W.J., Wiegand, T.: Overview of the high efficiency video coding (HEVC) standard. IEEE Trans. Circ. Syst. Video Technol. 22(12), 1649\u20131668 (2012)","journal-title":"IEEE Trans. Circ. Syst. Video Technol."},{"key":"18_CR57","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1109\/LSP.2007.914928","volume":"15","author":"M Tiwari","year":"2008","unstructured":"Tiwari, M., Cosman, P.C.: Selection of long-term reference frames in dual-frame video coding using simulated annealing. IEEE Sig. Process. Lett. 15, 249\u2013252 (2008)","journal-title":"IEEE Sig. Process. Lett."},{"key":"18_CR58","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems 30 (2017)"},{"key":"18_CR59","unstructured":"Wang, G.H., Li, J., Li, B., Lu, Y.: EVC: towards real-time neural image compression with mask decay. In: International Conference on Learning Representations (2023)"},{"key":"18_CR60","doi-asserted-by":"crossref","unstructured":"Wang, H., et al.: MCL-JCV: a JND-based H.264\/AVC video quality assessment dataset. In: 2016 IEEE International Conference on Image Processing (ICIP), pp. 1509\u20131513. IEEE (2016)","DOI":"10.1109\/ICIP.2016.7532610"},{"issue":"1","key":"18_CR61","doi-asserted-by":"publisher","first-page":"70","DOI":"10.1109\/76.744276","volume":"9","author":"T Wiegand","year":"1999","unstructured":"Wiegand, T., Zhang, X., Girod, B.: Long-term memory motion-compensated prediction. IEEE Trans. Circ. Syst. Video Technol. 9(1), 70\u201384 (1999)","journal-title":"IEEE Trans. Circ. Syst. Video Technol."},{"key":"18_CR62","unstructured":"Xiang, J., Tian, K., Zhang, J.: MIMT: masked image modeling transformer for video compression. In: The Eleventh International Conference on Learning Representations (2022)"},{"key":"18_CR63","doi-asserted-by":"crossref","unstructured":"Xie, F., Chu, L., Li, J., Lu, Y., Ma, C.: VideoTrack: learning to track objects via video transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22826\u201322835 (2023)","DOI":"10.1109\/CVPR52729.2023.02186"},{"key":"18_CR64","doi-asserted-by":"crossref","unstructured":"Xu, X., Wang, J., Ming, X., Lu, Y.: Towards robust video object segmentation with adaptive object calibration. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 2709\u20132718 (2022)","DOI":"10.1145\/3503161.3547824"},{"issue":"8","key":"18_CR65","doi-asserted-by":"publisher","first-page":"1106","DOI":"10.1007\/s11263-018-01144-2","volume":"127","author":"T Xue","year":"2019","unstructured":"Xue, T., Chen, B., Wu, J., Wei, D., Freeman, W.T.: Video enhancement with task-oriented flow. Int. J. Comput. Vis. 127(8), 1106\u20131125 (2019)","journal-title":"Int. J. Comput. Vis."},{"issue":"2","key":"18_CR66","doi-asserted-by":"publisher","first-page":"388","DOI":"10.1109\/JSTSP.2020.3043590","volume":"15","author":"R Yang","year":"2020","unstructured":"Yang, R., Mentzer, F., Van Gool, L., Timofte, R.: Learning for video compression with recurrent auto-encoder and recurrent probability model. IEEE J. Sel. Top. Sig. Process. 15(2), 388\u2013401 (2020)","journal-title":"IEEE J. Sel. Top. Sig. Process."},{"key":"18_CR67","unstructured":"Yang, R., Yang, Y., Marino, J., Mandt, S.: Hierarchical autoregressive modeling for neural video compression. arXiv preprint arXiv:2010.10258 (2020)"},{"key":"18_CR68","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"332","DOI":"10.1007\/978-3-030-58558-7_20","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Z Yang","year":"2020","unstructured":"Yang, Z., Wei, Y., Yang, Y.: Collaborative video object segmentation by foreground-background integration. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12350, pp. 332\u2013348. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58558-7_20"},{"key":"18_CR69","doi-asserted-by":"publisher","first-page":"288","DOI":"10.1007\/978-3-031-19818-2_17","volume-title":"European Conference on Computer Vision 2022","author":"Q Yu","year":"2022","unstructured":"Yu, Q., et al.: $$k$$-means mask transformer. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13689, pp. 288\u2013307. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19818-2_17"},{"key":"18_CR70","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Duan, Z., Lu, M., Ding, D., Zhu, F., Ma, Z.: Another way to the top: exploit contextual clustering in learned image coding (2024)","DOI":"10.1609\/aaai.v38i8.28791"},{"key":"18_CR71","doi-asserted-by":"crossref","unstructured":"Zhao, J., Li, B., Li, J., Xiong, R., Lu, Y.: A universal encoder rate distortion optimization framework for learned compression. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1880\u20131884 (2021)","DOI":"10.1109\/CVPRW53098.2021.00210"},{"issue":"1","key":"18_CR72","first-page":"1","volume":"20","author":"J Zhao","year":"2023","unstructured":"Zhao, J., Li, B., Li, J., Xiong, R., Lu, Y.: A universal optimization framework for learning-based image codec. ACM Trans. Multimed. Comput. Commun. Appl. 20(1), 1\u201319 (2023)","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72848-8_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T14:09:25Z","timestamp":1732802965000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72848-8_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,29]]},"ISBN":["9783031728471","9783031728488"],"references-count":72,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72848-8_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,29]]},"assertion":[{"value":"29 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}