{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T05:09:55Z","timestamp":1773119395383,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612500","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:12Z","timestamp":1698391632000},"page":"9205-9214","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":27,"title":["DeepSVC: Deep Scalable Video Coding for Both Machine and Human Vision"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8327-3420","authenticated-orcid":false,"given":"Hongbin","family":"Lin","sequence":"first","affiliation":[{"name":"Fuzhou University, Fuzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7076-4739","authenticated-orcid":false,"given":"Bolin","family":"Chen","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4975-3525","authenticated-orcid":false,"given":"Zhichen","family":"Zhang","sequence":"additional","affiliation":[{"name":"Fuzhou University, Fuzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7957-2858","authenticated-orcid":false,"given":"Jielian","family":"Lin","sequence":"additional","affiliation":[{"name":"Fuzhou University, Fuzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2948-6468","authenticated-orcid":false,"given":"Xu","family":"Wang","sequence":"additional","affiliation":[{"name":"Shenzhen University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7497-8883","authenticated-orcid":false,"given":"Tiesong","family":"Zhao","sequence":"additional","affiliation":[{"name":"Fuzhou University, Fuzhou, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"2001. Video trace library. http:\/\/trace.eas.asu.edu\/yuv\/index.html. (2001)."},{"key":"e_1_3_2_2_2_1","volume-title":"CompressAI: A PyTorch Library and Evaluation Platform for End-to-End Compression Research. arXiv preprint arXiv:2011.03029","author":"B\u00e9gaint Jean","year":"2020","unstructured":"Jean B\u00e9gaint, Fabien Racap\u00e9, Simon Feltman, and Akshay Pushparaja. 2020. CompressAI: A PyTorch Library and Evaluation Platform for End-to-End Compression Research. arXiv preprint arXiv:2011.03029 (2020)."},{"key":"e_1_3_2_2_3_1","volume-title":"Calculation of Average PSNR Differences Between RD-Curves. Doc. VCEG-M33","author":"Bjontegaard Gisle","year":"2001","unstructured":"Gisle Bjontegaard. 2001. Calculation of Average PSNR Differences Between RD-Curves. Doc. VCEG-M33, ITU-T Video Coding Experts Group (VCEG) (Jan. 2001)."},{"key":"e_1_3_2_2_4_1","volume-title":"Joint Collaborative Team on Video Coding (JCT-VC) (Jan.","author":"Bossen Frank","year":"2013","unstructured":"Frank Bossen. 2013. Common Test Conditions and Software Reference Configurations. Doc. JCTVC-L1100, Joint Collaborative Team on Video Coding (JCT-VC) (Jan. 2013)."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2015.2461951"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3101953"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2941660"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3160602"},{"key":"e_1_3_2_2_10_1","volume-title":"Scalable Video Coding for Humans and Machines. In 2022 IEEE 24th International Workshop on Multimedia Signal Processing (MMSP). 1--6.","author":"Choi Hyomin","year":"2022","unstructured":"Hyomin Choi and Ivan V Baji\u0107. 2022. Scalable Video Coding for Humans and Machines. In 2022 IEEE 24th International Workshop on Multimedia Signal Processing (MMSP). 1--6."},{"key":"e_1_3_2_2_11_1","unstructured":"MMTracking Contributors. 2020. MMTracking: OpenMMLab Video Perception Toolbox and Benchmark. https:\/\/github.com\/open-mmlab\/mmtracking."},{"key":"e_1_3_2_2_12_1","unstructured":"MMAction2 Contributors. 2020. OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark. https:\/\/github.com\/open-mmlab\/mmaction2."},{"key":"e_1_3_2_2_13_1","volume-title":"Swift: Adaptive Video Streaming with Layered Neural Codecs. In 2022 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI). 103--118","author":"Dasari Mallesham","year":"2022","unstructured":"Mallesham Dasari, Kumara Kahatapitiya, Samir R Das, Aruna Balasubramanian, and Dimitris Samaras. 2022. Swift: Adaptive Video Streaming with Layered Neural Codecs. In 2022 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI). 103--118."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3016485"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2015.2500034"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/MMUL.2018.2873844"},{"key":"e_1_3_2_2_17_1","volume-title":"Low-Rate Image Compression with Super-Resolution Learning. In 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW). 154--155","author":"Gao Wei","year":"2020","unstructured":"Wei Gao, Lvfang Tao, Linjie Zhou, Dinghao Yang, Xiaoyu Zhang, and Zixuan Guo. 2020. Low-Rate Image Compression with Super-Resolution Learning. In 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW). 154--155."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i2.16234"},{"key":"e_1_3_2_2_19_1","volume-title":"Deep Residual Learning for Image Recognition. In 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). 770--778","author":"He Kaiming","year":"2016","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2016. Deep Residual Learning for Image Recognition. In 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). 770--778."},{"key":"e_1_3_2_2_20_1","volume-title":"Towards Coding for Human and Machine Vision: A Scalable Image Coding Approach. In 2020 IEEE International Conference on Multimedia and Expo (ICME). 1--6.","author":"Hu Yueyu","year":"2020","unstructured":"Yueyu Hu, Shuai Yang, Wenhan Yang, Ling-Yu Duan, and Jiaying Liu. 2020. Towards Coding for Human and Machine Vision: A Scalable Image Coding Approach. In 2020 IEEE International Conference on Multimedia and Expo (ICME). 1--6."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00155"},{"key":"e_1_3_2_2_22_1","volume-title":"HMFVC: A Human-Machine Friendly Video Compression Scheme","author":"Huang Zhimeng","year":"2022","unstructured":"Zhimeng Huang, Chuanmin Jia, Shanshe Wang, and Siwei Ma. 2022. HMFVC: A Human-Machine Friendly Video Compression Scheme. IEEE Transactions on Circuits and Systems for Video Technology (2022)."},{"key":"e_1_3_2_2_23_1","volume-title":"Slow and Steady Feature Analysis: Higher Order Temporal Coherence in Video. In 2016 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 3852--3861","author":"Jayaraman Dinesh","year":"2016","unstructured":"Dinesh Jayaraman and Kristen Grauman. 2016. Slow and Steady Feature Analysis: Higher Order Temporal Coherence in Video. In 2016 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 3852--3861."},{"key":"e_1_3_2_2_24_1","volume-title":"HMDB: A Large Video Database for Human Motion Recognition. In 2011 Proceedings of the IEEE International Conference on Computer Vision (ICCV). 2556--2563","author":"Kuehne Hildegard","year":"2011","unstructured":"Hildegard Kuehne, Hueihan Jhuang, Est\u00edbaliz Garrote, Tomaso Poggio, and Thomas Serre. 2011. HMDB: A Large Video Database for Human Motion Recognition. In 2011 Proceedings of the IEEE International Conference on Computer Vision (ICCV). 2556--2563."},{"key":"e_1_3_2_2_25_1","volume-title":"Image Coding for Machines: An End-to-End Learned Approach. In 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 1590--1594","author":"Le Nam","year":"2021","unstructured":"Nam Le, Honglei Zhang, Francesco Cricri, Ramin Ghaznavi-Youvalari, and Esa Rahtu. 2021. Image Coding for Machines: An End-to-End Learned Approach. In 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 1590--1594."},{"key":"e_1_3_2_2_26_1","first-page":"18114","article-title":"Deep Contextual Video Compression","volume":"34","author":"Li Jiahao","year":"2021","unstructured":"Jiahao Li, Bin Li, and Yan Lu. 2021. Deep Contextual Video Compression. Advances in Neural Information Processing Systems 34 (2021), 18114--18125.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_27_1","volume-title":"Hybrid Spatial-Temporal Entropy Modelling for Neural Video Compression. In 2022 Proceedings of the 30th ACM International Conference on Multimedia (ACM MM). 1503--1511","author":"Li Jiahao","year":"2022","unstructured":"Jiahao Li, Bin Li, and Yan Lu. 2022. Hybrid Spatial-Temporal Entropy Modelling for Neural Video Compression. In 2022 Proceedings of the 30th ACM International Conference on Multimedia (ACM MM). 1503--1511."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"crossref","unstructured":"Jerry Liu Shenlong Wang Wei-Chiu Ma Meet Shah Rui Hu Pranaab Dhawan and Raquel Urtasun. 2020. Conditional Entropy Coding for Efficient Video Compression. In 2020 Proceedings of the European Conference on Computer Vision (ECCV). 453--468.","DOI":"10.1007\/978-3-030-58520-4_27"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"e_1_3_2_2_30_1","volume-title":"DVC: An End-to-End Deep Video Compression Framework. In 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 10998--11007","author":"Lu Guo","year":"2019","unstructured":"Guo Lu, Wanli Ouyang, Dong Xu, Xiaoyun Zhang, Chunlei Cai, and Zhiyong Gao. 2019. DVC: An End-to-End Deep Video Compression Framework. In 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 10998--11007."},{"key":"e_1_3_2_2_31_1","volume-title":"Supervised Compression for Resource-Constrained Edge Computing Systems. In 2022 IEEE\/CVF Winter Conference on Applications of Computer Vision (WCACV). 2685--2695","author":"Matsubara Yoshitomo","year":"2022","unstructured":"Yoshitomo Matsubara, Ruihan Yang, Marco Levorato, and Stephan Mandt. 2022. Supervised Compression for Resource-Constrained Edge Computing Systems. In 2022 IEEE\/CVF Winter Conference on Applications of Computer Vision (WCACV). 2685--2695."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3339825.3394937"},{"key":"e_1_3_2_2_33_1","volume-title":"Channel-Wise Autoregressive Entropy Models for Learned Image Compression. In 2020 IEEE International Conference on Image Processing (ICIP). 3339--3343","author":"Minnen David","year":"2020","unstructured":"David Minnen and Saurabh Singh. 2020. Channel-Wise Autoregressive Entropy Models for Learned Image Compression. In 2020 IEEE International Conference on Image Processing (ICIP). 3339--3343."},{"key":"e_1_3_2_2_34_1","volume-title":"Video Feature Compression for Machine Tasks. In 2022 IEEE International Conference on Multimedia and Expo (ICME). 1--6.","author":"Misra Kiran","year":"2022","unstructured":"Kiran Misra, Tianying Ji, Andrew Segall, and Frank Bossen. 2022. Video Feature Compression for Machine Tasks. In 2022 IEEE International Conference on Multimedia and Expo (ICME). 1--6."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS.2017.8050296"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.291"},{"key":"e_1_3_2_2_37_1","volume-title":"ELF-VC: Efficient Learned Flexible-Rate Video Coding. In 2021 Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV). 14479--14488","author":"Rippel Oren","year":"2021","unstructured":"Oren Rippel, Alexander G. Anderson, Kedar Tatwawadi, Sanjay Nair, Craig Lytle, and Lubomir Bourdev. 2021. ELF-VC: Efficient Learned Flexible-Rate Video Coding. In 2021 Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV). 14479--14488."},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"crossref","unstructured":"Olaf Ronneberger Philipp Fischer and Thomas Brox. 2015. U-Net: Convolutional Networks for Biomedical Image Segmentation. In 2015 Medical Image Computing and Computer-Assisted Intervention (MICCAI). 234--241.","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3220421"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.207"},{"key":"e_1_3_2_2_42_1","volume-title":"Amir Roshan Zamir, and Mubarak Shah","author":"Soomro Khurram","year":"2012","unstructured":"Khurram Soomro, Amir Roshan Zamir, and Mubarak Shah. 2012. UCF101: A Dataset of 101 Human Actions Classes from Videos in the Wild. arXiv preprint arXiv:1212.0402 (2012)."},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2012.2221191"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"e_1_3_2_2_45_1","volume-title":"Towards Analysis-Friendly Face Representation with Scalable Feature and Texture Compression","author":"Wang Shurun","year":"2021","unstructured":"Shurun Wang, Shiqi Wang, Wenhan Yang, Xinfeng Zhang, Shanshe Wang, Siwei Ma, and Wen Gao. 2021. Towards Analysis-Friendly Face Representation with Scalable Feature and Texture Compression. IEEE Transactions on Multimedia (2021)."},{"key":"e_1_3_2_2_46_1","volume-title":"Multiscale Structural Similarity for Image Quality Assessment. In The Thrity-Seventh Asilomar Conference on Signals, Systems & Computers","volume":"2","author":"Simoncelli Eero P","year":"2003","unstructured":"ZhouWang, Eero P Simoncelli, and Alan C Bovik. 2003. Multiscale Structural Similarity for Image Quality Assessment. In The Thrity-Seventh Asilomar Conference on Signals, Systems & Computers, Vol. 2. 1398--1402."},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2003.815165"},{"key":"e_1_3_2_2_48_1","volume-title":"Deep Image Compression with Latent Optimization and Piece-Wise Quantization Approximation. In 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 1926--1930","author":"Wu Yuyang","year":"2021","unstructured":"Yuyang Wu, Zhiyang Qi, Huiming Zheng, Lvfang Tao, and Wei Gao. 2021. Deep Image Compression with Latent Optimization and Piece-Wise Quantization Approximation. In 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 1926--1930."},{"key":"e_1_3_2_2_49_1","volume-title":"An Emerging Coding Paradigm VCM: A Scalable Coding Approach Beyond Feature and Signal. In 2020 IEEE International Conference on Multimedia and Expo (ICME). 1--6.","author":"Xia Sifeng","year":"2020","unstructured":"Sifeng Xia, Kunchangtai Liang, Wenhan Yang, Ling-Yu Duan, and Jiaying Liu. 2020. An Emerging Coding Paradigm VCM: A Scalable Coding Approach Beyond Feature and Signal. In 2020 IEEE International Conference on Multimedia and Expo (ICME). 1--6."},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-018-01144-2"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3121131"},{"key":"e_1_3_2_2_52_1","volume-title":"Advancing Learned Video Compression with In-Loop Frame Prediction","author":"Yang Ren","year":"2022","unstructured":"Ren Yang, Radu Timofte, and Luc Van Gool. 2022. Advancing Learned Video Compression with In-Loop Frame Prediction. IEEE Transactions on Circuits and Systems for Video Technology (2022)."},{"key":"e_1_3_2_2_53_1","volume-title":"Task-Driven Video Compression for Humans and Machines: Framework Design and Optimization","author":"Yi Xiaokai","year":"2022","unstructured":"Xiaokai Yi, Hanli Wang, Sam Kwong, and C-C Jay Kuo. 2022. Task-Driven Video Compression for Humans and Machines: Framework Design and Optimization. IEEE Transactions on Multimedia (2022)."},{"key":"e_1_3_2_2_54_1","volume-title":"MSFC: Deep Feature Compression in Multi-Task Network. In 2021 IEEE International Conference on Multimedia and Expo (ICME). 1--6.","author":"Zhang Zhicong","year":"2021","unstructured":"Zhicong Zhang, Mengyang Wang, Mengyao Ma, Jiahui Li, and Xiaopeng Fan. 2021. MSFC: Deep Feature Compression in Multi-Task Network. In 2021 IEEE International Conference on Multimedia and Expo (ICME). 1--6."},{"key":"e_1_3_2_2_55_1","volume-title":"Learning-Based Video Coding with Joint Deep Compression and Enhancement. In 2022 Proceedings of the 30th ACM International Conference on Multimedia (ACM MM). 3045--3054","author":"Zhao Tiesong","year":"2022","unstructured":"Tiesong Zhao, Weize Feng, HongJi Zeng, Yiwen Xu, Yuzhen Niu, and Jiaying Liu. 2022. Learning-Based Video Coding with Joint Deep Compression and Enhancement. In 2022 Proceedings of the 30th ACM International Conference on Multimedia (ACM MM). 3045--3054."},{"key":"e_1_3_2_2_56_1","volume-title":"Deep Feature Flow for Video Recognition. In 2017 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 2349--2358","author":"Zhu Xizhou","year":"2017","unstructured":"Xizhou Zhu, Yuwen Xiong, Jifeng Dai, Lu Yuan, and Yichen Wei. 2017. Deep Feature Flow for Video Recognition. In 2017 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 2349--2358."}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612500","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612500","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:03:04Z","timestamp":1755820984000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612500"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":56,"alternative-id":["10.1145\/3581783.3612500","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612500","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}