{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,24]],"date-time":"2026-07-24T14:56:40Z","timestamp":1784905000590,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":62,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"the Basic and Frontier Research Project of PCL"},{"name":"the National Natural Science Foundation of China","award":["62088102"],"award-info":[{"award-number":["62088102"]}]},{"name":"the National Natural Science Foundation of China","award":["61972129"],"award-info":[{"award-number":["61972129"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3611851","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:12Z","timestamp":1698391632000},"page":"1431-1442","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Toward Scalable Image Feature Compression: A Content-Adaptive and Diffusion-Based Approach"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-9111-4084","authenticated-orcid":false,"given":"Sha","family":"Guo","sequence":"first","affiliation":[{"name":"Peking University &amp; Peng Cheng Laboratory, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0563-1760","authenticated-orcid":false,"given":"Zhuo","family":"Chen","sequence":"additional","affiliation":[{"name":"Peng Cheng Laboratory, Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4032-8049","authenticated-orcid":false,"given":"Yang","family":"Zhao","sequence":"additional","affiliation":[{"name":"Hefei University of Technology, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3985-4305","authenticated-orcid":false,"given":"Ning","family":"Zhang","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8219-4176","authenticated-orcid":false,"given":"Xiaotong","family":"Li","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4491-2023","authenticated-orcid":false,"given":"Lingyu","family":"Duan","sequence":"additional","affiliation":[{"name":"Peking University &amp; Peng Cheng Laboratory, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"International Conference on Learning Representations.","author":"Ball\u00e9 Johannes","year":"2018","unstructured":"Johannes Ball\u00e9, Nick Johnston, and David Minnen. 2018. Integer networks for data compression with latent-variable models. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_2_1","volume-title":"Sung Jin Hwang, and Nick Johnston","author":"Ball\u00e9 Johannes","year":"2018","unstructured":"Johannes Ball\u00e9, David Minnen, Saurabh Singh, Sung Jin Hwang, and Nick Johnston. 2018. Variational image compression with a scale hyperprior. arXiv preprint arXiv:1802.01436 (2018)."},{"key":"e_1_3_2_1_3_1","volume-title":"Cold diffusion: Inverting arbitrary image transforms without noise. arXiv preprint arXiv:2208.09392","author":"Bansal Arpit","year":"2022","unstructured":"Arpit Bansal, Eitan Borgnia, Hong-Min Chu, Jie S Li, Hamid Kazemi, Furong Huang, Micah Goldblum, Jonas Geiping, and Tom Goldstein. 2022. Cold diffusion: Inverting arbitrary image transforms without noise. arXiv preprint arXiv:2208.09392 (2022)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00652"},{"key":"e_1_3_2_1_5_1","volume-title":"The Fourier transform and its applications NY","author":"Bracewell Ronald Newbold","unstructured":"Ronald Newbold Bracewell. 1986. The Fourier transform and its applications NY, McGraw-Hill."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3101953"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00324"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00525"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00520"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475419"},{"key":"e_1_3_2_1_11_1","first-page":"30150","article-title":"Genie: Higher-order denoising diffusion solvers","volume":"35","author":"Dockhorn Tim","year":"2022","unstructured":"Tim Dockhorn, Arash Vahdat, and Karsten Kreis. 2022. Genie: Higher-order denoising diffusion solvers. Advances in Neural Information Processing Systems 35 (2022), 30150--30166.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3016485"},{"key":"e_1_3_2_1_13_1","volume-title":"Generative adversarial nets in advances in neural information processing systems (NIPS). Curran Associates","author":"Goodfellow Ian","year":"2014","unstructured":"Ian Goodfellow, Jean Pouget-Abadie, Mehdi Mirza, Bing Xu, David Warde-Farley, Sherjil Ozair, Aaron Courville, and Yoshua Bengio. 2014. Generative adversarial nets in advances in neural information processing systems (NIPS). Curran Associates, Inc. Red Hook, NY, USA (2014), 2672--2680."},{"key":"e_1_3_2_1_14_1","volume-title":"Neural Image Compression with a Diffusion-Based Decoder. arXiv preprint arXiv:2301.05489","author":"Goose Noor Fathima","year":"2023","unstructured":"Noor Fathima Goose, Jens Petersen, Auke Wiggers, Tianlin Xu, and Guillaume Sautiere. 2023. Neural Image Compression with a Diffusion-Based Decoder. arXiv preprint arXiv:2301.05489 (2023)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.2307\/2346830"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00563"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"e_1_3_2_1_18_1","unstructured":"Martin Heusel Hubert Ramsauer Thomas Unterthiner Bernhard Nessler and Sepp Hochreiter. 2017. GANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash Equilibrium. In Advances in Neural Information Processing Systems. 6626--6637."},{"key":"e_1_3_2_1_19_1","unstructured":"Jonathan Ho William Chan Chitwan Saharia Jay Whang Ruiqi Gao Alexey Gritsenko Diederik P Kingma Ben Poole Mohammad Norouzi David J Fleet et al. 2022. Imagen video: High definition video generation with diffusion models. arXiv preprint arXiv:2210.02303 (2022)."},{"key":"e_1_3_2_1_20_1","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in Neural Information Processing Systems 33 (2020), 6840--6851.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_21_1","first-page":"1","article-title":"Cascaded Diffusion Models for High Fidelity Image Generation","volume":"23","author":"Ho Jonathan","year":"2022","unstructured":"Jonathan Ho, Chitwan Saharia, William Chan, David J Fleet, Mohammad Norouzi, and Tim Salimans. 2022. Cascaded Diffusion Models for High Fidelity Image Generation. J. Mach. Learn. Res. 23, 47 (2022), 1--33.","journal-title":"J. Mach. Learn. Res."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2020.3040367"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"e_1_3_2_1_24_1","volume-title":"Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114","author":"Kingma Diederik P","year":"2013","unstructured":"Diederik P Kingma and Max Welling. 2013. Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2012.2221525"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00399"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2017.2727682"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00404"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/TFUZZ.2020.2966182"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01611"},{"key":"e_1_3_2_1_32_1","volume-title":"Knowledge distillation in iterative generative models for improved sampling speed. arXiv preprint arXiv:2101.02388","author":"Luhman Eric","year":"2021","unstructured":"Eric Luhman and Troy Luhman. 2021. Knowledge distillation in iterative generative models for improved sampling speed. arXiv preprint arXiv:2101.02388 (2021)."},{"key":"e_1_3_2_1_33_1","volume-title":"Accelerating diffusion models via early stop of the diffusion process. arXiv preprint arXiv:2205.12524","author":"Lyu Zhaoyang","year":"2022","unstructured":"Zhaoyang Lyu, Xudong Xu, Ceyuan Yang, Dahua Lin, and Bo Dai. 2022. Accelerating diffusion models via early stop of the diffusion process. arXiv preprint arXiv:2205.12524 (2022)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2014.2371951"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01374"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"Fabian Mentzer Eirikur Agustsson Michael Tschannen Radu Timofte and Luc Van Gool. 2018. Conditional probability models for deep image compression. In Advances in Neural Information Processing Systems (NeurIPS). 4390--4401.","DOI":"10.1109\/CVPR.2018.00462"},{"key":"e_1_3_2_1_37_1","first-page":"11913","article-title":"High-fidelity generative image compression","volume":"33","author":"Mentzer Fabian","year":"2020","unstructured":"Fabian Mentzer, George D Toderici, Michael Tschannen, and Eirikur Agustsson. 2020. High-fidelity generative image compression. Advances in Neural Information Processing Systems 33 (2020), 11913--11924.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_38_1","volume-title":"Joint autoregressive and hierarchical priors for learned image compression. Advances in neural information processing systems 31","author":"Minnen David","year":"2018","unstructured":"David Minnen, Johannes Ball\u00e9, and George D Toderici. 2018. Joint autoregressive and hierarchical priors for learned image compression. Advances in neural information processing systems 31 (2018)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2017.8296792"},{"key":"e_1_3_2_1_40_1","first-page":"327","article-title":"WebP: A new image format for the web","volume":"74","author":"Mukherjee Debargha","year":"2014","unstructured":"Debargha Mukherjee and Sanjit K Mitra. 2014. WebP: A new image format for the web. Journal of Signal Processing Systems 74, 3 (2014), 327--338.","journal-title":"Journal of Signal Processing Systems"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58545-7_19"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3072430"},{"key":"e_1_3_2_1_43_1","first-page":"1485","article-title":"x265: A HEVC\/H.265 Video Encoder Implementation","volume":"23","author":"Ramachandran Pradeep","year":"2013","unstructured":"Pradeep Ramachandran, Dzung T Nguyen, Vinod Pandit, Cheng Xu, Jianle Li, San Li, Shijun Li, Wenli Xu, Wei Liu, Zongming Li, et al. 2013. x265: A HEVC\/H.265 Video Encoder Implementation. IEEE Transactions on Circuits and Systems for Video Technology 23, 9 (2013), 1485--1497.","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_45_1","volume-title":"Progressive distillation for fast sampling of diffusion models. arXiv preprint arXiv:2202.00512","author":"Salimans Tim","year":"2022","unstructured":"Tim Salimans and Jonathan Ho. 2022. Progressive distillation for fast sampling of diffusion models. arXiv preprint arXiv:2202.00512 (2022)."},{"key":"e_1_3_2_1_46_1","first-page":"157","article-title":"WebP: A new image format for the Web","volume":"27","author":"Salomon J\u00e9r\u00e9my","year":"2012","unstructured":"J\u00e9r\u00e9my Salomon and Thomas Lecroq. 2012. WebP: A new image format for the Web. Signal Processing: Image Communication 27, 3 (2012), 157--167.","journal-title":"Signal Processing: Image Communication"},{"key":"e_1_3_2_1_47_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_48_1","volume-title":"International conference on machine learning. PMLR, 2256--2265","author":"Sohl-Dickstein Jascha","year":"2015","unstructured":"Jascha Sohl-Dickstein, Eric Weiss, Niru Maheswaranathan, and Surya Ganguli. 2015. Deep unsupervised learning using nonequilibrium thermodynamics. In International conference on machine learning. PMLR, 2256--2265."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2012.2221191"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2970248"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/30.125072"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3243310"},{"key":"e_1_3_2_1_53_1","volume-title":"Internimage: Exploring large-scale vision foundation models with deformable convolutions. arXiv preprint arXiv:2211.05778","author":"Wang Wenhai","year":"2022","unstructured":"Wenhai Wang, Jifeng Dai, Zhe Chen, Zhenhang Huang, Zhiqi Li, Xizhou Zhu, Xiaowei Hu, Tong Lu, Lewei Lu, Hongsheng Li, et al. 2022. Internimage: Exploring large-scale vision foundation models with deformable convolutions. arXiv preprint arXiv:2211.05778 (2022)."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00304"},{"key":"e_1_3_2_1_55_1","volume-title":"International Conference on Learning Representations.","author":"Watson Daniel","year":"2021","unstructured":"Daniel Watson, William Chan, Jonathan Ho, and Mohammad Norouzi. 2021. Learning fast samplers for diffusion models by differentiating through sample quality. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2003.815165"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01688-4"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475213"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.596"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"e_1_3_2_1_61_1","volume-title":"Truncated diffusion probabilistic models. stat 1050","author":"Zheng Huangjie","year":"2022","unstructured":"Huangjie Zheng, Pengcheng He, Weizhu Chen, and Mingyuan Zhou. 2022. Truncated diffusion probabilistic models. stat 1050 (2022), 7."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.3390\/app11041380"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611851","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3611851","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:06:09Z","timestamp":1755821169000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611851"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":62,"alternative-id":["10.1145\/3581783.3611851","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3611851","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}