{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T08:14:56Z","timestamp":1775808896601,"version":"3.50.1"},"reference-count":327,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62371434"],"award-info":[{"award-number":["62371434"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62021001"],"award-info":[{"award-number":["62021001"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"JSPS KAKENHI","award":["JP23K16861"],"award-info":[{"award-number":["JP23K16861"]}]},{"name":"U.K. Research and Innovation (UKRI) MyWorld Strength in Places Program","award":["SIPF00006\/1"],"award-info":[{"award-number":["SIPF00006\/1"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE J. Emerg. Sel. Topics Circuits Syst."],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1109\/jetcas.2024.3403524","type":"journal-article","created":{"date-parts":[[2024,5,21]],"date-time":"2024-05-21T17:26:18Z","timestamp":1716312378000},"page":"149-171","source":"Crossref","is-referenced-by-count":9,"title":["Survey on Visual Signal Coding and Processing With Generative Models: Technologies, Standards, and Optimization"],"prefix":"10.1109","volume":"14","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8525-5066","authenticated-orcid":false,"given":"Zhibo","family":"Chen","sequence":"first","affiliation":[{"name":"Department of Electronic Engineering and Information Science, University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5583-4895","authenticated-orcid":false,"given":"Heming","family":"Sun","sequence":"additional","affiliation":[{"name":"Faculty of Engineering, Yokohama National University, Kanagawa, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2118-4876","authenticated-orcid":false,"given":"Li","family":"Zhang","sequence":"additional","affiliation":[{"name":"Bytedance Inc., San Diego, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6623-9936","authenticated-orcid":false,"given":"Fan","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computer Science, University of Bristol, Bristol, U.K."}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.5555\/2969033.2969125"},{"key":"ref2","article-title":"Conditional generative adversarial nets","author":"Mirza","year":"2014","journal-title":"arXiv:1411.1784"},{"key":"ref3","first-page":"2172","article-title":"InfoGAN: Interpretable representation learning by information maximizing generative adversarial nets","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Chen"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3109419"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.19"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00660"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3034267"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00165"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10804"},{"key":"ref11","first-page":"17022","article-title":"HiFi-GAN: Generative adversarial networks for efficient and high fidelity speech synthesis","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Kong"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3476576.3476732"},{"key":"ref13","article-title":"Towards principled methods for training generative adversarial networks","author":"Arjovsky","year":"2017","journal-title":"arXiv:1701.04862"},{"key":"ref14","first-page":"214","article-title":"Wasserstein generative adversarial networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Arjovsky"},{"key":"ref15","article-title":"Auto-encoding variational Bayes","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Kingma"},{"key":"ref16","article-title":"Importance weighted autoencoders","volume-title":"Proc. 4th Int. Conf. Learn. Represent. (ICLR)","author":"Burda"},{"key":"ref17","article-title":"Preventing posterior collapse with delta-vaes","volume-title":"Proc. 7th Int. Conf. Learn. Represent. (ICLR)","author":"Razavi"},{"key":"ref18","first-page":"19667","article-title":"NVAE: A deep hierarchical variational autoencoder","volume-title":"Proc. 34th Int. Conf. Neural Inf. Process. Syst.","volume":"33","author":"Vahdat"},{"key":"ref19","first-page":"4736","article-title":"Improving variational autoencoders with inverse autoregressive flow","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Kingma"},{"key":"ref20","article-title":"Very deep VAEs generalize autoregressive models and can outperform them on images","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Child"},{"key":"ref21","first-page":"3936","article-title":"Autoregressive quantile networks for generative modeling","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ostrovski"},{"key":"ref22","first-page":"1747","article-title":"Pixel recurrent neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Van Den Oord"},{"key":"ref23","first-page":"4790","article-title":"Conditional image generation with PixelCNN decoders","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Van den Oord"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3116668"},{"key":"ref25","article-title":"Autoregressive diffusion models","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Hoogeboom"},{"key":"ref26","first-page":"39957","article-title":"AR-Diffusion: Auto-regressive diffusion model for text generation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Wu"},{"key":"ref27","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Ho"},{"key":"ref28","first-page":"1530","article-title":"Variational inference with normalizing flows","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","volume":"37","author":"Rezende"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2992934"},{"key":"ref30","article-title":"FFJORD: Free-form continuous dynamics for scalable reversible generative models","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Grathwohl"},{"key":"ref31","article-title":"Density estimation using real nvp","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Dinh"},{"key":"ref32","first-page":"10236","article-title":"Glow: Generative flow with invertible 1\u00d71 convolutions","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Kingma"},{"key":"ref33","first-page":"9913","article-title":"Residual flows for invertible generative modeling","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Chen"},{"key":"ref34","article-title":"Building normalizing flows with stochastic interpolants","volume-title":"Proc. 11th Int. Conf. Learn. Represent.","author":"Albergo"},{"key":"ref35","first-page":"16280","article-title":"Diffusion normalizing flow","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Zhang"},{"key":"ref36","article-title":"Diffusion models with deterministic normalizing flow priors","author":"Zand","year":"2023","journal-title":"arXiv:2309.01274"},{"key":"ref37","article-title":"Score-based generative modeling through stochastic differential equations","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Song"},{"key":"ref38","article-title":"Denoising diffusion implicit models","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Song"},{"key":"ref39","first-page":"5775","article-title":"DPM-Solver: A fast ode solver for diffusion probabilistic model sampling in around 10 steps","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Lu"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01117"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/3588432.3591513"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00218"},{"key":"ref44","article-title":"An image is worth one word: Personalizing text-to-image generation using textual inversion","volume-title":"Proc. 11th Int. Conf. Learn. Represent.","author":"Gal"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"ref46","article-title":"Stable video diffusion: Scaling latent video diffusion models to large datasets","author":"Blattmann","year":"2023","journal-title":"arXiv:2311.15127"},{"key":"ref47","article-title":"DreamFusion: Text-to-3D using 2D diffusion","volume-title":"Proc. 11th Int. Conf. Learn. Represent.","author":"Poole"},{"key":"ref48","first-page":"2256","article-title":"Deep unsupervised learning using nonequilibrium thermodynamics","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Sohl-Dickstein"},{"key":"ref49","article-title":"End-to-end optimized image compression","volume-title":"Proc. Int. Conf. Learn. Representat.","author":"Ball\u00e9"},{"key":"ref50","article-title":"Lossy compression with Gaussian diffusion","author":"Theis","year":"2022","journal-title":"arXiv:2206.08889"},{"key":"ref51","first-page":"10794","article-title":"Joint autoregressive and hierarchical priors for learned image compression","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Minnen"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475213"},{"key":"ref53","first-page":"3920","article-title":"Soft then hard: Rethinking the quantization in neural image compression","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","author":"Guo"},{"key":"ref54","article-title":"Variational image compression with a scale hyperprior","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Ball\u00e9"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3089491"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00563"},{"key":"ref57","first-page":"11913","article-title":"High-fidelity generative image compression","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Mentzer"},{"key":"ref58","first-page":"1141","article-title":"Soft-to-hard vector quantization for end-to-end learning compressible representations","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Agustsson"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00591"},{"key":"ref60","first-page":"573","article-title":"Improving inference for neural image compression","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Yang"},{"key":"ref61","first-page":"21696","article-title":"Variational diffusion models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Kingma"},{"key":"ref62","first-page":"16131","article-title":"Compressing images by encoding their latent representations with relative entropy coding","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Flamich"},{"key":"ref63","first-page":"6548","article-title":"Fast relative entropy coding with A* coding","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Flamich"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP40778.2020.9190935"},{"key":"ref65","first-page":"12878","article-title":"Idempotent learned image compression with right-inverse","volume-title":"Proc. 37th Conf. Neural Inf. Process. Syst.","author":"Li"},{"key":"ref66","article-title":"Lossy image compression with normalizing flows","volume-title":"Proc. Neural Compression Inf. Theory Appl. Workshop @ ICLR","author":"Helminger"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/OJCAS.2021.3123201"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19787-1_12"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3301016"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00988"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2957990"},{"key":"ref72","first-page":"12134","article-title":"Integer discrete flows and lossless compression","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Hoogeboom"},{"key":"ref73","article-title":"IDF++: Analyzing and improving integer discrete flows for lossless compression","author":"van den Berg","year":"2020","journal-title":"arXiv:2006.12459"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00652"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00031"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02138"},{"key":"ref77","first-page":"64971","article-title":"Lossy image compression with conditional diffusion models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Yang"},{"key":"ref78","article-title":"A residual diffusion model for high perceptual quality codec augmentation","author":"Ghouse","year":"2023","journal-title":"arXiv:2301.05489"},{"key":"ref79","article-title":"High-fidelity image compression with score-based generative models","author":"Hoogeboom","year":"2023","journal-title":"arXiv:2305.18231"},{"key":"ref80","volume-title":"Call for Learning-based Video Codecs for Study of Quality Assessment","author":"Ye","year":"2023"},{"issue":"4","key":"ref81","first-page":"142","article-title":"Coding theorems for a discrete source with a fidelity criterion","volume":"4","author":"Shannon","year":"1959","journal-title":"IRE Nat. Conv. Rec."},{"key":"ref82","first-page":"675","article-title":"Rethinking lossy compression: The rate-distortion-perception tradeoff","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Blau"},{"key":"ref83","first-page":"11682","article-title":"On perceptual lossy compression: The cost of perceptual reconstruction and an optimal training framework","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Yan"},{"key":"ref84","first-page":"11517","article-title":"Universal rate-distortion-perception representations for lossy compression","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Zhang"},{"key":"ref85","first-page":"25661","article-title":"A theory of the distortion-perception tradeoff in Wasserstein space","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Freirich"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/JSAIT.2022.3231820"},{"key":"ref87","article-title":"Idempotence and perceptual image compression","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Xu"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00713"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58520-4_27"},{"key":"ref90","first-page":"13091","article-title":"VCT: A video compression transformer","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Mentzer"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2019.2892608"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00355"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01126"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58536-5_12"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00853"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00360"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00155"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2988453"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2020.3043590"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00583"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3287495"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01237-3_26"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00652"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01031"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00666"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00661"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3138300"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3222418"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP49062.2020.9231841"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1109\/MMSP48831.2020.9287049"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01421"},{"key":"ref112","first-page":"18114","article-title":"Deep contextual video compression","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","volume":"34","author":"Li"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1109\/PCS56426.2022.10018080"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3220421"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547845"},{"key":"ref116","article-title":"MIMT: Masked image modeling transformer for video compression","volume-title":"Proc. 11th Int. Conf. Learn. Represent.","author":"Xiang"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00592"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02166"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2024.3359948"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/214"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19809-0_32"},{"key":"ref122","first-page":"48226","article-title":"On the choice of perception loss function for learned video compression","volume-title":"Proc. ICML Workshop Neural Compression Inf. Theory Appl.","author":"Phan"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.3390\/e25101469"},{"key":"ref124","volume-title":"JPEG AI Overview Slide","author":"Alshina","year":"2023"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3313974"},{"key":"ref126","volume-title":"Bytedance\u2019s Response to the JPEG AI Call for Proposals","author":"Esenlik","year":"2023"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1109\/30.125072"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1109\/79.952804"},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2003.815165"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2012.2221191"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2021.3058584"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3101953"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2012.2223011"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3072430"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3087706"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3072202"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00796"},{"key":"ref138","doi-asserted-by":"publisher","DOI":"10.1109\/MMUL.2023.3245919"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1109\/ACSSC.2003.1292216"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2895768"},{"key":"ref141","volume-title":"JPEG AI Common Training and Test Conditions","year":"2022"},{"key":"ref142","doi-asserted-by":"publisher","DOI":"10.1109\/BMSB.2017.7986143"},{"key":"ref143","first-page":"4","article-title":"On between-coefficient contrast masking of DCT basis functions","volume-title":"Proc. 3rd Int. Workshop Video Process. Qual. Metrics Consum. Electron. (VPQM)","author":"Ponomarenko"},{"key":"ref144","volume-title":"VVCSoftware_VTM Repository","year":"2021"},{"key":"ref145","volume-title":"JPEG AI Sw V4.x Status","author":"Karabutov","year":"2023"},{"key":"ref146","article-title":"Residual non-local attention networks for image restoration","author":"Zhang","year":"2019","journal-title":"arXiv:1903.10082"},{"key":"ref147","volume-title":"Report of 3.11-Acceleration of Context Modules","author":"Wang","year":"2023"},{"key":"ref148","volume-title":"Meeting Report of the 19th JVET Meeting (Teleconference, 22 June\u20131 July 2020)","author":"Sullivan"},{"key":"ref149","doi-asserted-by":"publisher","DOI":"10.1109\/pcs50896.2021.9477455"},{"key":"ref150","volume-title":"EE1-3.2: Neural Network-Based Intra Prediction With Learned Mapping to VVC Intra Prediction Modes","author":"Dumas","year":"2023"},{"key":"ref151","volume-title":"Non-EE1: Neural Network-Based Intra Prediction With Learned Mapping to VVC Intra Prediction Modes","author":"Dumas","year":"2022"},{"key":"ref152","volume-title":"AHG11: Neural Network-Based Intra Prediction With Reduced Complexity","author":"Dumas","year":"2023"},{"key":"ref153","doi-asserted-by":"publisher","DOI":"10.3390\/app13052795"},{"key":"ref154","doi-asserted-by":"publisher","DOI":"10.1109\/tcsvt.2023.3299410"},{"key":"ref155","volume-title":"AHG11: Neural Network-Based In-Loop Filter","author":"Wang","year":"2020"},{"key":"ref156","volume-title":"AHG11: Convolutional Neural Networks-Based In-Loop Filter","author":"Li","year":"2020"},{"key":"ref157","volume-title":"EE1-1.5: Combined Intra and Inter Models for Luma and Chroma","author":"Liu","year":"2023"},{"key":"ref158","volume-title":"EE1-1.1: Complexity Reduction on Neural-Network Loop Filter","author":"Shingala","year":"2023"},{"key":"ref159","volume-title":"AHG11\/EE1: Status of the Joint EE1-0 (LOP.2) Training","author":"Rusanovskyy","year":"2023"},{"key":"ref160","volume-title":"AHG11: Content-Adaptive Neural Network Post-Processing Filter","author":"Li","year":"2021"},{"key":"ref161","volume-title":"AHG11: Deep Neural Network for Super-Resolution","author":"Lee","year":"2020"},{"key":"ref162","volume-title":"EE1-2.2: GOP Level Adaptive Resampling With CNN-Based Super Resolution","author":"Chang","year":"2023"},{"key":"ref163","volume-title":"AHG11: A Hybrid Codec Using E2E Image Coding Combined With VVC Video Coding","author":"He","year":"2022"},{"key":"ref164","doi-asserted-by":"publisher","DOI":"10.1109\/icip49359.2023.10222497"},{"key":"ref165","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3038348"},{"key":"ref166","volume-title":"EE1-1.6: Deep In-Loop Filter With Fixed Point Implementation","author":"Li","year":"2022"},{"key":"ref167","volume-title":"EE1-1.6: RDO Considering Deep In-Loop Filtering","author":"Li","year":"2022"},{"key":"ref168","volume-title":"EE1-1.7: Deep In-Loop Filter With Additional Input Information","author":"Li","year":"2023"},{"key":"ref169","doi-asserted-by":"publisher","DOI":"10.1145\/3529107"},{"key":"ref170","volume-title":"Report: NNVC Software Development AhG14","author":"Galpin","year":"2024"},{"key":"ref171","doi-asserted-by":"publisher","DOI":"10.5594\/J11028"},{"key":"ref172","doi-asserted-by":"publisher","DOI":"10.1109\/PROC.1985.13202"},{"key":"ref173","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-60268-2_327"},{"key":"ref174","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1991.150969"},{"key":"ref175","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00271"},{"key":"ref176","first-page":"3515","article-title":"Ultra-low bitrate video conferencing using deep image animation","volume-title":"Proc. IEEE Int. Conf. Image Process. (ICIP)","author":"Konuko"},{"key":"ref177","doi-asserted-by":"publisher","DOI":"10.1109\/ICME52920.2022.9859867"},{"key":"ref178","doi-asserted-by":"publisher","DOI":"10.1109\/DCC52660.2022.00009"},{"key":"ref179","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP46576.2022.9897729"},{"key":"ref180","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3271130"},{"key":"ref181","article-title":"Interactive face video coding: A generative compression framework","author":"Chen","year":"2023","journal-title":"arXiv:2302.09919"},{"key":"ref182","doi-asserted-by":"publisher","DOI":"10.1109\/DCC58796.2024.00019"},{"key":"ref183","first-page":"8780","article-title":"Diffusion models beat GANs on image synthesis","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Dhariwal"},{"key":"ref184","volume-title":"AHG9: Common SEI Message of Generative Face Video","author":"Chen","year":"2023"},{"key":"ref185","volume-title":"AHG9: A Study on Generative Face Video SEI Message","author":"Teo","year":"2023"},{"key":"ref186","volume-title":"AHG9: On the Generative Face Video SEI Message","author":"Hannuksela","year":"2024"},{"key":"ref187","volume-title":"AHG9: Common Text for Proposed Generative Face Video SEI Message","author":"Chen","year":"2023"},{"key":"ref188","volume-title":"AHG9: Usage of the Neural-Network Post-Filter Characteristics Sei Message to Define the Generator NN of the Generative Face Video SEI Message","author":"Hannuksela","year":"2024"},{"key":"ref189","volume-title":"On VVC-assisted Ultra-low Rate Generative Face Video Coding","author":"Ye","year":"2023"},{"key":"ref190","volume-title":"AHG16: Proposed Common Software Tools and Testing Conditions for Generative Face Video Compression","author":"Chen","year":"2024"},{"key":"ref191","volume-title":"Test Conditions and Evaluation Procedures for Generative Face Video Coding","author":"McCarthy","year":"2024"},{"key":"ref192","volume-title":"AHG16: Interoperability Study on Parameter Translator of Generative Face Video Coding","author":"Yin","year":"2024"},{"key":"ref193","volume-title":"AHG16: Depthwise Separable Convolution for Generative Face Video Compression","author":"Zou","year":"2024"},{"key":"ref194","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-10039-7"},{"key":"ref195","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-022-10302-5"},{"key":"ref196","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3250616"},{"key":"ref197","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3251396"},{"key":"ref198","article-title":"Diffusion models for image restoration and enhancement\u2014A comprehensive survey","author":"Li","year":"2023","journal-title":"arXiv:2308.09388"},{"key":"ref199","first-page":"1278","article-title":"Stochastic backpropagation and approximate inference in deep generative models","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Rezende"},{"key":"ref200","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10777"},{"key":"ref201","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2852738"},{"key":"ref202","article-title":"Fully unsupervised diversity denoising with convolutional variational autoencoders","author":"Prakash","year":"2020","journal-title":"arXiv:2006.06072"},{"key":"ref203","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.509"},{"key":"ref204","doi-asserted-by":"publisher","DOI":"10.1109\/TCI.2020.3032671"},{"key":"ref205","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093393"},{"key":"ref206","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3084743"},{"key":"ref207","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2020.3003832"},{"key":"ref208","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-25063-7_24"},{"key":"ref209","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00333"},{"key":"ref210","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP42928.2021.9506694"},{"key":"ref211","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02135"},{"key":"ref212","article-title":"GAN2GAN: Generative noise learning for blind denoising with single noisy images","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Cha"},{"key":"ref213","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00856"},{"key":"ref214","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2019.2920407"},{"key":"ref215","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00029"},{"key":"ref216","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2019.00221"},{"key":"ref217","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201304"},{"key":"ref218","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-33843-5_9"},{"key":"ref219","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00037"},{"key":"ref220","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093603"},{"key":"ref221","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1809.00219"},{"key":"ref222","doi-asserted-by":"publisher","DOI":"10.1117\/12.2530688"},{"key":"ref223","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00228"},{"key":"ref224","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2019.2922960"
},{"key":"ref225","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00971"},{"key":"ref226","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00193"},{"key":"ref227","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3261988"},{"key":"ref228","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3238179"},{"key":"ref229","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3204461"},{"key":"ref230","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.01.029"},{"key":"ref231","first-page":"23593","article-title":"Denoising diffusion restoration models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Kawar"},{"key":"ref232","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00966"},{"key":"ref233","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01204"},{"key":"ref234","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3305243"},{"key":"ref235","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00917"},{"key":"ref236","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00244"},{"key":"ref237","article-title":"Semantic image synthesis via diffusion models","author":"Wang","year":"2022","journal-title":"arXiv:2207.00050"},{"key":"ref238","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00678"},{"key":"ref239","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_3"},{"key":"ref240","article-title":"You only need adversarial supervision for semantic image synthesis","author":"Sushko","year":"2020","journal-title":"arXiv:2012.04781"},{"key":"ref241","first-page":"1691","article-title":"Generative pretraining from pixels","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Chen"},{"key":"ref242","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"ref243","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19787-1_41"},{"key":"ref244","first-page":"33115","article-title":"SDDM: Score-decomposed diffusion models on manifolds for unpaired image-to-image translation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Sun"},{"key":"ref245","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00191"},{"key":"ref246","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00183"},{"key":"ref247","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01422"},{"key":"ref248","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.478"},{"key":"ref249","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00201"},{"key":"ref250","article-title":"Frame interpolation using generative adversarial networks","author":"Koren","year":"2017"},{"key":"ref251","article-title":"Frame interpolation with multi-scale deep loss functions and generative adversarial networks","author":"van Amersfoort","year":"2017","journal-title":"arXiv:1711.06045"},{"key":"ref252","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2018.2867934"},{"key":"ref253","first-page":"5769","article-title":"Improved training of Wasserstein GANs","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Gulrajani"},{"key":"ref254","first-page":"23371","article-title":"MCVD-masked conditional video diffusion for prediction, generation, and interpolation","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","volume":"35","author":"Voleti"},{"key":"ref255","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612405"},{"key":"ref256","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i2.27912"},{"key":"ref257","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58568-6_7"},{"key":"ref258","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00351"},{"key":"ref259","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2957235"},{"key":"ref260","doi-asserted-by":"publisher","DOI":"10.1109\/ICME46284.2020.9102895"},{"key":"ref261","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3479234"},{"key":"ref262","doi-asserted-by":"publisher","DOI":"10.1109\/IJCB57857.2023.10449044"},{"key":"ref263","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2022.103617"},{"key":"ref264","article-title":"A study on the evaluation of generative models","author":"Betzalel","year":"2022","journal-title":"arXiv:2206.10935"},{"key":"ref265","first-page":"2226","article-title":"Improved techniques for training GANs","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Salimans"},{"key":"ref266","first-page":"6629","article-title":"GANs trained by a two time-scale update rule converge to a local Nash equilibrium","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Heusel"},{"key":"ref267","article-title":"Compound Frechet inception distance for quality assessment of GAN created images","author":"Nunn","year":"2021","journal-title":"arXiv:2106.08575"},{"key":"ref268","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2987180"},{"key":"ref269","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01008"},{"key":"ref270","doi-asserted-by":"publisher","DOI":"10.1109\/WACVW60836.2024.00054"},{"key":"ref271","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2014.6757323"},{"key":"ref272","article-title":"Integer networks for data compression with latent-variable models","volume-title":"Proc. Int. Conf. 
Learn. Represent.","author":"Ball\u00e9"},{"key":"ref273","doi-asserted-by":"publisher","DOI":"10.1109\/PCS50896.2021.9477496"},{"key":"ref274","article-title":"Post-training quantization for cross-platform learned image compression","author":"He","year":"2022","journal-title":"arXiv:2202.07513"},{"key":"ref275","doi-asserted-by":"publisher","DOI":"10.1109\/PCS56426.2022.10018040"},{"key":"ref276","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP49359.2023.10222336"},{"key":"ref277","article-title":"Quantized decoder in learned image compression for deterministic reconstruction","author":"Koyuncu","year":"2023","journal-title":"arXiv:2312.11209"},{"key":"ref278","doi-asserted-by":"publisher","DOI":"10.1201\/9781003162810-13"},{"key":"ref279","doi-asserted-by":"publisher","DOI":"10.1109\/VCIP56404.2022.10008867"},{"key":"ref280","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3231789"},{"key":"ref281","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3323015"},{"key":"ref282","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP46576.2022.9897854"},{"key":"ref283","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00447"},{"key":"ref284","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053102"},{"key":"ref285","article-title":"EVC: Towards real-time neural image compression with mask decay","volume-title":"Proc. 11th Int. Conf. Learn. 
Represent.","author":"Guo-Hua"},{"key":"ref286","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475667"},{"key":"ref287","article-title":"Bandwidth-efficient inference for neural image compression","author":"Yin","year":"2023","journal-title":"arXiv:2309.02855"},{"key":"ref288","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-99-8148-9_34"},{"key":"ref289","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3164059"},{"key":"ref290","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS51556.2021.9401164"},{"key":"ref291","doi-asserted-by":"publisher","DOI":"10.3390\/electronics12102289"},{"key":"ref292","doi-asserted-by":"publisher","DOI":"10.1109\/A-SSCC56115.2022.9980666"},{"key":"ref293","first-page":"1","article-title":"Deep residual learning for image compression","volume-title":"Proc. CVPR Workshops","author":"Cheng"},{"key":"ref294","doi-asserted-by":"publisher","DOI":"10.1109\/VCIP56404.2022.10008876"},{"key":"ref295","first-page":"C188","article-title":"A 1062 Mpixels\/s 8192\u00d74320p high efficiency video coding (H.265) encoder chip","volume-title":"Proc. Symp. 
VLSI Circuits","author":"Tsai"},{"key":"ref296","doi-asserted-by":"publisher","DOI":"10.1109\/isscc.2013.6487682"},{"key":"ref297","doi-asserted-by":"publisher","DOI":"10.1109\/isscc.2016.7418009"},{"key":"ref298","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01383"},{"key":"ref299","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611955"},{"key":"ref300","article-title":"Accelerating learnt video codecs with gradient decay and layer-wise distillation","author":"Peng","year":"2023","journal-title":"arXiv:2312.02605"},{"key":"ref301","article-title":"A computationally efficient neural video compression accelerator based on a sparse CNN-transformer hybrid network","author":"Zhang","year":"2023","journal-title":"arXiv:2312.10716"},{"key":"ref302","doi-asserted-by":"publisher","DOI":"10.1145\/3524273.3532906"},{"key":"ref303","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00427"},{"key":"ref304","article-title":"COIN: Compression with implicit neural representations","volume-title":"Proc. Neural Compression From Inf. Theory Appl. Workshop@ ICLR","author":"Dupont"},{"key":"ref305","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19809-0_5"},{"key":"ref306","article-title":"C3: High-performance and low-complexity neural compression from a single image or video","author":"Kim","year":"2023","journal-title":"arXiv:2312.02753"},{"key":"ref307","article-title":"Implicit neural video compression","volume-title":"Proc. 
ICLR Workshop Deep Generative Models Highly Structured Data","author":"Zhang"},{"key":"ref308","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01243"},{"key":"ref309","article-title":"Instance-adaptive video compression: Improving neural codecs by training on the test set","author":"van Rozendaal","year":"2021","journal-title":"arXiv:2111.10302"},{"key":"ref310","first-page":"1","article-title":"COIN++: Neural compression across modalities","volume":"2022","author":"Dupont","year":"2022","journal-title":"Trans. Mach. Learn. Res."},{"key":"ref311","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01774"},{"key":"ref312","doi-asserted-by":"publisher","DOI":"10.1109\/DCC55655.2023.00029"},{"key":"ref313","first-page":"1938","article-title":"Compression with Bayesian implicit neural representations","volume-title":"Proc. 37th Conf. Neural Inf. Process. Syst.","author":"Guo"},{"key":"ref314","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2023.3334956"},{"key":"ref315","first-page":"1078","article-title":"Inference suboptimality in variational autoencoders","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Cremer"},{"key":"ref316","article-title":"Overfitting for fun and profit: Instance-adaptive data compression","author":"van Rozendaal","year":"2021","journal-title":"arXiv:2101.08687"},{"key":"ref317","doi-asserted-by":"publisher","DOI":"10.1109\/PCS56426.2022.10018064"},{"key":"ref318","doi-asserted-by":"publisher","DOI":"10.1109\/PCS56426.2022.10018052"},{"key":"ref319","doi-asserted-by":"publisher","DOI":"10.1109\/VCIP53242.2021.9675360"},{"key":"ref320","doi-asserted-by":"publisher","DOI":"10.1109\/VCIP56404.2022.10008819"},{"key":"ref321","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2023.3340982"},{"key":"ref322","article-title":"A training-free defense framework for robust learned image compression","author":"Song","year":"2024","journal-title":"arXiv:2401.11902"},{"key":"ref323","article-title":"Attack and defense analysis of learned image compression","author":"Zhu","year":"2024","journal-title":"arXiv:2401.10345"},{"key":"ref324","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3276442"},{"key":"ref325","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00324"},{"key":"ref326","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01039"},{"key":"ref327","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00238"}],"container-title":["IEEE Journal on Emerging and Selected Topics in Circuits and 
Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5503868\/10578329\/10535893.pdf?arnumber=10535893","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,12]],"date-time":"2024-07-12T17:40:14Z","timestamp":1720806014000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10535893\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6]]},"references-count":327,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/jetcas.2024.3403524","relation":{},"ISSN":["2156-3357","2156-3365"],"issn-type":[{"value":"2156-3357","type":"print"},{"value":"2156-3365","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,6]]}}}