{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,16]],"date-time":"2026-01-16T04:12:06Z","timestamp":1768536726499,"version":"3.49.0"},"reference-count":53,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1109\/ijcnn64981.2025.11228445","type":"proceedings-article","created":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T18:46:15Z","timestamp":1763145975000},"page":"1-8","source":"Crossref","is-referenced-by-count":1,"title":["Improving Vector-Quantized Image Modeling with Latent Consistency-Matching Diffusion"],"prefix":"10.1109","author":[{"given":"Bac","family":"Nguyen","sequence":"first","affiliation":[{"name":"Sony AI"}]},{"given":"Chieh-Hsin","family":"Lai","sequence":"additional","affiliation":[{"name":"Sony AI"}]},{"given":"Yuta","family":"Takida","sequence":"additional","affiliation":[{"name":"Sony AI"}]},{"given":"Naoki","family":"Murata","sequence":"additional","affiliation":[{"name":"Sony AI"}]},{"given":"Toshimitsu","family":"Uesaka","sequence":"additional","affiliation":[{"name":"Sony AI"}]},{"given":"Stefano","family":"Ermon","sequence":"additional","affiliation":[{"name":"Stanford University"}]},{"given":"Yuki","family":"Mitsufuji","sequence":"additional","affiliation":[{"name":"Sony AI"}]}],"member":"263","reference":[{"key":"ref1","first-page":"17981","article-title":"Structured denoising diffusion models in discrete state-spaces","volume-title":"NeurIPS","author":"Austin"},{"key":"ref2","article-title":"Layer normalization","volume-title":"NeurIPS","author":"Ba"},{"key":"ref3","doi-asserted-by":"crossref","DOI":"10.1007\/3-540-33486-6_6","article-title":"A neural probabilistic language model","volume-title":"NeurIPS","author":"Bengio"},{"key":"ref4","article-title":"A Pytorch reproduction of masked generative image transformer","author":"Besnier","year":"2023"},{"key":"ref5","article-title":"Large scale GAN training for high fidelity natural image synthesis","volume-title":"ICLR","author":"Brock"},{"key":"ref6","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"NeurIPS","author":"Brown"},{"key":"ref7","first-page":"28266","article-title":"A continuous time framework for discrete denoising models","volume-title":"NeurIPS","author":"Campbell"},{"key":"ref8","first-page":"4055","article-title":"Muse: Text-to-image generation via masked generative transformers","volume-title":"ICML","author":"Chang"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01103"},{"key":"ref10","article-title":"Analog bits: Generating discrete data using diffusion models with self-conditioning","volume-title":"ICLR","author":"Chen"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref12","first-page":"8780","article-title":"Diffusion models beat GANs on image synthesis","volume-title":"NeurIPS","author":"Dhariwal"},{"key":"ref13","article-title":"Continuous diffusion for categorical data","author":"Dieleman","year":"2022"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.261"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01043"},{"key":"ref17","article-title":"Likelihood-based diffusion language models","volume-title":"NeurIPS","volume":"36","author":"Gulrajani"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"ref19","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume-title":"NeurIPS","author":"Ho"},{"key":"ref20","article-title":"Classifier-free diffusion guidance","volume-title":"NeurIPS Workshop","author":"Ho"},{"key":"ref21","article-title":"The curious case of neural text degeneration","volume-title":"ICLR","author":"Holtzman"},{"key":"ref22","first-page":"13213","article-title":"Simple diffusion: end-to-end diffusion for high resolution images","volume-title":"ICML","author":"Hoogeboom"},{"key":"ref23","first-page":"12454","article-title":"Argmax flows and multinomial diffusion: Learning categorical distributions","volume-title":"NeurIPS","author":"Hoogeboom"},{"key":"ref24","article-title":"Elucidating the design space of diffusion-based generative models","volume-title":"NeurIPS","author":"Karras"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"ref26","article-title":"Consistency trajectory models: Learning probability flow ODE trajectory of diffusion","volume-title":"NeurIPS Workshop","author":"Kim"},{"key":"ref27","first-page":"21696","article-title":"Variational diffusion models","volume-title":"NeurIPS","author":"Kingma"},{"key":"ref28","article-title":"Understanding diffusion objectives as the elbo with simple data augmentation","volume-title":"NeurIPS","author":"Kingma"},{"key":"ref29","article-title":"Improved precision and recall metric for assessing generative models","volume":"32","author":"Kynk\u00e4\u00e4nniemi","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref30","first-page":"18365","article-title":"FP-diffusion: Improving score-based diffusion models by enforcing the underlying score Fokker-Planck equation","volume-title":"ICML","author":"Lai"},{"key":"ref31","article-title":"On the equivalence of consistency-type models: Consistency models, consistent diffusion models, and fokker-planck regularization","volume-title":"ICML Workshop","author":"Lai"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20050-2_5"},{"key":"ref33","first-page":"4328","article-title":"Diffusion-LM improves controllable text generation","volume-title":"NeurIPS","author":"Li"},{"key":"ref34","article-title":"Discrete diffusion language modeling by estimating the ratios of the data distribution","author":"Lou","year":"2023"},{"key":"ref35","article-title":"Latent consistency models: Synthesizing high-resolution images with few-step inference","author":"Luo","year":"2023"},{"key":"ref36","first-page":"34532","article-title":"Concrete score matching: Generalized score matching for discrete data","volume-title":"NeurIPS","author":"Meng"},{"key":"ref37","first-page":"8162","article-title":"Improved denoising diffusion probabilistic models","volume-title":"ICML","author":"Nichol"},{"key":"ref38","article-title":"Fast text-conditional discrete denoising on vector-quantized latent spaces","author":"Rampas","year":"2022"},{"key":"ref39","article-title":"Generating diverse high-fidelity images with VQ-VAE-2","volume-title":"NeurIPS","author":"Razavi"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref41","first-page":"12533","article-title":"D2c: Diffusion-decoding models for few-shot conditional generation","volume-title":"NeurIPS","volume":"34","author":"Sinha"},{"key":"ref42","first-page":"2256","article-title":"Deep unsupervised learning using nonequilibrium thermodynamics","volume-title":"ICML","author":"Sohl-Dickstein"},{"key":"ref43","article-title":"Denoising diffusion implicit models","author":"Song","year":"2020"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/960126.806879"},{"key":"ref45","first-page":"1415","article-title":"Maximum likelihood training of score-based diffusion models","volume-title":"NeurIPS","author":"Song"},{"key":"ref46","article-title":"Score-based generative modeling through stochastic differential equations","volume-title":"ICLR","author":"Song"},{"key":"ref47","article-title":"Self-conditioned embedding diffusion for text generation","author":"Strudel","year":"2022"},{"key":"ref48","article-title":"Score-based continuous-time discrete diffusion models","volume-title":"ICLR","author":"Sun"},{"key":"ref49","first-page":"11287","article-title":"Score-based generative modeling in latent space","volume-title":"NeurIPS","volume":"34","author":"Vahdat"},{"key":"ref50","article-title":"Neural discrete representation learning","volume":"30","author":"Van Den Oord","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref51","article-title":"Attention is all you need","volume-title":"NeurIPS","author":"Vaswani"},{"key":"ref52","article-title":"Dinoiser: Diffused conditional sequence learning by manipulating noises","author":"Ye","year":"2023"},{"key":"ref53","article-title":"LSUN: Construction of a large-scale image dataset using deep learning with humans in the loop","author":"Yu","year":"2015"}],"event":{"name":"2025 International Joint Conference on Neural Networks (IJCNN)","location":"Rome, Italy","start":{"date-parts":[[2025,6,30]]},"end":{"date-parts":[[2025,7,5]]}},"container-title":["2025 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11227166\/11227148\/11228445.pdf?arnumber=11228445","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T07:12:24Z","timestamp":1763190744000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11228445\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":53,"URL":"https:\/\/doi.org\/10.1109\/ijcnn64981.2025.11228445","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]}}}