{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T08:36:14Z","timestamp":1758098174597,"version":"3.44.0"},"reference-count":105,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62476018"],"award-info":[{"award-number":["62476018"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Beijing Municipal Science and Technology Project","award":["Z231100010323002"],"award-info":[{"award-number":["Z231100010323002"]}]},{"name":"Postdoctoral Fellowship Program and China","award":["BX20250487"],"award-info":[{"award-number":["BX20250487"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1109\/tpami.2025.3585692","type":"journal-article","created":{"date-parts":[[2025,7,3]],"date-time":"2025-07-03T13:28:46Z","timestamp":1751549326000},"page":"8823-8837","source":"Crossref","is-referenced-by-count":0,"title":["Temporal Feature Matters: A Framework for Diffusion Model Quantization"],"prefix":"10.1109","volume":"47","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-7898-8402","authenticated-orcid":false,"given":"Yushi","family":"Huang","sequence":"first","affiliation":[{"name":"Electrical and Computer Engineering Department, Hong Kong University of Science and Technology, Clear Water Bay, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6024-7086","authenticated-orcid":false,"given":"Ruihao","family":"Gong","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Complex &#x0026; Critical Software Environment, Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8425-4195","authenticated-orcid":false,"given":"Xianglong","family":"Liu","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Complex &#x0026; Critical Software Environment, Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6745-3050","authenticated-orcid":false,"given":"Jing","family":"Liu","sequence":"additional","affiliation":[{"name":"Department of Data Science and AI, Faculty of IT, Monash University, Clayton, VIC, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6444-7253","authenticated-orcid":false,"given":"Yuhang","family":"Li","sequence":"additional","affiliation":[{"name":"Electrical Engineering Department, Yale University, New Haven, CT, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6121-5529","authenticated-orcid":false,"given":"Jiwen","family":"Lu","sequence":"additional","affiliation":[{"name":"Department of Automation, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7225-5449","authenticated-orcid":false,"given":"Dacheng","family":"Tao","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Nanyang Technological University, Singapore"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1173","article-title":"Approximate caching for efficiently serving text-to-image diffusion models","volume-title":"Proc. 21st USENIX Symp. Netw. Syst. Des. Implementation","author":"Agarwal"},{"key":"ref2","first-page":"1555","article-title":"Estimating the optimal covariance with imperfect mean in diffusion probabilistic models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Bao"},{"key":"ref3","first-page":"1","article-title":"Analytic-DPM: An analytic estimate of the optimal reverse variance in diffusion probabilistic models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Bao"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00356"},{"article-title":"Language models are few-shot learners","volume-title":"Proc. 34th Int. Conf. Neural Inf. Process. Syst.","author":"Brown","key":"ref5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02636"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00490"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00363"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01209"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","year":"2020","author":"Dosovitskiy","key":"ref11"},{"key":"ref12","first-page":"1","article-title":"Learned step size quantization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Esser"},{"key":"ref13","first-page":"4475","article-title":"Optimal brain compression: A framework for accurate post-training quantization and pruning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Frantar"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.3390\/e25040633"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00495"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3422622"},{"key":"ref17","first-page":"1135","article-title":"Learning both weights and connections for efficient neural network","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Han"},{"key":"ref18","first-page":"1","article-title":"EfficientDM: Efficient quantization-aware fine-tuning of low-bit diffusion models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"He"},{"article-title":"PTQD: Accurate post-training quantization for diffusion models","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"He","key":"ref19"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.595"},{"article-title":"GANs trained by a two time-scale update rule converge to a local nash equilibrium","year":"2018","author":"Heusel","key":"ref21"},{"article-title":"Denoising diffusion probabilistic models","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Ho","key":"ref22"},{"article-title":"Classifier-free diffusion guidance","year":"2022","author":"Ho","key":"ref23"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00703"},{"article-title":"Improving post training neural quantization: Layer-wise calibration and integer programming","year":"2020","author":"Hubara","key":"ref25"},{"year":"2018","key":"ref26","article-title":"Openvino"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00286"},{"article-title":"Gotta go fast when generating data with score-based models","year":"2021","author":"Jolicoeur-Martineau","key":"ref28"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00976"},{"article-title":"Progressive growing of GANs for improved quality, stability, and variation","year":"2018","author":"Karras","key":"ref30"},{"article-title":"Elucidating the design space of diffusion-based generative models","volume-title":"Proc. 36th Int. Conf. Neural Inf. Process. Syst.","author":"Karras","key":"ref31"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"ref33","article-title":"CUTLASS: Fast linear algebra in CUDA C++","volume":"2","author":"Kerr","year":"2017","journal-title":"NVIDIA Developer Blog"},{"article-title":"Denoising MCMC for accelerating diffusion-based generative models","year":"2022","author":"Kim","key":"ref34"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72949-2_22"},{"article-title":"Task-oriented diffusion model compression","year":"2024","author":"Kim","key":"ref36"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/362"},{"key":"ref38","first-page":"21696","article-title":"Variational diffusion models","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Kingma"},{"article-title":"Auto-encoding variational Bayes","year":"2022","author":"Kingma","key":"ref39"},{"article-title":"On fast sampling of diffusion probabilistic models","year":"2021","author":"Kong","key":"ref40"},{"issue":"1","key":"ref41","first-page":"1","article-title":"Learning multiple layers of features from tiny images","volume":"1","author":"Krizhevsky","year":"2009"},{"article-title":"Flux","year":"2024","author":"Labs","key":"ref42"},{"key":"ref43","first-page":"1","article-title":"BDDM: Bilateral denoising diffusion models for fast and high-quality speech synthesis","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Lam"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01608"},{"key":"ref45","first-page":"1","article-title":"BRECQ: Pushing the limit of post-training quantization by block reconstruction","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Li"},{"article-title":"SnapFusion: Text-to-image diffusion model on mobile devices within two seconds","volume-title":"Proc. 37th Int. Conf. Neural Inf. Process. Syst.","author":"Li","key":"ref46"},{"article-title":"Efficient adaptive activation rounding for post-training quantization","year":"2022","author":"Li","key":"ref47"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/164"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02340"},{"key":"ref51","first-page":"1","article-title":"Pseudo numerical methods for diffusion models on manifolds","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Liu"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02090"},{"key":"ref53","first-page":"1","article-title":"Relaxed quantization for discretized neural networks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Louizos"},{"article-title":"DPM-Solver: A fast ODE solver for diffusion probabilistic model sampling in around 10 steps","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Lu","key":"ref54"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1007\/s11633-025-1562-4"},{"article-title":"Knowledge distillation in iterative generative models for improved sampling speed","year":"2021","author":"Luhman","key":"ref56"},{"article-title":"Accelerating diffusion models via early stop of the diffusion process","year":"2022","author":"Lyu","key":"ref57"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01492"},{"key":"ref59","first-page":"23803","article-title":"Cross-entropy loss functions: Theoretical analysis and applications","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Mao"},{"key":"ref60","first-page":"1","article-title":"NVIDIA 8-bit inference width TensorRT","volume-title":"Proc. GPU Technol. Conf.","author":"Migacz"},{"key":"ref61","first-page":"7197","article-title":"Up or down? Adaptive rounding for post-training quantization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Nagel"},{"article-title":"A white paper on neural network quantization","year":"2021","author":"Nagel","key":"ref62"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00746"},{"key":"ref64","first-page":"8162","article-title":"Improved denoising diffusion probabilistic models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Nichol"},{"year":"2019","key":"ref65","article-title":"Tensorrt"},{"key":"ref66","article-title":"Stable diffusion with core ML on Apple silicon","author":"Orhon","year":"2022","journal-title":"GitHub repository"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW60793.2023.00157"},{"article-title":"PyTorch: An imperative style, high-performance deep learning library","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Paszke","key":"ref68"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/iccv51070.2023.00387"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"},{"article-title":"SDXL: Improving latent diffusion models for high-resolution image synthesis","year":"2023","author":"Podell","key":"ref71"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_32"},{"article-title":"FastSpeech 2: Fast and high-quality end-to-end text to speech","year":"2022","author":"Ren","key":"ref73"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"ref76","first-page":"2234","article-title":"Improved techniques for training GANs","volume-title":"Proc. 30th Int. Conf. Neural Inf. Process. Syst.","author":"Salimans"},{"key":"ref77","first-page":"1","article-title":"Progressive distillation for fast sampling of diffusion models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Salimans"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73016-0_6"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00196"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"article-title":"Temporal dynamic quantization for diffusion models","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"So","key":"ref81"},{"key":"ref82","first-page":"1","article-title":"Denoising diffusion implicit models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Song"},{"key":"ref83","first-page":"1","article-title":"Score-based generative modeling through stochastic differential equations","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Song"},{"article-title":"BitsFusion: 1.99 bits weight quantization of diffusion model","volume-title":"Proc. 38th Int. Conf. Neural Inf. Process. Syst.","author":"Sui","key":"ref84"},{"article-title":"Towards accurate data-free quantization for diffusion models","year":"2023","author":"Wang","key":"ref85"},{"article-title":"QuEST: Low-bit diffusion model quantization via efficient selective finetuning","year":"2024","author":"Wang","key":"ref86"},{"key":"ref87","first-page":"9847","article-title":"Towards accurate post-training network quantization via bit-split and stitching","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang"},{"key":"ref88","first-page":"1","article-title":"Learning fast samplers for diffusion models by differentiating through sample quality","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Watson"},{"key":"ref89","first-page":"1","article-title":"QDrop: Randomly dropping quantization for extremely low-bit post-training quantization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wei"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00594"},{"article-title":"Integer quantization for deep learning inference: Principles and empirical evaluation","year":"2020","author":"Wu","key":"ref91"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01843"},{"article-title":"PTQ4DiT: Post-training quantization for diffusion transformers","volume-title":"Proc. 38th Int. Conf. Neural Inf. Process. Syst.","author":"Wu","key":"ref93"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02160"},{"article-title":"LSUN: Construction of a large-scale image dataset using deep learning with humans in the loop","year":"2016","author":"Yu","key":"ref95"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-022-03691-1"},{"article-title":"Fast sampling of diffusion models with exponential integrator","year":"2022","author":"Zhang","key":"ref97"},{"article-title":"gDDIM: Generalized denoising diffusion implicit models","year":"2022","author":"Zhang","key":"ref98"},{"article-title":"OPT: Open pre-trained transformer language models","year":"2022","author":"Zhang","key":"ref99"},{"article-title":"ViDiT-Q: Efficient and accurate quantization of diffusion transformers for image and video generation","year":"2025","author":"Zhao","key":"ref100"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73033-7_13"},{"key":"ref102","article-title":"Truncated diffusion probabilistic models","volume":"1050","author":"Zheng","year":"2022","journal-title":"Stat."},{"article-title":"Open-Sora: Democratizing efficient video production for all","year":"2024","author":"Zheng","key":"ref103"},{"key":"ref104","first-page":"27179","article-title":"On the optimization landscape of neural collapse under MSE loss: Global optimality with unconstrained features","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zhou"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00826"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/34\/11163533\/11068163.pdf?arnumber=11068163","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T13:11:16Z","timestamp":1758028276000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11068163\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10]]},"references-count":105,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2025.3585692","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"type":"print","value":"0162-8828"},{"type":"electronic","value":"2160-9292"},{"type":"electronic","value":"1939-3539"}],"subject":[],"published":{"date-parts":[[2025,10]]}}}