{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T07:37:34Z","timestamp":1763192254238,"version":"3.45.0"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T00:00:00Z","timestamp":1760227200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T00:00:00Z","timestamp":1760227200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,12]]},"DOI":"10.1109\/waspaa66052.2025.11230962","type":"proceedings-article","created":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T18:46:47Z","timestamp":1763146007000},"page":"1-5","source":"Crossref","is-referenced-by-count":0,"title":["Post-Training Quantization for Audio Diffusion Transformers"],"prefix":"10.1109","author":[{"given":"Tanmay","family":"Khandelwal","sequence":"first","affiliation":[{"name":"New York University,Courant Institute of Mathematical Sciences,NY,USA"}]},{"given":"Magdalena","family":"Fuentes","sequence":"additional","affiliation":[{"name":"New York University,MARL,NY,USA"}]}],"member":"263","reference":[{"article-title":"Sketch2sound: Controllable audio generation via time-varying signals and sonic imitations","year":"2024","author":"Flores Garc\u00eda","key":"ref1"},{"key":"ref2","first-page":"21450","article-title":"AudioLDM: Text-to-audio generation with latent diffusion models","volume-title":"Proceedings of the 40th International Conference on Machine Learning, ser. Proceedings of Machine Learning Research","volume":"202","author":"Liu"},{"key":"ref3","article-title":"Controllable music production with diffusion models and guidance gradients","volume-title":"NeurIPS","author":"Levy","year":"2023"},{"article-title":"Noise2Music: Text-conditioned music generation with diffusion models","year":"2023","author":"Huang","key":"ref4"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3677996.3678289"},{"article-title":"Edmsound: Spectrogram based diffusion models for efficient and high-quality audio synthesis","year":"2023","author":"Zhu","key":"ref6"},{"key":"ref7","doi-asserted-by":"crossref","DOI":"10.1109\/ICCV51070.2023.00387","article-title":"Scalable diffusion models with transformers","author":"Peebles","year":"2023"},{"key":"ref8","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR52729.2023.02171","article-title":"All are worth words: A ViT backbone for diffusion models","author":"Bao","year":"2023"},{"article-title":"Post-training quantization for diffusion transformer via hierarchical timestep grouping","year":"2025","author":"Ding","key":"ref9"},{"article-title":"Fast timing-conditioned latent audio diffusion","volume-title":"Proceedings of the 41st International Conference on Machine Learning, ser. ICML\u201924","author":"Evans","key":"ref10"},{"article-title":"AudioX: Diffusion transformer for anything-to-audio generation","year":"2025","author":"Tian","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.3390\/math11081915"},{"key":"ref13","doi-asserted-by":"crossref","DOI":"10.1109\/ICASSP49660.2025.10889311","article-title":"ImmerseDiffusion: A generative spatial audio latent diffusion model","volume-title":"ICASSP","author":"Heydari","year":"2025"},{"article-title":"DEX-TTS: Diffusion-based expressive text-to-speech with style modeling on time variability","year":"2024","author":"Park","key":"ref14"},{"article-title":"Token caching for diffusion transformer acceleration","year":"2024","author":"Lou","key":"ref15"},{"article-title":"Deepcache: Accelerating diffusion models for free","year":"2023","author":"Ma","key":"ref16"},{"key":"ref17","doi-asserted-by":"crossref","DOI":"10.1137\/22M1511709","article-title":"Post-training quantization for neural networks with provable guarantees","author":"Zhang","year":"2023"},{"key":"ref18","first-page":"17535","article-title":"Q-diffusion: Quantizing diffusion models","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV)","author":"Li"},{"article-title":"Temporal distribution-aware quantization for diffusion models","volume-title":"International Conference on Learning Representations (ICLR)","author":"Zhan","key":"ref19"},{"key":"ref20","article-title":"PTQ4ADM: Post-training quantization for efficient text conditional audio diffusion models","volume-title":"ArXiv","author":"Vora","year":"2024"},{"key":"ref21","article-title":"PTQ4DiT: Post-training quantization for diffusion transformers","volume-title":"Advances in Neural Information Processing Systems (NeurIPS)","author":"Wu","year":"2024"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00196"},{"key":"ref23","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR52733.2024.01517","article-title":"Towards accurate post-training quantization for diffusion models","author":"Wang","year":"2024"},{"article-title":"PTQD: Accurate post-training quantization for diffusion models","year":"2023","author":"He","key":"ref24"},{"article-title":"SVDQuant: Absorbing outliers by low-rank components for 4-bit diffusion models","year":"2025","author":"Li","key":"ref25"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/wacv61041.2025.00452"},{"article-title":"Smoothquant: Accurate and efficient post-training quantization for large language models","year":"2024","author":"Xiao","key":"ref27"},{"article-title":"Lora: Low-rank adaptation of large language models","year":"2021","author":"Hu","key":"ref28"},{"article-title":"Stable audio open","year":"2024","author":"Evans","key":"ref29"},{"key":"ref30","first-page":"119","article-title":"AudioCaps: Generating captions for audios in the wild","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)","author":"Kim"},{"article-title":"Attention is all you need","year":"2023","author":"Vaswani","key":"ref31"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1011"}],"event":{"name":"2025 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","start":{"date-parts":[[2025,10,12]]},"location":"Tahoe City, CA, USA","end":{"date-parts":[[2025,10,15]]}},"container-title":["2025 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11230875\/11230917\/11230962.pdf?arnumber=11230962","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T07:32:33Z","timestamp":1763191953000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11230962\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,12]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/waspaa66052.2025.11230962","relation":{},"subject":[],"published":{"date-parts":[[2025,10,12]]}}}