{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T13:13:52Z","timestamp":1776950032727,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":65,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,5,4]]},"DOI":"10.1145\/3777884.3797012","type":"proceedings-article","created":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T12:27:26Z","timestamp":1776947246000},"page":"218-229","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Performance Analysis and Optimization of 3D Generative Diffusion Models across GPU Architectures"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-0401-3685","authenticated-orcid":false,"given":"Jeeho","family":"Ryoo","sequence":"first","affiliation":[{"name":"Fairleigh Dickinson University, Vancouver, BC, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6504-6723","authenticated-orcid":false,"given":"Yongchan","family":"Jung","sequence":"additional","affiliation":[{"name":"Fairleigh Dickinson University, Vancouver, BC, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2256-7974","authenticated-orcid":false,"given":"Muhammad Ali","family":"Khaliq","sequence":"additional","affiliation":[{"name":"University of Colorado at Colorado Springs, Colorado Springs, CO, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2158-1784","authenticated-orcid":false,"given":"Weidong","family":"Zhang","sequence":"additional","affiliation":[{"name":"Northeastern University, Vancouver, BC, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-2843-1780","authenticated-orcid":false,"given":"Jiatong","family":"Han","sequence":"additional","affiliation":[{"name":"Fairleigh Dickinson University, Vancouver, BC, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0260-2238","authenticated-orcid":false,"given":"Byeong Kil","family":"Lee","sequence":"additional","affiliation":[{"name":"University of Colorado at Colorado Springs, Colorado Springs, CO, USA"}]}],"member":"320","published-online":{"date-parts":[[2026,5,3]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC63097.2024.00025"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-59710-8_65"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2022.3220681"},{"key":"e_1_3_2_1_4_1","unstructured":"NVIDIA Corporation. 2017. NVIDIA Tesla V100 GPU Architecture. Technical Report. NVIDIA. https:\/\/images.nvidia.com\/content\/volta-architecture\/pdf\/volta-architecture-whitepaper.pdf"},{"key":"e_1_3_2_1_5_1","unstructured":"NVIDIA Corporation. 2020. NVIDIA A100 Tensor Core GPU Architecture. Technical Report. NVIDIA. https:\/\/www.nvidia.com\/content\/dam\/en-zz\/Solutions\/data-center\/nvidia-ampere-architecture-whitepaper.pdf"},{"key":"e_1_3_2_1_6_1","unstructured":"NVIDIA Corporation. 2022. NVIDIA H100 Tensor Core GPU Architecture. Technical Report. NVIDIA. https:\/\/resources.nvidia.com\/en-us-hopper-architecture\/nvidia-h100-tensor-c"},{"key":"e_1_3_2_1_7_1","unstructured":"NVIDIA Corporation. 2023a. Nsight Compute Kernel Profiling Guide. Technical Report. NVIDIA Corporation. https:\/\/docs.nvidia.com\/nsight-compute\/2023.2\/pdf\/ProfilingGuide.pdf v2023.2.2."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2023.10070122"},{"key":"e_1_3_2_1_9_1","volume-title":"Nsight Compute Profiling Guide. https:\/\/docs.nvidia.com\/nsight-compute\/ProfilingGuide\/index.html Version","author":"NVIDIA Corporation","year":"2025","unstructured":"NVIDIA Corporation. 2025. Nsight Compute Profiling Guide. https:\/\/docs.nvidia.com\/nsight-compute\/ProfilingGuide\/index.html Version 2025.3.1."},{"key":"e_1_3_2_1_10_1","volume-title":"The Secret to NVIDIA's AI Success","author":"Dally Bill","year":"2023","unstructured":"Bill Dally. 2023. The Secret to NVIDIA's AI Success. IEEE Spectrum (2023). https:\/\/spectrum.ieee.org\/nvidia-gpu"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i19.30095"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2024.3385504"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/PDP66500.2025.00019"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2022.3224660"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.3390\/electronics12214411"},{"key":"e_1_3_2_1_16_1","unstructured":"Ian Goodfellow Jean Pouget-Abadie Mehdi Mirza Bing Xu David Warde-Farley Sherjil Ozair Aaron Courville and Yoshua Bengio. 2014. Generative Adversarial Nets. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2023.3314747"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3629526.3653835"},{"key":"e_1_3_2_1_19_1","unstructured":"Jonathan Ho Ajay Jain and Pieter Abbeel. 2020. Denoising Diffusion Probabilistic Models. In Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.3389\/fonc.2022.969463"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/EMBC48229.2022.9871119"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2016.2531642"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CONF-SPML54095.2021.00059"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2025.3565183"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.3390\/jimaging11050152"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA59077.2024.00070"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/s42519-024-00422-2"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eclinm.2025.103228"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1186\/s12880-025-01701-5"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.3389\/fpubh.2023.1273253"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10889167"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2025.3607615"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS48437.2021.9407080"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-99-7022-3_37"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3399098"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.bbe.2025.09.001"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-023-39278-0"},{"key":"e_1_3_2_1_38_1","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) Workshops. IEEE, 1330-1339","author":"Nazir Maham","year":"2025","unstructured":"Maham Nazir, Muhammad Aqeel, and Francesco Setti. 2025. Diffusion-Based Data Augmentation for Medical Image Segmentation. In Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) Workshops. IEEE, 1330-1339."},{"key":"e_1_3_2_1_39_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning.","author":"Alexander","unstructured":"Alexander Q. Nichol and Prafulla Dhariwal. 2021. Improved Denoising Diffusion Probabilistic Models. In Proceedings of the 38th International Conference on Machine Learning."},{"key":"e_1_3_2_1_40_1","volume-title":"Special Issue on Score-Based Generative Models for Medical Imaging","author":"Medical Imaging IEEE","year":"2024","unstructured":"IEEE Transactions on Medical Imaging. 2024. Special Issue on Score-Based Generative Models for Medical Imaging. IEEE Transactions on Medical Imaging (2024)."},{"key":"e_1_3_2_1_41_1","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV). 4049-4058","author":"Park Geon Yeong","year":"2025","unstructured":"Geon Yeong Park, Sang Wan Lee, and Jong Chul Ye. 2025. Inference-Time Diffusion Model Distillation. In Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV). 4049-4058."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3116416"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-024-79602-w"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISBI53787.2023.10230567"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1093\/bjrai\/ubae006"},{"key":"e_1_3_2_1_47_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 28596-28608","author":"Sehwag Vikash","year":"2025","unstructured":"Vikash Sehwag, Xianghao Kong, Jingtao Li, Michael Spranger, and Lingjuan Lyu. 2025. Stretching Each Dollar: Diffusion Training from Scratch on a Micro-Budget. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 28596-28608."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3706599.3720274"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1093\/bjrai\/ubae007"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41591-024-03359-y"},{"key":"e_1_3_2_1_51_1","volume-title":"Proceedings of the 37th Asilomar Conference on Signals, Systems and Computers.","author":"Wang Zhou","unstructured":"Zhou Wang, Eero P. Simoncelli, and Alan C. Bovik. 2003. Multi-Scale Structural Similarity for Image Quality Assessment. In Proceedings of the 37th Asilomar Conference on Signals, Systems and Computers."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.3389\/frai.2024.1408843"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1093\/bjrai\/ubae013"},{"key":"e_1_3_2_1_54_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, 6211-6220","author":"Wimbauer Felix","year":"2024","unstructured":"Felix Wimbauer, Bichen Wu, Edgar Schoenfeld, Xiaoliang Dai, Ji Hou, Zijian He, Artsiom Sanakoyeu, Peizhao Zhang, Sam Tsai, Jonas Kohler, Christian Rupprecht, Daniel Cremers, Peter Vajda, and Jialiang Wang. 2024. Cache Me if You Can: Accelerating Diffusion Models through Block Caching. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, 6211-6220."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2024.3355455"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41746-025-02035-w"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.5547"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00045"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2019.101552"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSII.2023.10272667"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISBI56570.2024.10635518"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","unstructured":"J. Zhao and S. Li. 2025. Radiomics-Driven Diffusion Model and Monte Carlo Compression Sampling for Reliable Medical Image Synthesis. IEEE Journal of Biomedical and Health Informatics (2025). doi:10.1109\/JBHI.2025.3602674","DOI":"10.1109\/JBHI.2025.3602674"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2022.3214766"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.52202\/079017-1291"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"crossref","unstructured":"\u00d6zg\u00fcn \u00c7i\u00e7ek Ahmed Abdulkadir Soeren S. Lienkamp Thomas Brox and Olaf Ronneberger. 2016. 3D U-Net: Learning Dense Volumetric Segmentation from Sparse Annotation. In Medical Image Computing and Computer-Assisted Intervention - MICCAI.","DOI":"10.1007\/978-3-319-46723-8_49"}],"event":{"name":"ICPE '26: 17th ACM\/SPEC International Conference on Performance Engineering","location":"Florence Italy","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","SIGMETRICS ACM Special Interest Group on Measurement and Evaluation","SPEC"]},"container-title":["Proceedings of the 17th ACM\/SPEC International Conference on Performance Engineering"],"original-title":[],"deposited":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T12:28:27Z","timestamp":1776947307000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3777884.3797012"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5,3]]},"references-count":65,"alternative-id":["10.1145\/3777884.3797012","10.1145\/3777884"],"URL":"https:\/\/doi.org\/10.1145\/3777884.3797012","relation":{},"subject":[],"published":{"date-parts":[[2026,5,3]]},"assertion":[{"value":"2026-05-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}