{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T00:07:15Z","timestamp":1755907635285,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,17]],"date-time":"2024-07-17T00:00:00Z","timestamp":1721174400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1818253, 1854828, 2007991, 2018627, 2311830, 2312927"],"award-info":[{"award-number":["1818253, 1854828, 2007991, 2018627, 2311830, 2312927"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"XRAC","award":["NCR-130002"],"award-info":[{"award-number":["NCR-130002"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,17]]},"DOI":"10.1145\/3626203.3670548","type":"proceedings-article","created":{"date-parts":[[2024,7,17]],"date-time":"2024-07-17T20:12:20Z","timestamp":1721247140000},"page":"1-9","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Infer-HiRes: Accelerating Inference for High-Resolution Images with Quantization and Distributed Deep Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-2591-1082","authenticated-orcid":false,"given":"Radha","family":"Gulhane","sequence":"first","affiliation":[{"name":"The Ohio State University, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6823-9080","authenticated-orcid":false,"given":"Quentin","family":"Anthony","sequence":"additional","affiliation":[{"name":"The Ohio State University, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1924-2769","authenticated-orcid":false,"given":"Aamir","family":"Shafi","sequence":"additional","affiliation":[{"name":"The Ohio State University, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1200-2754","authenticated-orcid":false,"given":"Hari","family":"Subramoni","sequence":"additional","affiliation":[{"name":"The Ohio State University, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0356-1781","authenticated-orcid":false,"given":"Dhabaleswar K.","family":"Panda","sequence":"additional","affiliation":[{"name":"The Ohio State University, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,7,17]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2014. The CIFAR-10 Dataset. https:\/\/www.cs.toronto.edu\/\u00a0kriz\/cifar.html. Accessed: 2024-01-31."},{"key":"e_1_3_2_1_2_1","unstructured":"2016. Camelyon 2016. https:\/\/camelyon16.grand-challenge.org\/. Accessed: 2024-01-31."},{"key":"e_1_3_2_1_3_1","unstructured":"Hyunho Ahn Tian Chen Nawras Alnaasan Aamir Shafi Mustafa Abduljabbar Hari Subramoni Dhabaleswar K. and Panda. 2023. Performance Characterization of using Quantization for DNN Inference on Edge Devices: Extended Version. arxiv:2303.05016\u00a0[cs.PF]"},{"key":"e_1_3_2_1_4_1","unstructured":"Jon Braatz Pranav Rajpurkar Stephanie Zhang Andrew\u00a0Y. Ng and Jeanne Shen. 2022. Deep Learning-Based Sparse Whole-Slide Image Analysis for the Diagnosis of Gastric Intestinal Metaplasia. arxiv:2201.01449\u00a0[eess.IV]"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_6_1","unstructured":"NVIDIA Developer. 2016. Nvidia Collective Communications Library (NCCL). https:\/\/developer.nvidia.com\/nccl. Accessed: 2024-01-31."},{"key":"e_1_3_2_1_7_1","unstructured":"NVIDIA Developer. 2019. NVIDIA TensorRT. https:\/\/developer.nvidia.com\/tensorrt\/. Accessed: 2024-01-31."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Yinpeng Dong Renkun Ni Jianguo Li Yurong Chen Jun Zhu and Hang Su. 2017. Learning Accurate Low-Bit Deep Neural Networks with Stochastic Quantization. arxiv:1708.01001\u00a0[cs.CV]","DOI":"10.5244\/C.31.189"},{"key":"e_1_3_2_1_9_1","unstructured":"Fastai. [n. d.]. GitHub - fastai\/imagenette: A smaller subset of 10 easily classified classes from Imagenet and a little more French. https:\/\/github.com\/fastai\/imagenette"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2020.3040269"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Amir Gholami Sehoon Kim Zhen Dong Zhewei Yao Michael\u00a0W. Mahoney and Kurt Keutzer. 2021. A Survey of Quantization Methods for Efficient Neural Network Inference. arxiv:2103.13630\u00a0[cs.CV]","DOI":"10.1201\/9781003162810-13"},{"key":"e_1_3_2_1_12_1","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2015. Deep Residual Learning for Image Recognition. arxiv:1512.03385\u00a0[cs.CV]"},{"key":"e_1_3_2_1_13_1","unstructured":"Yanping Huang Youlong Cheng Ankur Bapna Orhan Firat Mia\u00a0Xu Chen Dehao Chen HyoukJoong Lee Jiquan Ngiam Quoc\u00a0V. Le Yonghui Wu and Zhifeng Chen. 2019. GPipe: Efficient Training of Giant Neural Networks using Pipeline Parallelism. arxiv:1811.06965\u00a0[cs.CV]"},{"key":"e_1_3_2_1_14_1","volume-title":"Deep learning models for histopathological classification of gastric and colonic epithelial tumours. Scientific reports 10, 1","author":"Iizuka Osamu","year":"2020","unstructured":"Osamu Iizuka, Fahdi Kanavati, Kei Kato, Michael Rambeau, Koji Arihiro, and Masayuki Tsuneki. 2020. Deep learning models for histopathological classification of gastric and colonic epithelial tumours. Scientific reports 10, 1 (2020), 1504."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00049"},{"volume-title":"Hy-Fi: Hybrid Five-Dimensional Parallel DNN Training on\u00a0High-Performance GPU Clusters","author":"Jain Arpan","key":"e_1_3_2_1_16_1","unstructured":"Arpan Jain, Aamir Shafi, Quentin Anthony, Pouya Kousha, Hari Subramoni, and Dhableswar\u00a0K. Panda. 2022. Hy-Fi: Hybrid Five-Dimensional Parallel DNN Training on\u00a0High-Performance GPU Clusters. In High Performance Computing, Ana-Lucia Varbanescu, Abhinav Bhatele, Piotr Luszczek, and Baboulin Marc (Eds.). Springer International Publishing, Cham, 109\u2013130."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41698-024-00499-9"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Mahendra Khened Avinash Kori Haran Rajkumar Balaji Srinivasan and Ganapathy Krishnamurthi. 2020. A Generalized Deep Learning Framework for Whole-Slide Image Segmentation and Analysis. arxiv:2001.00258\u00a0[eess.IV]","DOI":"10.1038\/s41598-021-90444-8"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2023.3246032"},{"key":"e_1_3_2_1_20_1","unstructured":"Zhikai Li and Qingyi Gu. 2023. I-ViT: Integer-only Quantization for Efficient Vision Transformer Inference. arxiv:2207.01405\u00a0[cs.CV]"},{"volume-title":"PyTorch: An Imperative Style","author":"Paszke Adam","key":"e_1_3_2_1_21_1","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems 32, H.\u00a0Wallach, H.\u00a0Larochelle, A.\u00a0Beygelzimer, F.\u00a0d\u2019Alch\u00e9 Buc, E.\u00a0Fox, and R.\u00a0Garnett (Eds.). Curran Associates, Inc., 8024\u20138035. http:\/\/papers.neurips.cc\/paper\/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"Andr\u00e9 Pedersen Marit Valla Anna\u00a0M. Bofin Javier\u00a0P\u00e9rez de Frutos Ingerid Reinertsen and Erik Smistad. 2020. FastPathology: An open-source platform for deep learning-based research and decision support in digital pathology. arxiv:2011.06033\u00a0[cs.LG]","DOI":"10.1109\/ACCESS.2021.3072231"},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of Machine Learning and Systems 5","author":"Pope Reiner","year":"2023","unstructured":"Reiner Pope, Sholto Douglas, Aakanksha Chowdhery, Jacob Devlin, James Bradbury, Jonathan Heek, Kefan Xiao, Shivani Agrawal, and Jeff Dean. 2023. Efficiently scaling transformer inference. Proceedings of Machine Learning and Systems 5 (2023)."},{"key":"e_1_3_2_1_24_1","unstructured":"Esteban Real Alok Aggarwal Yanping Huang and Quoc\u00a0V Le. 2019. Regularized Evolution for Image Classifier Architecture Search. arxiv:1802.01548\u00a0[cs.NE]"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.modpat.2023.100196"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Olaf Ronneberger Philipp Fischer and Thomas Brox. 2015. U-Net: Convolutional Networks for Biomedical Image Segmentation. arxiv:1505.04597\u00a0[cs.CV]","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3592979.3593401"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-019-40041-7"},{"key":"e_1_3_2_1_29_1","unstructured":"Hao Wu Patrick Judd Xiaojie Zhang Mikhail Isaev and Paulius Micikevicius. 2020. Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation. arxiv:2004.09602\u00a0[cs.LG]"},{"key":"e_1_3_2_1_30_1","unstructured":"Shuang Wu Guoqi Li Feng Chen and Luping Shi. 2018. Training and Inference with Integers in Deep Neural Networks. arxiv:1802.04680\u00a0[cs.LG]"},{"key":"e_1_3_2_1_31_1","unstructured":"Guangxuan Xiao Ji Lin Mickael Seznec Hao Wu Julien Demouth and Song Han. 2023. SmoothQuant: Accurate and Efficient Post-Training Quantization for Large Language Models. arxiv:2211.10438\u00a0[cs.CL]"}],"event":{"name":"PEARC '24: Practice and Experience in Advanced Research Computing","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing","SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"],"location":"Providence RI USA","acronym":"PEARC '24"},"container-title":["Practice and Experience in Advanced Research Computing 2024: Human Powered Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626203.3670548","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3626203.3670548","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:56:47Z","timestamp":1755867407000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626203.3670548"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,17]]},"references-count":31,"alternative-id":["10.1145\/3626203.3670548","10.1145\/3626203"],"URL":"https:\/\/doi.org\/10.1145\/3626203.3670548","relation":{},"subject":[],"published":{"date-parts":[[2024,7,17]]},"assertion":[{"value":"2024-07-17","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}