{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T15:57:50Z","timestamp":1765382270084,"version":"3.28.0"},"reference-count":25,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,9,22]],"date-time":"2020-09-22T00:00:00Z","timestamp":1600732800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,9,22]],"date-time":"2020-09-22T00:00:00Z","timestamp":1600732800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,9,22]],"date-time":"2020-09-22T00:00:00Z","timestamp":1600732800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,9,22]]},"DOI":"10.1109\/hpec43674.2020.9286138","type":"proceedings-article","created":{"date-parts":[[2020,12,22]],"date-time":"2020-12-22T21:07:15Z","timestamp":1608671235000},"page":"1-9","source":"Crossref","is-referenced-by-count":7,"title":["Inference Benchmarking on HPC Systems"],"prefix":"10.1109","author":[{"given":"Wesley","family":"Brewer","sequence":"first","affiliation":[]},{"given":"Greg","family":"Behm","sequence":"additional","affiliation":[]},{"given":"Alan","family":"Scheinine","sequence":"additional","affiliation":[]},{"given":"Ben","family":"Parsons","sequence":"additional","affiliation":[]},{"given":"Wesley","family":"Emeneker","sequence":"additional","affiliation":[]},{"given":"Robert P.","family":"Trevino","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","first-page":"1","article-title":"Large Scale Distributed Deep Networks","author":"dean","year":"2012","journal-title":"NIPS 2012 Neural Information Processing Systems"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"journal-title":"Accurate large minibatch sgd Training imagenet in 1 hour","year":"2017","author":"goyal","key":"ref12"},{"journal-title":"Parallel and Distributed Deep Learning","year":"2016","author":"hegde","key":"ref13"},{"key":"ref14","volume":"14","author":"metz","year":"2018","journal-title":"Big Bets on A I Open a New Frontier for Chip Start-Ups Too"},{"journal-title":"Deepbench","year":"2016","author":"narang","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3302541.3313098"},{"journal-title":"mLPerf Inference Benchmark","year":"2019","author":"reddi","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC.2019.8916327"},{"journal-title":"Benchmarking GPUDirect RDMA on Modern Server Platforms","year":"2014","author":"rossetti","key":"ref19"},{"journal-title":"A modular benchmarking infrastructure for highperformance and reproducible deep learning","year":"2019","author":"ben-nun","key":"ref4"},{"journal-title":"NVIDIA Tesla Deep Learning Product Performance","year":"2019","key":"ref3"},{"journal-title":"Best Practices for Scaling Deep Learning Training and Inference with TensorFlow on Intel Xeon Processor-Based HPC","year":"2019","author":"bhandare","key":"ref6"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3320060"},{"key":"ref8","article-title":"iBench: a Distributed Inference Simulation and Benchmark Suite","author":"brewer","year":"2020","journal-title":"IEEE High Performance Extreme Computing Conference"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3320288.3320304"},{"journal-title":"TESLA V100 performance guide Deep learning and HPC applications","year":"2018","key":"ref2"},{"journal-title":"Nvidia Deep Learning Platform - Giant Leaps In Performance And Efficiency For AI Services From The Data Center To The Network's Edge","year":"2017","key":"ref1"},{"key":"ref9","first-page":"102","article-title":"Dawnbench: An end-to-end deep learning benchmark and competition","volume":"100","author":"coleman","year":"2017","journal-title":"Training"},{"journal-title":"Horovod fast and easy distributed deep learning in tensorflow","year":"2018","author":"sergeev","key":"ref20"},{"key":"ref22","first-page":"2350","article-title":"Staleness-Aware Async-SGD for distributed deep learning","author":"wei","year":"2016","journal-title":"IJCAI International Joint Conference on Artificial Intelligence"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2017.2761740"},{"key":"ref24","first-page":"1","article-title":"Deep residual learning for image Recognition","author":"wu","year":"2017","journal-title":"Multimedia Tools and Applications"},{"key":"ref23","first-page":"1510","article-title":"TernGrad: Ternary gradients to reduce communication in distributed deep learning","author":"wen","year":"2017","journal-title":"Advances in neural information processing systems"},{"journal-title":"Tbd Benchmarking and analyzing deep neural network training","year":"2018","author":"zhu","key":"ref25"}],"event":{"name":"2020 IEEE High Performance Extreme Computing Conference (HPEC)","start":{"date-parts":[[2020,9,22]]},"location":"Waltham, MA, USA","end":{"date-parts":[[2020,9,24]]}},"container-title":["2020 IEEE High Performance Extreme Computing Conference (HPEC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9285977\/9286137\/09286138.pdf?arnumber=9286138","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,27]],"date-time":"2022-06-27T15:54:48Z","timestamp":1656345288000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9286138\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,9,22]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/hpec43674.2020.9286138","relation":{},"subject":[],"published":{"date-parts":[[2020,9,22]]}}}