{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,8]],"date-time":"2026-01-08T04:57:31Z","timestamp":1767848251875,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,4,13]],"date-time":"2019-04-13T00:00:00Z","timestamp":1555113600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,4,13]]},"DOI":"10.1145\/3300053.3319418","type":"proceedings-article","created":{"date-parts":[[2019,4,10]],"date-time":"2019-04-10T19:07:28Z","timestamp":1554923248000},"page":"12-21","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":24,"title":["Detailed Characterization of Deep Neural Networks on GPUs and FPGAs"],"prefix":"10.1145","author":[{"given":"Aajna","family":"Karki","sequence":"first","affiliation":[{"name":"Computer Engineering Department, San Jos\u00e9 State University, San Jos\u00e9, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chethan Palangotu","family":"Keshava","sequence":"additional","affiliation":[{"name":"Computer Engineering Department, San Jos\u00e9 State University, San Jos\u00e9, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Spoorthi Mysore","family":"Shivakumar","sequence":"additional","affiliation":[{"name":"Computer Engineering Department, San Jos\u00e9 State University, San Jos\u00e9, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Joshua","family":"Skow","sequence":"additional","affiliation":[{"name":"Computer Engineering Department, San Jos\u00e9 State University, San Jos\u00e9, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Goutam Madhukeshwar","family":"Hegde","sequence":"additional","affiliation":[{"name":"Computer Engineering Department, San Jos\u00e9 State University, San Jos\u00e9, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hyeran","family":"Jeon","sequence":"additional","affiliation":[{"name":"Computer Engineering Department, San Jos\u00e9 State University, San Jos\u00e9, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2019,4,13]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Standard Performance Evaluation Corporation (SPEC) Benchmark suite. {Online}. Available: https:\/\/www.spec.org\/  Standard Performance Evaluation Corporation (SPEC) Benchmark suite. {Online}. Available: https:\/\/www.spec.org\/"},{"key":"e_1_3_2_1_2_1","unstructured":"Princeton Application Repository for Shared-Memory Computers (PARSEC) Benchmark Suite. {Online}. Available: http:\/\/parsec.cs.princeton.edu\/  Princeton Application Repository for Shared-Memory Computers (PARSEC) Benchmark Suite. {Online}. Available: http:\/\/parsec.cs.princeton.edu\/"},{"key":"e_1_3_2_1_3_1","unstructured":"Baidu DeepBench. {Online}. Available: https:\/\/svail.github.io\/DeepBench\/  Baidu DeepBench. {Online}. Available: https:\/\/svail.github.io\/DeepBench\/"},{"key":"e_1_3_2_1_4_1","volume-title":"USA","author":"Adolf R.","year":"2016","unstructured":"R. Adolf , S. Rama , B. Reagen , G. Wei , and D. Brooks , \" Fathom: Reference Workloads for Modern Deep Learning Methods,\" in IEEE International Symposium on Workload Characterization (IISWC), Providence, RI , USA , Oct 2016 . R. Adolf, S. Rama, B. Reagen, G. Wei, and D. Brooks, \"Fathom: Reference Workloads for Modern Deep Learning Methods,\" in IEEE International Symposium on Workload Characterization (IISWC), Providence, RI, USA, Oct 2016."},{"key":"e_1_3_2_1_5_1","volume-title":"TBD: Benchmarking and Analyzing Deep Neural Network Training,\" in arXiv:1803.06905","author":"Zhu H.","year":"2018","unstructured":"H. Zhu , M. Akrout , B. Zheng , A. Pelegris , A. Phanishayee , B. Schroeder , and G. Pekhimenko , \" TBD: Benchmarking and Analyzing Deep Neural Network Training,\" in arXiv:1803.06905 , 2018 . H. Zhu, M. Akrout, B. Zheng, A. Pelegris, A. Phanishayee, B. Schroeder, and G. Pekhimenko, \"TBD: Benchmarking and Analyzing Deep Neural Network Training,\" in arXiv:1803.06905, 2018."},{"key":"e_1_3_2_1_6_1","volume-title":"USA","author":"Coleman C.","year":"2016","unstructured":"C. Coleman , D. Narayanan , D. Kang , T. Zhao , J. Zhang , L. Nardi , P. Bailis , K. Olukotun , C. R\u00c3l' , and M. Zaharia , \" DAWNBench: An End-to-End Deep Learning Benchmark and Competition,\" in Conference on Neural Information Processing Systems (NIPS), Long Beach, CA , USA , Sep 2016 . C. Coleman, D. Narayanan, D. Kang, T. Zhao, J. Zhang, L. Nardi, P. Bailis, K. Olukotun, C. R\u00c3l', and M. Zaharia, \"DAWNBench: An End-to-End Deep Learning Benchmark and Competition,\" in Conference on Neural Information Processing Systems (NIPS), Long Beach, CA, USA, Sep 2016."},{"key":"e_1_3_2_1_7_1","unstructured":"TensorFlow Benchmarks. {Online}. Available: https:\/\/www.tensorflow.org\/performance\/benchmarks  TensorFlow Benchmarks. {Online}. Available: https:\/\/www.tensorflow.org\/performance\/benchmarks"},{"key":"e_1_3_2_1_8_1","unstructured":"MLPerf: A broad ML benchmark suite for measuring performance of ML software frameworks ML hardware accelerators and ML cloud platforms. {Online}. Available: https:\/\/mlperf.org\/  MLPerf: A broad ML benchmark suite for measuring performance of ML software frameworks ML hardware accelerators and ML cloud platforms. {Online}. Available: https:\/\/mlperf.org\/"},{"key":"e_1_3_2_1_9_1","unstructured":"TensorFlow. {Online}. Available: https:\/\/www.tensorflow.org\/  TensorFlow. {Online}. Available: https:\/\/www.tensorflow.org\/"},{"key":"e_1_3_2_1_10_1","unstructured":"Keras: The Python Deep Learning library. {Online}. Available: https:\/\/keras.io\/  Keras: The Python Deep Learning library. {Online}. Available: https:\/\/keras.io\/"},{"key":"e_1_3_2_1_11_1","unstructured":"NVIDIA CUDA Deep Neural Network library (cuDNN). {Online}. Available: https:\/\/developer.nvidia.com\/cudnn  NVIDIA CUDA Deep Neural Network library (cuDNN). {Online}. Available: https:\/\/developer.nvidia.com\/cudnn"},{"key":"e_1_3_2_1_12_1","unstructured":"A Domain-Specific Architecture for Deep Neural Networks. {Online}. Available: https:\/\/cacm.acm.org\/magazines\/2018\/9\/230571-a-domain-specific-architecture-for-deep-neural-networks  A Domain-Specific Architecture for Deep Neural Networks. {Online}. Available: https:\/\/cacm.acm.org\/magazines\/2018\/9\/230571-a-domain-specific-architecture-for-deep-neural-networks"},{"key":"e_1_3_2_1_13_1","volume-title":"of Computer Science","author":"Krizhevsky A.","year":"2009","unstructured":"A. Krizhevsky , \"Learning Multiple Layers of Features from Tiny Images,\" in Master\u00e2\u0102&Zacute;s Thesis, Dept. of Computer Science , University of Toronto , 2009 . A. Krizhevsky, \"Learning Multiple Layers of Features from Tiny Images,\" in Master\u00e2\u0102&Zacute;s Thesis, Dept. of Computer Science, University of Toronto, 2009."},{"key":"e_1_3_2_1_14_1","volume-title":"USA","author":"Krizhevsky A.","year":"2012","unstructured":"A. Krizhevsky , I. Sutskever , and G. E. Hinton , \" Imagenet Classification with Deep Convolutional Neural Networks,\" in Conference on Neural Information Processing Systems (NIPS), Lake Tahoe, NV , USA , Dec 2012 . A. Krizhevsky, I. Sutskever, and G. E. Hinton, \"Imagenet Classification with Deep Convolutional Neural Networks,\" in Conference on Neural Information Processing Systems (NIPS), Lake Tahoe, NV, USA, Dec 2012."},{"key":"e_1_3_2_1_15_1","volume-title":"SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and &lt;0.5MB model size,\" in arXiv:1602.07360","author":"Iandola F. N.","year":"2016","unstructured":"F. N. Iandola , S. Han , M. W. Moskewicz , K. Ashraf , W. J. Dally , and K. Keutzer , \" SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and &lt;0.5MB model size,\" in arXiv:1602.07360 , 2016 . F. N. Iandola, S. Han, M. W. Moskewicz, K. Ashraf, W. J. Dally, and K. Keutzer, \"SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and &lt;0.5MB model size,\" in arXiv:1602.07360, 2016."},{"key":"e_1_3_2_1_16_1","volume-title":"USA","author":"He K.","year":"2016","unstructured":"K. He , X. Zhang , S. Ren , and J. Sun , \" Deep Residual Learning for Image Recognition,\" in IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Las Vegas, NV , USA , June 2016 . K. He, X. Zhang, S. Ren, and J. Sun, \"Deep Residual Learning for Image Recognition,\" in IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Las Vegas, NV, USA, June 2016."},{"key":"e_1_3_2_1_17_1","volume-title":"USA","author":"Simonyan K.","year":"2015","unstructured":"K. Simonyan and A. Zisserman , \" Very Deep Convolutional Networks For Large-scale Image Recognition,\" in International Conference on Learning Representations (ICLR), San Diego, CA , USA , 2015 . K. Simonyan and A. Zisserman, \"Very Deep Convolutional Networks For Large-scale Image Recognition,\" in International Conference on Learning Representations (ICLR), San Diego, CA, USA, 2015."},{"key":"e_1_3_2_1_18_1","volume-title":"Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation,\" in arXiv:1406.1078","author":"Cho K.","year":"2014","unstructured":"K. Cho , B. van Merrienboer , C. Gulcehre , D. Bahdanau , F. Bougares , H. Schwenk , and Y. Bengio , \" Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation,\" in arXiv:1406.1078 , 2014 . K. Cho, B. van Merrienboer, C. Gulcehre, D. Bahdanau, F. Bougares, H. Schwenk, and Y. Bengio, \"Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation,\" in arXiv:1406.1078, 2014."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1162\/089976600300015015"},{"key":"e_1_3_2_1_20_1","volume-title":"USA","author":"Bakhoda A.","year":"2009","unstructured":"A. Bakhoda , G. Yuan , W. W. L. Fung , H. Wong , and T. M. Aamodt , \" Analyzing CUDA Workloads Using a Detailed GPU Simulator,\" in IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS), Boston, MA , USA , April 2009 . A. Bakhoda, G. Yuan, W. W. L. Fung, H. Wong, and T. M. Aamodt, \"Analyzing CUDA Workloads Using a Detailed GPU Simulator,\" in IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS), Boston, MA, USA, April 2009."},{"key":"e_1_3_2_1_21_1","unstructured":"NVIDIA\u00e2\u0102&Zacute;s Next Generation CUDA Compute Architecture: Kepler GK110\/210. {Online}. Available: https:\/\/images.nvidia.com\/content\/pdf\/tesla\/NVIDIA-Kepler-GK110-GK210-Architecture-Whitepaper.pdf  NVIDIA\u00e2\u0102&Zacute;s Next Generation CUDA Compute Architecture: Kepler GK110\/210. {Online}. Available: https:\/\/images.nvidia.com\/content\/pdf\/tesla\/NVIDIA-Kepler-GK110-GK210-Architecture-Whitepaper.pdf"},{"key":"e_1_3_2_1_22_1","unstructured":"NVIDIA TX1. {Online}. Available: https:\/\/developer.nvidia.com\/embedded\/buy\/jetson-tx1  NVIDIA TX1. {Online}. Available: https:\/\/developer.nvidia.com\/embedded\/buy\/jetson-tx1"},{"key":"e_1_3_2_1_23_1","unstructured":"Xilinx PYNQ. {Online}. Available: https:\/\/www.xilinx.com\/support\/university\/boards-portfolio\/xup-boards\/XUPPYNQ.html  Xilinx PYNQ. {Online}. Available: https:\/\/www.xilinx.com\/support\/university\/boards-portfolio\/xup-boards\/XUPPYNQ.html"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3038228.3038239"},{"key":"e_1_3_2_1_25_1","unstructured":"NVIDIA CUDA SDK. {Online}. Available: https:\/\/developer.nvidia.com\/cuda-code-samples  NVIDIA CUDA SDK. {Online}. Available: https:\/\/developer.nvidia.com\/cuda-code-samples"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_2_1_28_1","first-page":"140","author":"Xu Q.","year":"2014","unstructured":"Q. Xu , H. Jeon , and M. Annavaram , \"Graph Processing on GPUs: Where are the Bottlenecks? \" in IEEE International Symposium on Workload Characterization (IISWC) , Oct 2014 , pp. 140 -- 149 . Q. Xu, H. Jeon, and M. Annavaram, \"Graph Processing on GPUs: Where are the Bottlenecks?\" in IEEE International Symposium on Workload Characterization (IISWC), Oct 2014, pp. 140--149.","journal-title":"\" in IEEE International Symposium on Workload Characterization (IISWC)"},{"key":"e_1_3_2_1_29_1","unstructured":"PyTorch. {Online}. Available: https:\/\/pytorch.org\/  PyTorch. {Online}. Available: https:\/\/pytorch.org\/"},{"key":"e_1_3_2_1_30_1","volume-title":"Improving computer-aided detection using convolutional neural networks and random view aggregation,\" in IEEE Trans. on Medical Imaging","author":"Roth H.","year":"2016","unstructured":"H. Roth , L. Lu , J. Liu , J. Yao , A. Seff , K. M. Cherry , E. Turkbey , and R. Summers , \" Improving computer-aided detection using convolutional neural networks and random view aggregation,\" in IEEE Trans. on Medical Imaging , 2016 . H. Roth, L. Lu, J. Liu, J. Yao, A. Seff, K. M. Cherry, E. Turkbey, and R. Summers, \"Improving computer-aided detection using convolutional neural networks and random view aggregation,\" in IEEE Trans. on Medical Imaging, 2016."},{"key":"e_1_3_2_1_31_1","volume-title":"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications,\" in arXiv:1704.04861","author":"Howard A. G.","year":"2017","unstructured":"A. G. Howard , M. Zhu , B. Chen , D. Kalenichenko , W. Wang , T. Weyand , M. Andreetto , and H. Adam , \" MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications,\" in arXiv:1704.04861 , 2017 . A. G. Howard, M. Zhu, B. Chen, D. Kalenichenko, W. Wang, T. Weyand, M. Andreetto, and H. Adam, \"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications,\" in arXiv:1704.04861, 2017."},{"key":"e_1_3_2_1_32_1","unstructured":"ImageNet Large Scale Visual Recognition Challenge (ILSVRC). {Online}. Available: http:\/\/www.image-net.org\/challenges\/LSVRC\/  ImageNet Large Scale Visual Recognition Challenge (ILSVRC). {Online}. Available: http:\/\/www.image-net.org\/challenges\/LSVRC\/"},{"key":"e_1_3_2_1_33_1","unstructured":"CIFAR-10 and CIFAR-100 Database. {Online}. Available: https:\/\/www.cs.toronto.edu\/~kriz\/cifar.html  CIFAR-10 and CIFAR-100 Database. {Online}. Available: https:\/\/www.cs.toronto.edu\/~kriz\/cifar.html"},{"key":"e_1_3_2_1_34_1","volume-title":"ImageNet: A Large-Scale Hierarchical Image Database,\" in IEEE Computer Vision and Pattern Recognition","author":"Deng J.","year":"2009","unstructured":"J. Deng , W. Dong , R. Socher , L.-J. Li , K. Li , and L. Fei-Fei , \" ImageNet: A Large-Scale Hierarchical Image Database,\" in IEEE Computer Vision and Pattern Recognition , 2009 . J. Deng, W. Dong, R. Socher, L.-J. Li, K. Li, and L. Fei-Fei, \"ImageNet: A Large-Scale Hierarchical Image Database,\" in IEEE Computer Vision and Pattern Recognition, 2009."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485964"},{"key":"e_1_3_2_1_36_1","unstructured":"NVIDIA Profiler. {Online}. Available: https:\/\/docs.nvidia.com\/cuda\/profiler-users-guide\/index.html  NVIDIA Profiler. {Online}. Available: https:\/\/docs.nvidia.com\/cuda\/profiler-users-guide\/index.html"},{"key":"e_1_3_2_1_37_1","unstructured":"Google Tensor Processing Unit. {Online}. Available: https:\/\/cloudplatform.googleblog.com\/2016\/05\/ Google-supercharges-machine-learning-tasks-with-custom-chip.html  Google Tensor Processing Unit. {Online}. Available: https:\/\/cloudplatform.googleblog.com\/2016\/05\/ Google-supercharges-machine-learning-tasks-with-custom-chip.html"},{"key":"e_1_3_2_1_38_1","volume-title":"Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1,\" in arXiv:1602.02830","author":"Courbariaux M.","year":"2016","unstructured":"M. Courbariaux , I. Hubara , D. Soudry , R. El-Yaniv , and Y. Bengio , \" Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1,\" in arXiv:1602.02830 , 2016 . M. Courbariaux, I. Hubara, D. Soudry, R. El-Yaniv, and Y. Bengio, \"Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1,\" in arXiv:1602.02830, 2016."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830784"}],"event":{"name":"ASPLOS '19: Architectural Support for Programming Languages and Operating Systems","location":"Providence RI USA","acronym":"ASPLOS '19","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGOPS ACM Special Interest Group on Operating Systems","SIGARCH ACM Special Interest Group on Computer Architecture","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the 12th Workshop on General Purpose Processing Using GPUs"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3300053.3319418","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3300053.3319418","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:23:51Z","timestamp":1750202631000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3300053.3319418"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,4,13]]},"references-count":38,"alternative-id":["10.1145\/3300053.3319418","10.1145\/3300053"],"URL":"https:\/\/doi.org\/10.1145\/3300053.3319418","relation":{},"subject":[],"published":{"date-parts":[[2019,4,13]]},"assertion":[{"value":"2019-04-13","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}