{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,12,14]],"date-time":"2024-12-14T07:10:30Z","timestamp":1734160230750,"version":"3.30.2"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,10,21]]},"DOI":"10.1109\/mascots64422.2024.10786578","type":"proceedings-article","created":{"date-parts":[[2024,12,13]],"date-time":"2024-12-13T18:50:09Z","timestamp":1734115809000},"page":"1-8","source":"Crossref","is-referenced-by-count":0,"title":["Fast and Accurate DNN Performance Estimation across Diverse Hardware Platforms"],"prefix":"10.1109","author":[{"given":"Vishwas Vasudeva","family":"Kakrannaya","sequence":"first","affiliation":[{"name":"Pennsylvania State University University Park,PA,USA"}]},{"given":"Siddhartha Balakrishna","family":"Rai","sequence":"additional","affiliation":[{"name":"Pennsylvania State University University Park,PA,USA"}]},{"given":"Anand","family":"Sivasubramaniam","sequence":"additional","affiliation":[{"name":"Pennsylvania State University University Park,PA,USA"}]},{"given":"Timothy","family":"Zhu","sequence":"additional","affiliation":[{"name":"Pennsylvania State University University Park,PA,USA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/PMBS49563.2019.00017"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC50251.2020.00026"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2018.8573521"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2018.8573476"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3424669"},{"key":"ref6","article-title":"Scale-sim: Systolic cnn accelerator simulator","author":"Samajdar","year":"2018","journal-title":"arXiv preprint arXiv:1811.02883"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2017.29"},{"key":"ref8","article-title":"Crosssim","volume-title":"Sandia National Lab.(SNL-NM)","author":"Plimpton","year":"2016"},{"key":"ref9","article-title":"DNNabacus: Toward accurate computational cost prediction for deep neural networks","author":"Bai","year":"2022","journal-title":"arXiv preprint arXiv:2205.12095"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2018.8622396"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CAHPC.2018.8645908"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-SEIP58684.2023.00039"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3498361.3538932"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2023.3294253"},{"key":"ref15","first-page":"503","article-title":"Habitat: A runtime-based computational performance predictor for deep neural network training","volume-title":"Annual Technical Conference. USENIX, 2021","author":"Geoffrey"},{"article-title":"Paleo: A performance model for deep neural networks","volume-title":"International Conference on Learning Representations","author":"Qi","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.435"},{"key":"ref18","article-title":"Fast convolutional nets with fbfft: A gpu performance evaluation","author":"Vasilache","year":"2014","journal-title":"arXiv"},{"key":"ref19","first-page":"8024","article-title":"Pytorch: An imperative style, high-performance deep learning library","volume":"32","author":"Paszke","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"article-title":"TensorFlow: Large-scale machine learning on heterogeneous systems","year":"2015","author":"Abadi","key":"ref20"},{"year":"2020","key":"ref21","article-title":"NVIDIA A100 GPU Specification sheet"},{"year":"2019","key":"ref22","article-title":"NVIDIA Quadro RTX 8000 GPU Specification sheet"},{"key":"ref23","article-title":"BenchDNN benchmark in OneDNN"},{"year":"2021","key":"ref24","article-title":"Memory Latency Checker Tool"},{"key":"ref25","article-title":"NVIDIA Nsight Compute"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref29","article-title":"Very deep convnets for large-scale image recognition","author":"Simonyan","year":"2014","journal-title":"Computing Research Repository"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"ref31","first-page":"113","article-title":"Design space for scaling-in general purpose computing within the ddr dram hierarchy for mapreduce workloads","volume-title":"Proceedings of the ACM International Conference on Computing Frontiers","author":"Rai"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527431"}],"event":{"name":"2024 32nd International Conference on Modeling, Analysis and Simulation of Computer and Telecommunication Systems (MASCOTS)","start":{"date-parts":[[2024,10,21]]},"location":"Krakow, Poland","end":{"date-parts":[[2024,10,23]]}},"container-title":["2024 32nd International Conference on Modeling, Analysis and Simulation of Computer and Telecommunication Systems (MASCOTS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10786488\/10786336\/10786578.pdf?arnumber=10786578","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,14]],"date-time":"2024-12-14T06:34:08Z","timestamp":1734158048000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10786578\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,21]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/mascots64422.2024.10786578","relation":{},"subject":[],"published":{"date-parts":[[2024,10,21]]}}}