{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T17:08:19Z","timestamp":1730221699222,"version":"3.28.0"},"reference-count":43,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,9,13]],"date-time":"2023-09-13T00:00:00Z","timestamp":1694563200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,9,13]],"date-time":"2023-09-13T00:00:00Z","timestamp":1694563200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,9,13]]},"DOI":"10.1109\/fdl59689.2023.10272088","type":"proceedings-article","created":{"date-parts":[[2023,10,9]],"date-time":"2023-10-09T18:23:26Z","timestamp":1696875806000},"page":"1-8","source":"Crossref","is-referenced-by-count":1,"title":["Hybrid PTX Analysis for GPU accelerated CNN inferencing aiding Computer Architecture Design"],"prefix":"10.1109","author":[{"given":"Christopher A.","family":"Metz","sequence":"first","affiliation":[{"name":"Institute of Computer Science, University of Bremen,Bremen,Germany"}]},{"given":"Christina","family":"Plump","sequence":"additional","affiliation":[{"name":"Cyber-Physical Systems, DFKI GmbH,Bremen,Germany"}]},{"given":"Bernhard J.","family":"Berger","sequence":"additional","affiliation":[{"name":"Institute of Embedded Systems, Hamburg University of Technology,Hamburg,Germany"}]},{"given":"Rolf","family":"Drechsler","sequence":"additional","affiliation":[{"name":"Institute of Computer Science, University of Bremen,Bremen,Germany"}]}],"member":"263","reference":[{"key":"ref13","first-page":"166","article-title":"Ml-based power estimation of convolutional neural networks on gpgus","author":"metz","year":"0","journal-title":"IEEE Symposium on Design and Diagnostic of Electronic Circuits and Systems"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3458744.3473356"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/93548.93576"},{"journal-title":"Profiler User's Guide","year":"0","key":"ref15"},{"key":"ref37","article-title":"Mobilenets: Efficient convolutional neural networks for mobile vision applications","volume":"abs 1704 4861","author":"howard","year":"2017","journal-title":"CoRR"},{"key":"ref14","first-page":"103","article-title":"Towards neural hardware search: Power estimation of cnns for gpgpus with dynamic frequency scaling","author":"metz","year":"2022","journal-title":"Proceedings of the 2022 ACM\/IEEE Workshop on Machine Learning for CAD"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750375"},{"journal-title":"Parallel Thread Execution ISA","year":"0","key":"ref30"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS53621.2022.00055"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/567532.567555"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2951218"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/1816038.1815998"},{"journal-title":"Summit GPU Supercomputer Enables Smarter Science","year":"2018","author":"foertter","key":"ref2"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2017.3641648"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/2962131"},{"journal-title":"Professional CUDA C Programming","year":"2014","author":"cheng","key":"ref39"},{"journal-title":"Cupti documentation","year":"2022","key":"ref16"},{"key":"ref38","article-title":"Learning transferable architectures for scalable image recognition","volume":"abs 1707 7012","author":"zoph","year":"2017","journal-title":"CoRR"},{"key":"ref19","first-page":"257","article-title":"A formal analysis of the nvidia ptx memory consistency model","author":"lustig","year":"2019","journal-title":"Proceedings of the fourth international conference on Architectural support for programming languages and operating systems - AS"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/PMBS49563.2019.00014"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/MASCOTS.2010.43"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306801"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3115243"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/1555815.1555775"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS53621.2022.00026"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2015.7056063"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/75277.75280"},{"journal-title":"GPU Ocelot","year":"2012","key":"ref22"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/j.sysarc.2020.101756"},{"journal-title":"Kernel profiling guide - user manual","year":"2020","key":"ref28"},{"journal-title":"Volta architecture whitepaper","year":"0","key":"ref27"},{"journal-title":"CUDA Toolkit Documentation","year":"2022","key":"ref29"},{"key":"ref8","article-title":"Accurate, large minibatch SGD: training imagenet in 1 hour","volume":"abs 1706 2677","author":"goyal","year":"2017","journal-title":"CoRR"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref9","article-title":"Extremely large minibatch SGD: training resnet-50 on imagenet in 15 minutes","volume":"abs 1711 4325","author":"akiba","year":"2017","journal-title":"CoRR"},{"journal-title":"New gpu-accelerated supercomputers change the balance of power on the top500","year":"2018","author":"feldman","key":"ref4"},{"journal-title":"List statistics","year":"2022","key":"ref3"},{"key":"ref6","article-title":"Deep residual learning for image recognition","volume":"abs 1512 3385","author":"he","year":"2015","journal-title":"CoRR"},{"journal-title":"NVIDIA V100 Tensor Core GPU","year":"2020","key":"ref5"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/278283.278285"}],"event":{"name":"2023 Forum on Specification & Design Languages (FDL)","start":{"date-parts":[[2023,9,13]]},"location":"Turin, Italy","end":{"date-parts":[[2023,9,15]]}},"container-title":["2023 Forum on Specification &amp; Design Languages (FDL)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10272030\/10272044\/10272088.pdf?arnumber=10272088","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,30]],"date-time":"2023-10-30T18:38:31Z","timestamp":1698691111000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10272088\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,13]]},"references-count":43,"URL":"https:\/\/doi.org\/10.1109\/fdl59689.2023.10272088","relation":{},"subject":[],"published":{"date-parts":[[2023,9,13]]}}}