{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:13:12Z","timestamp":1740100392341,"version":"3.37.3"},"reference-count":47,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,9,20]],"date-time":"2021-09-20T00:00:00Z","timestamp":1632096000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,9,20]],"date-time":"2021-09-20T00:00:00Z","timestamp":1632096000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,9,20]],"date-time":"2021-09-20T00:00:00Z","timestamp":1632096000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100006602","name":"Air Force Research Laboratory","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006602","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,9,20]]},"DOI":"10.1109\/hpec49654.2021.9622863","type":"proceedings-article","created":{"date-parts":[[2021,12,1]],"date-time":"2021-12-01T20:57:36Z","timestamp":1638392256000},"page":"1-8","source":"Crossref","is-referenced-by-count":2,"title":["Serving Machine Learning Inference Using Heterogeneous Hardware"],"prefix":"10.1109","author":[{"given":"Baolin","family":"Li","sequence":"first","affiliation":[]},{"given":"Vijay","family":"Gadepally","sequence":"additional","affiliation":[]},{"given":"Siddharth","family":"Samsi","sequence":"additional","affiliation":[]},{"given":"Mark","family":"Veillette","sequence":"additional","affiliation":[]},{"given":"Devesh","family":"Tiwari","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.81"},{"key":"ref38","article-title":"Sevir: A storm event imagery dataset for deep learning applications in radar and satellite meteorology","volume":"33","author":"veillette","year":"2020","journal-title":"Advances in neural information processing systems"},{"article-title":"Yolov3: An incremental improvement","year":"2018","author":"redmon","key":"ref33"},{"key":"ref32","first-page":"234","article-title":"U-net: Convolutional networks for biomedical image segmentation","author":"ronneberger","year":"2015","journal-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/EuCNC.2017.7980743"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/PDP.2017.31"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1029\/2019EA000812"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1175\/BAMS-D-11-00263.1"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1175\/1520-0450(2004)043<0231:POPFCR>2.0.CO;2"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1175\/1520-0426(1993)010<0785:TTITAA>2.0.CO;2"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3190508.3190517"},{"key":"ref40","first-page":"779","article-title":"You only look once: Unified, real-time object detection","author":"redmon","year":"2016","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref11","first-page":"595","article-title":"Gandiva: Introspective cluster scheduling for deep learning","author":"xiao","year":"2018","journal-title":"13th USENIX Symposium on Operating Systems Design and Implementation ( OSDI 18)"},{"key":"ref12","first-page":"353","article-title":"{ALERT}: Accurate learning for energy and timeliness","author":"wan","year":"2020","journal-title":"2020 USENIX Annual Technical Conference ( USENIX ATC 20)"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3447555.3465326"},{"key":"ref14","first-page":"633","article-title":"Ship compute or ship data? why not both?","author":"you","year":"2021","journal-title":"NSDI"},{"key":"ref15","first-page":"613","article-title":"Clipper: A low-latency online prediction serving system","author":"crankshaw","year":"2017","journal-title":"14th USENIX Symposium on Networked Systems Design and Implementation NSDI 17)"},{"key":"ref16","first-page":"469","article-title":"Cherrypick: Adaptively unearthing the best cloud configurations for big data analytics","author":"alipourfard","year":"2017","journal-title":"14th USENIX Symposium on Networked Systems Design and Implementation NSDI 17)"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM41043.2020.9155267"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/SERVICES.2013.55"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3342195.3387547"},{"key":"ref28","first-page":"947","article-title":"Analysis of large-scale multi-tenant {GPU} clusters for {DNN} training workloads","author":"jeon","year":"2019","journal-title":"2019 USENIX Annual Technical Conference ( USENIX ATC 19)"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00084"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00047"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.634"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/GLOCOM.2017.8254720"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33015941"},{"key":"ref8","first-page":"289","article-title":"Themis: Fair and efficient {GPU} cluster scheduling","author":"mahajan","year":"2020","journal-title":"17th USENIX Symposium on Networked Systems Design and Implementation ( NSDI 20)"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1630"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2020.03.355"},{"key":"ref9","first-page":"485","article-title":"Tiresias: A {GPU} cluster manager for distributed deep learning","author":"gu","year":"2019","journal-title":"16th USENIX Symposium on Networked Systems Design and Implementation ( NSDI 19)"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1561\/0600000079"},{"key":"ref46","first-page":"1","article-title":"CVXPY: A Python-embedded modeling language for convex optimization","volume":"17","author":"diamond","year":"2016","journal-title":"Journal of Machine Learning Research"},{"key":"ref20","first-page":"307","article-title":"Hetpipe: Enabling large {DNN} training on (whimpy) heterogeneous {GPU} clusters through integration of pipelined model parallelism and data parallelism","author":"park","year":"2020","journal-title":"2020 USENIX Annual Technical Conference ( USENIX ATC 20)"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/BFb0120913"},{"key":"ref22","first-page":"1049","article-title":"Mark: Exploiting cloud services for cost-effective, slo-aware machine learning inference serving","author":"zhang","year":"2019","journal-title":"2019 USENIX Annual Technical Conference ( USENIX ATC 19)"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1080\/23307706.2017.1397554"},{"key":"ref21","first-page":"481","article-title":"Heterogeneity-aware cluster scheduling policies for deep learning workloads","author":"narayanan","year":"2020","journal-title":"OSDI 2020 14th USENIX Symposium on Operating Systems Design and Implementation"},{"article-title":"ultralytics\/xview-yolov3: Initial release","year":"2019","author":"jocher","key":"ref42"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00090"},{"article-title":"xview: Objects in context in overhead imagery","year":"2018","author":"lam","key":"ref41"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00073"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1137\/0108053"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421284"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD46524.2019.00075"},{"article-title":"Cocktail: Leveraging ensemble learning for optimized model serving in public cloud","year":"2021","author":"gunasekaran","key":"ref25"}],"event":{"name":"2021 IEEE High Performance Extreme Computing Conference (HPEC)","start":{"date-parts":[[2021,9,20]]},"location":"Waltham, MA, USA","end":{"date-parts":[[2021,9,24]]}},"container-title":["2021 IEEE High Performance Extreme Computing Conference (HPEC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9622740\/9622741\/09622863.pdf?arnumber=9622863","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T16:53:49Z","timestamp":1652201629000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9622863\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9,20]]},"references-count":47,"URL":"https:\/\/doi.org\/10.1109\/hpec49654.2021.9622863","relation":{},"subject":[],"published":{"date-parts":[[2021,9,20]]}}}