{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T11:12:33Z","timestamp":1775819553538,"version":"3.50.1"},"reference-count":41,"publisher":"IBM","issue":"4\/5","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IBM J. Res. &amp; Dev."],"published-print":{"date-parts":[[2017,7,1]]},"DOI":"10.1147\/jrd.2017.2716598","type":"journal-article","created":{"date-parts":[[2017,9,8]],"date-time":"2017-09-08T14:20:05Z","timestamp":1504880405000},"page":"12:1-12:11","source":"Crossref","is-referenced-by-count":9,"title":["Optimizing the efficiency of deep learning through accelerator virtualization"],"prefix":"10.1147","volume":"61","author":[{"given":"M.","family":"Gschwind","sequence":"first","affiliation":[]},{"given":"T.","family":"Kaldewey","sequence":"additional","affiliation":[]},{"given":"D. K.","family":"Tam","sequence":"additional","affiliation":[]}],"member":"3082","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.66"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/2304576.2304609"},{"key":"ref33","article-title":"Improving GPU utilization with Multi-Process Service (MPS)","author":"sah","year":"0"},{"key":"ref32","year":"2016"},{"key":"ref31","first-page":"579","article-title":"gScale: Scaling up GPU virtualization with dynamic sharing of graphics memory\n space","author":"xue","year":"0","journal-title":"Proc 2016 USENIX Annu Tech Conf"},{"key":"ref30","first-page":"517","article-title":"Boosting GPU virtualization performance with hybrid shadow page tables","author":"dong","year":"0","journal-title":"Proc 2015 Usenix Ann Technical Conf"},{"key":"ref37","article-title":"BVLC AlexNet Model","author":"shelhamer","year":"0"},{"key":"ref36","article-title":"The Next Platform, Refreshed IBM Power Linux systems add NVLINK","author":"morgan","year":"0"},{"key":"ref35","article-title":"Accelerating Watson's performance with OpenPOWER","author":"high","year":"0","journal-title":"Proc Int Conf High Perform Comput Netw Storage Anal"},{"key":"ref34","year":"0"},{"key":"ref10","article-title":"Stochastic gradient descent algorithm in the computational network\n toolkit","author":"guenter","year":"0","journal-title":"Proc NIPS Workshop Optim Mach Learn"},{"key":"ref40","article-title":"Logical partitioning and virtualization in a\n heterogeneous architecture","author":"day","year":"2011"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"ref12","article-title":"Torch7: A MATLAB-like environment for machine learning","author":"collobert","year":"2011","journal-title":"NIPS Big Learning Workshop"},{"key":"ref13","article-title":"Theano: A CPU and GPU math expression compiler","author":"bergstra","year":"0","journal-title":"Proc Python Sci Comput Conf"},{"key":"ref14","article-title":"MXNet: A\n flexible and efficient machine learning library for heterogeneous distributed systems","author":"chen","year":"2015","journal-title":"Proc NIPS Workshop Mach Learn Syst"},{"key":"ref15","year":"0"},{"key":"ref16","first-page":"1737","article-title":"Deep learning with\n limited numerical precision","author":"gupta","year":"0","journal-title":"Proceedings of the 32nd Intl Conf on Machine Learning"},{"key":"ref17","year":"0"},{"key":"ref18","author":"hannun","year":"2014"},{"key":"ref19","article-title":"Deep image: Scaling up image recognition","author":"wu","year":"2015"},{"key":"ref28","first-page":"109","article-title":"GPUvm: Why not virtualizing GPUs at the\n hypervisor?","author":"suzuki","year":"0","journal-title":"Proc 2014 Usenix Annual Technical Conf"},{"key":"ref4","article-title":"Facebook to open-source AI hardware design","author":"lee","year":"0"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CLOUD.2014.90"},{"key":"ref3","year":"0"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2014.09.003"},{"key":"ref29","first-page":"121","article-title":"A\n full GPU virtualization solution with mediated pass-through","author":"tian","year":"0","journal-title":"Proc 2014 Usenix Annual Technical Conf"},{"key":"ref5","year":"0"},{"key":"ref8","article-title":"cuDNN:\n Efficient primitives for deep learning","author":"chetlur","year":"2014"},{"key":"ref7","year":"0"},{"key":"ref2","year":"0"},{"key":"ref9","article-title":"TensorFlow: Large-scale machine learning on heterogeneous\n distributed systems","author":"abadi","year":"2016"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1609\/aimag.v36i4.2618"},{"key":"ref20","article-title":"Theano-based large-scale visual\n recognition with multiple GPUs","author":"ding","year":"2014"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2006.41"},{"key":"ref21","article-title":"Fast convolutional nets with fbfft: A GPU performance evaluation","author":"vasilache","year":"0"},{"key":"ref24","year":"2014"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/HOTCHIPS.2015.7477456"},{"key":"ref23","year":"0"},{"key":"ref26","year":"0"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1147\/JRD.2017.2709198"}],"container-title":["IBM Journal of Research and Development"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5288520\/8030196\/08030299.pdf?arnumber=8030299","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T18:03:22Z","timestamp":1761588202000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8030299\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,7,1]]},"references-count":41,"journal-issue":{"issue":"4\/5"},"URL":"https:\/\/doi.org\/10.1147\/jrd.2017.2716598","relation":{},"ISSN":["0018-8646","0018-8646"],"issn-type":[{"value":"0018-8646","type":"print"},{"value":"0018-8646","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,7,1]]}}}