{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T09:33:11Z","timestamp":1761989591065},"reference-count":32,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Comput."],"published-print":{"date-parts":[[2021]]},"DOI":"10.1109\/tc.2021.3084407","type":"journal-article","created":{"date-parts":[[2021,5,27]],"date-time":"2021-05-27T20:43:22Z","timestamp":1622148202000},"page":"1-1","source":"Crossref","is-referenced-by-count":10,"title":["AEML: An Acceleration Engine for Multi-GPU Load-balancing in Distributed Heterogeneous Environment"],"prefix":"10.1109","author":[{"given":"Zhuo","family":"Tang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lifan","family":"Du","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xuedong","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Li","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kenli","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/1327452.1327492"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/2886107.2886110"},{"key":"ref4","article-title":"Flink Programming Guide","year":"2012"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/2656206"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/2963098"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2013.17"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2013.222"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/NAS.2015.7255222"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2016.7840613"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/2907294.2907307"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICPADS.2016.0108"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CCGRID.2017.41"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2016.69"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2018.2794343"},{"key":"ref16","first-page":"485","article-title":"Tiresias: A GPU cluster manager for distributed deep learning","volume-title":"Proc. 16th USENIX Sym. Netw. Syst. Des. Implementation","author":"Gu"},{"key":"ref17","article-title":"TicTac: Accelerating distributed deep learning with communication scheduling","author":"Hashemi"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359642"},{"key":"ref19","article-title":"Compute unified device architecture programming guide","year":"2007"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS.2014.6865444"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICUFN.2017.7993784"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CLOUDCOM-ASIA.2013.17"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.3904"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid.2014.16"},{"key":"ref25","first-page":"283","article-title":"Shared memory multiplexing: A novel way to improve GPGPU throughput","volume-title":"Proc. 21st Int. Conf. Parallel Architectures Compilation Techn.","author":"Yang"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2014.7004245"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2010.5470413"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/HPCS.2010.5547097"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.23919\/IConAC.2017.8082085"},{"key":"ref30","first-page":"217","article-title":"MapCG: Writing parallel program portable between CPU and GPU","volume-title":"Proc. 19th Int. Conf. Parallel Architectures Compilation Techn.","author":"Hong"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2010.158"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2012.16"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2013.246"}],"container-title":["IEEE Transactions on Computers"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/12\/4358213\/09442941.pdf?arnumber=9442941","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,9]],"date-time":"2024-01-09T23:03:04Z","timestamp":1704841384000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9442941\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/tc.2021.3084407","relation":{},"ISSN":["0018-9340","1557-9956","2326-3814"],"issn-type":[{"value":"0018-9340","type":"print"},{"value":"1557-9956","type":"electronic"},{"value":"2326-3814","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]}}}