{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T22:33:15Z","timestamp":1773873195408,"version":"3.50.1"},"reference-count":69,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2024,4,1]],"date-time":"2024-04-01T00:00:00Z","timestamp":1711929600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,4,1]],"date-time":"2024-04-01T00:00:00Z","timestamp":1711929600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,4,1]],"date-time":"2024-04-01T00:00:00Z","timestamp":1711929600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2022YFB4500703"],"award-info":[{"award-number":["2022YFB4500703"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61972313"],"award-info":[{"award-number":["61972313"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61902211"],"award-info":[{"award-number":["61902211"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62202266"],"award-info":[{"award-number":["62202266"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100017596","name":"Natural Science Basic Research Program of Shaanxi Province","doi-asserted-by":"publisher","award":["2023-JC-JQ-50"],"award-info":[{"award-number":["2023-JC-JQ-50"]}],"id":[{"id":"10.13039\/501100017596","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002858","name":"China Postdoctoral Science Foundation","doi-asserted-by":"publisher","award":["2022M721831"],"award-info":[{"award-number":["2022M721831"]}],"id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100004318","name":"Microsoft Research Asia","doi-asserted-by":"publisher","award":["100336949"],"award-info":[{"award-number":["100336949"]}],"id":[{"id":"10.13039\/100004318","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Networking"],"published-print":{"date-parts":[[2024,4]]},"DOI":"10.1109\/tnet.2023.3321967","type":"journal-article","created":{"date-parts":[[2023,10,13]],"date-time":"2023-10-13T18:20:09Z","timestamp":1697221209000},"page":"1509-1523","source":"Crossref","is-referenced-by-count":5,"title":["Automating Cloud Deployment for Real-Time Online Foundation Model Inference"],"prefix":"10.1109","volume":"32","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3180-3511","authenticated-orcid":false,"given":"Yang","family":"Li","sequence":"first","affiliation":[{"name":"School of Software, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7286-122X","authenticated-orcid":false,"given":"Zhenhua","family":"Li","sequence":"additional","affiliation":[{"name":"School of Software, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2880-7100","authenticated-orcid":false,"given":"Zhenhua","family":"Han","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, Beijing, China"}]},{"given":"Quanlu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0934-5035","authenticated-orcid":false,"given":"Xiaobo","family":"Ma","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1097","article-title":"ImageNet classification with deep convolutional neural networks","volume-title":"Proc. NIPS","author":"Krizhevsky"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref4","first-page":"1764","article-title":"Towards end-to-end speech recognition with recurrent neural networks","volume-title":"Proc. ICML","author":"Graves"},{"key":"ref5","article-title":"Deep Speech: Scaling up end-to-end speech recognition","author":"Hannun","year":"2014","journal-title":"arXiv:1412.5567"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"ref7","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","volume-title":"Proc. NIPS","author":"Sutskever"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"ref9","article-title":"Neural machine translation by jointly learning to align and translate","author":"Bahdanau","year":"2014","journal-title":"arXiv:1409.0473"},{"key":"ref10","article-title":"Google\u2019s neural machine translation system: Bridging the gap between human and machine translation","author":"Wu","year":"2016","journal-title":"arXiv:1609.08144"},{"key":"ref11","article-title":"On the opportunities and risks of foundation models","author":"Bommasani","year":"2021","journal-title":"arXiv:2108.07258"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2017.83"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2008.07.035"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3135974.3135993"},{"key":"ref15","first-page":"469","article-title":"CherryPick: Adaptively unearthing the best cloud configurations for big data analytics","volume-title":"Proc. USENIX NSDI","volume":"2","author":"Alipourfard"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3570361.3613259"},{"key":"ref17","article-title":"Device placement optimization with reinforcement learning","author":"Mirhoseini","year":"2017","journal-title":"arXiv:1706.04972"},{"key":"ref18","first-page":"1662","article-title":"Spotlight: Optimizing device placement for training deep neural networks","volume-title":"Proc. ICML","author":"Gao"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM41043.2020.9155267"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/566171.566186"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359630"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.5555\/3026877.3026899"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref24","first-page":"1","article-title":"Neural machine translation by jointly learning to align and translate","volume-title":"Proc. ICLR","author":"Bahdanau"},{"key":"ref25","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","volume-title":"Proc. NAACL-HLT","author":"Kenton"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref27","first-page":"1","article-title":"Language models are unsupervised multitask learners","volume-title":"Proc. OpenAI Blog","volume":"1","author":"Radford"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.634"},{"key":"ref30","article-title":"Very deep convolutional networks for large-scale image recognition","author":"Simonyan","year":"2014","journal-title":"arXiv:1409.1556"},{"key":"ref31","article-title":"GDP: Generalized device placement for dataflow graphs","author":"Zhou","year":"2019","journal-title":"arXiv:1910.01578"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3452296.3472908"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2022.3222509"},{"key":"ref34","first-page":"595","article-title":"Gandiva: Introspective cluster scheduling for deep learning","volume-title":"Proc. USENIX OSDI","author":"Xiao"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3132747.3132766"},{"key":"ref36","first-page":"1","article-title":"Distillating knowledge about SCOTCH","volume-title":"Proc. Dagstuhl Seminar","author":"Pellegrini"},{"key":"ref37","volume-title":"METIS\u2014Unstructured graph partitioning and sparse matrix ordering system","author":"Karypis","year":"1995"},{"key":"ref38","article-title":"Neural combinatorial optimization with reinforcement learning","author":"Bello","year":"2016","journal-title":"arXiv:1611.09940"},{"key":"ref39","first-page":"525","article-title":"BOA: The Bayesian optimization algorithm","volume-title":"Proc. GECCO","author":"Pelikan"},{"key":"ref40","first-page":"2951","article-title":"Practical Bayesian optimization of machine learning algorithms","volume-title":"Proc. NIPS","author":"Snoek"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1023\/A:1008306431147"},{"key":"ref42","first-page":"937","article-title":"Bayesian optimization with inequality constraints","volume-title":"Proc. ICML","author":"Gardner"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-78800-3_24"},{"key":"ref44","first-page":"27","article-title":"Optimizing DNN computation with relaxed graph substitutions","volume-title":"Proc. MLSys","volume":"1","author":"Jia"},{"key":"ref45","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2014","journal-title":"arXiv:1412.6980"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref47","volume-title":"CUDA Zone","year":"2023"},{"key":"ref48","volume-title":"AMD is Losing the AI Battle, and It\u2019s Time It Started to Worry","author":"White","year":"2023"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1145\/3154842.3154843"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3041474"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-93025-1_4"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-30504-7_8"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TST.2017.7830891"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/SERVICES.2013.55"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.25080\/majora-8b375195-003"},{"key":"ref57","article-title":"Low-level augmented Bayesian optimization for finding the best cloud VM","author":"Hsu","year":"2017","journal-title":"arXiv:1712.10081"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1080\/00949655.2014.1002101"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1145\/3190508.3190541"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1201\/9781351003827-1"},{"key":"ref61","first-page":"267","article-title":"Unity: Accelerating DNN training through joint optimization of algebraic transformations and parallelization","volume-title":"Proc. USENIX OSDI","author":"Unger"},{"key":"ref62","article-title":"Alpa: Automating inter- and intra-operator parallelism for distributed deep learning","author":"Zheng","year":"2022","journal-title":"arXiv:2201.12023"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1145\/3472456.3472523"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1145\/3317689"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1002\/j.1538-7305.1970.tb01770.x"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1126\/science.220.4598.671"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/DAC.1982.1585498"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1287\/opre.37.6.865"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/43.159993"}],"container-title":["IEEE\/ACM Transactions on Networking"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/90\/10505042\/10284909.pdf?arnumber=10284909","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,19]],"date-time":"2024-04-19T17:36:49Z","timestamp":1713548209000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10284909\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4]]},"references-count":69,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tnet.2023.3321967","relation":{},"ISSN":["1063-6692","1558-2566"],"issn-type":[{"value":"1063-6692","type":"print"},{"value":"1558-2566","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,4]]}}}