{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,22]],"date-time":"2026-07-22T15:26:22Z","timestamp":1784733982820,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,22]],"date-time":"2023-06-22T00:00:00Z","timestamp":1687392000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,22]]},"DOI":"10.1145\/3593856.3595912","type":"proceedings-article","created":{"date-parts":[[2023,6,22]],"date-time":"2023-06-22T22:20:41Z","timestamp":1687472441000},"page":"80-86","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":14,"title":["System Virtualization for Neural Processing Units"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-0363-9486","authenticated-orcid":false,"given":"Yuqi","family":"Xue","sequence":"first","affiliation":[{"name":"University of Illinois Urbana Champaign, Urbana, United States"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-8171-4970","authenticated-orcid":false,"given":"Yiqi","family":"Liu","sequence":"additional","affiliation":[{"name":"University of Illinois Urbana Champaign, Urbana, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1125-671X","authenticated-orcid":false,"given":"Jian","family":"Huang","sequence":"additional","affiliation":[{"name":"University of Illinois Urbana Champaign, Urbana, United States"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,6,22]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Altexsoft. 2021. Comparing Machine Learning as a Service: Amazon Microsoft Azure Google Cloud AI IBM Watson. https:\/\/www.altexsoft.com\/blog\/datascience\/comparing-machine-learning-as-a-service-amazon-microsoft-azure-google-cloud-ai-ibm-watson\/"},{"key":"e_1_3_2_1_2_1","unstructured":"The Kubernetes Authors. 2023. Kubernetes Scheduler. https:\/\/kubernetes.io\/docs\/concepts\/scheduling-eviction\/kube-scheduler\/"},{"key":"e_1_3_2_1_3_1","unstructured":"Amazon AWS. 2022. Machine Learning on AWS Innovate faster with the most comprehensive set of AI and ML services. https:\/\/aws.amazon.com\/machine-learning\/"},{"key":"e_1_3_2_1_4_1","unstructured":"Amazon AWS. 2023. AWS Inferentia. https:\/\/aws.amazon.com\/machine-learning\/inferentia\/"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037700"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2872362.2872368"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541967"},{"key":"e_1_3_2_1_8_1","volume-title":"Proceedings of the 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI'18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. 2018. TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In Proceedings of the 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI'18). Carlsbad, CA."},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings of HotChips'17","author":"Chung Eric","year":"2017","unstructured":"Eric Chung, Jeremy Fowers, Kalin Ovtcharov, Michael Papamichael, Adrian Caulfield, Todd Massengil, Ming Liu, Daniel Lo, Shlomi Alkalay, Michael Haselman, Christian Boehn, Oren Firestein, Alessandro Forin, Kang Su Gatlin, Mahdi Ghandi, Stephen Heil, Kyle Holohan, Tamas Juhasz, Ratna Kumar Kovvuri, Sitaram Lanka, Friedel van Megen, Dima Mukhortov, Prerak Patel, Steve Reinhardt, Adam Sapek, Raja Seera, Balaji Sridharan, Lisa Woods, Phillip Yi-Xiao, Ritchie Zhao, and Doug Burger. 2017. Accelerating Persistent Neural Networks at Datacenter Scale. In Proceedings of HotChips'17. Cupertino, CA."},{"key":"e_1_3_2_1_10_1","unstructured":"Alibaba Clouder. 2019. Alibaba Unveils AI Chip to Enhance Cloud Computing Power. https:\/\/www.alibabacloud.com\/blog\/alibaba-unveils-ai-chip-to-enhance-cloud-computing-power_595409"},{"key":"e_1_3_2_1_11_1","unstructured":"The KubeVirt Contributors. 2023. KubeVirt.io. https:\/\/kubevirt.io\/"},{"key":"e_1_3_2_1_12_1","unstructured":"Google. 2022. System Architecture - Cloud TPU. https:\/\/cloud.google.com\/tpu\/docs\/system-architecture-tpu-vm"},{"key":"e_1_3_2_1_13_1","unstructured":"Google. 2023. Supported reference models. https:\/\/cloud.google.com\/tpu\/docs\/tutorials\/supported-models"},{"key":"e_1_3_2_1_14_1","unstructured":"Graphcore. 2022. Graphcore IPU Overview. https:\/\/www.graphcore.ai\/products\/ipu"},{"key":"e_1_3_2_1_15_1","volume-title":"Graphcloud: Cloud-based Machine Intelligence. https:\/\/www.graphcore.ai\/graphcloud","year":"2023","unstructured":"Graphcore. 2023. Graphcloud: Cloud-based Machine Intelligence. https:\/\/www.graphcore.ai\/graphcloud"},{"key":"e_1_3_2_1_16_1","unstructured":"Linley Gwennap. 2020. Tenstorrent Scales AI Performance: New Multicore Architecture Leads in Data-Center Power Efficiency. https:\/\/www.linleygroup.com\/mpr\/article.php?id=12287"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378494"},{"key":"e_1_3_2_1_18_1","unstructured":"Neo Jia and Kirti Wankhede. 2023. VFIO Mediated devices. https:\/\/docs.kernel.org\/driver-api\/vfio-mediated-device.html"},{"key":"e_1_3_2_1_19_1","first-page":"7","article-title":"A Domain-Specific Supercomputer for Training Deep Neural","volume":"63","author":"Jouppi Norman P.","year":"2020","unstructured":"Norman P. Jouppi, Doe Hyun Yoon, George Kurian, Sheng Li, Nishant Patil, James Laudon, Cliff Young, and David Patterson. 2020. A Domain-Specific Supercomputer for Training Deep Neural Networks. Commun. ACM 63, 7 (June 2020).","journal-title":"Networks. Commun. ACM"},{"key":"e_1_3_2_1_20_1","unstructured":"The kernel development community. 2023. Linux KVM Hypercall. https:\/\/docs.kernel.org\/virt\/kvm\/x86\/hypercalls.html"},{"key":"e_1_3_2_1_21_1","unstructured":"Wolfram Alpha LLC. 2023. WolframAlpha: Computational Intelligence. https:\/\/www.wolframalpha.com\/"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2749475"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155650"},{"key":"e_1_3_2_1_24_1","unstructured":"Nvidia. 2022. Multi-Instance GPU User Guide. https:\/\/docs.nvidia.com\/datacenter\/tesla\/mig-user-guide\/"},{"key":"e_1_3_2_1_25_1","unstructured":"Ejiro Onose. 2022. Machine Learning as a Service: What It Is When to Use It and What Are the Best Tools Out There. https:\/\/neptune.ai\/blog\/machine-learning-as-a-service-what-it-is-when-to-use-it-and-what-are-the-best-tools-out-there"},{"key":"e_1_3_2_1_26_1","volume-title":"Proceedings of the 30th International Conference on Neural Information Processing Systems (NIPS'17)","author":"Paszke Adam","year":"2017","unstructured":"Adam Paszke, Sam Gross, Soumith Chintala, Gregory Chanan, Edward Yang, Zachary DeVito, Zeming Lin, Alban Desmaison, Luca Antiga, and Adam Lerer. 2017. Automatic Differentiation in PyTorch. In Proceedings of the 30th International Conference on Neural Information Processing Systems (NIPS'17). Long Beach, CA."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541942"},{"key":"e_1_3_2_1_28_1","volume-title":"Dilip Sequeira, Ashish Sirasao, Fei Sun, Hanlin Tang, Michael Thomson, Frank Wei, Ephrem Wu, Lingjie Xu, Koichi Yamada, Bing Yu, George Yuan, Aaron Zhong, Peizhao Zhang, and Yuchen Zhou.","author":"Reddi Vijay Janapa","year":"2020","unstructured":"Vijay Janapa Reddi, Christine Cheng, David Kanter, Peter Mattson, Guenther Schmuelling, Carole-Jean Wu, Brian Anderson, Maximilien Breughe, Mark Charlebois, William Chou, Ramesh Chukka, Cody Coleman, Sam Davis, Pan Deng, Greg Diamos, Jared Duke, Dave Fick, J. Scott Gardner, Itay Hubara, Sachin Idgunji, Thomas B. Jablin, Jeff Jiao, Tom St. John, Pankaj Kanwar, David Lee, Jeffery Liao, Anton Lokhmotov, Francisco Massa, Peng Meng, Paulius Micikevicius, Colin Osborne, Gennady Pekhimenko, Arun Tejusve Raghunath Rajan, Dilip Sequeira, Ashish Sirasao, Fei Sun, Hanlin Tang, Michael Thomson, Frank Wei, Ephrem Wu, Lingjie Xu, Koichi Yamada, Bing Yu, George Yuan, Aaron Zhong, Peizhao Zhang, and Yuchen Zhou. 2020. MLPerf Inference Benchmark. arXiv:1911.02549"},{"key":"e_1_3_2_1_29_1","unstructured":"RUN:AI. 2022. Google TPU Architecture and Performance Best Practices. https:\/\/www.run.ai\/guides\/cloud-deep-learning\/google-tpu"},{"key":"e_1_3_2_1_30_1","unstructured":"Alexander Spiridonov. 2021. New Cloud TPU VMs make training your ML models on TPUs easier than ever. https:\/\/cloud.google.com\/blog\/products\/compute\/introducing-cloud-tpu-vms"},{"key":"e_1_3_2_1_31_1","unstructured":"Google TensorFlow. 2023. Create production-grade machine learning models with TensorFlow. https:\/\/www.tensorflow.org\/"},{"key":"e_1_3_2_1_32_1","volume-title":"XLA: Optimizing Compiler for Machine Learning. https:\/\/www.tensorflow.org\/xla","author":"TensorFlow Google","year":"2023","unstructured":"Google TensorFlow. 2023. XLA: Optimizing Compiler for Machine Learning. https:\/\/www.tensorflow.org\/xla"},{"key":"e_1_3_2_1_33_1","unstructured":"Haifeng Wang. 2019. HUAWEI CLOUD Enables More Intelligence with Its AI Chips. https:\/\/www.huaweicloud.com\/intl\/en-us\/cloudplus\/thirdphase\/detail_12.html"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589059"}],"event":{"name":"HotOS '23: 19th Workshop on Hot Topics in Operating Systems","location":"Providence RI USA","acronym":"HOTOS '23","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the 19th Workshop on Hot Topics in Operating Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3593856.3595912","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3593856.3595912","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:47:51Z","timestamp":1750178871000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3593856.3595912"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,22]]},"references-count":34,"alternative-id":["10.1145\/3593856.3595912","10.1145\/3593856"],"URL":"https:\/\/doi.org\/10.1145\/3593856.3595912","relation":{},"subject":[],"published":{"date-parts":[[2023,6,22]]},"assertion":[{"value":"2023-06-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}