{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,8]],"date-time":"2026-01-08T05:46:46Z","timestamp":1767851206842,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":81,"publisher":"ACM","funder":[{"name":"China National Natural Science Foundation","award":["62088102, 623B2074"],"award-info":[{"award-number":["62088102, 623B2074"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,21]]},"DOI":"10.1145\/3695053.3731002","type":"proceedings-article","created":{"date-parts":[[2025,6,20]],"date-time":"2025-06-20T16:43:11Z","timestamp":1750437791000},"page":"1210-1224","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Topology-Aware Virtualization over Inter-Core Connected Neural Processing Units"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1146-8552","authenticated-orcid":false,"given":"Dahu","family":"Feng","sequence":"first","affiliation":[{"name":"Tsinghua university, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5957-3024","authenticated-orcid":false,"given":"Erhu","family":"Feng","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7945-8430","authenticated-orcid":false,"given":"Dong","family":"Du","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0882-6520","authenticated-orcid":false,"given":"Pinjie","family":"Xu","sequence":"additional","affiliation":[{"name":"SenseTime Research, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6558-5298","authenticated-orcid":false,"given":"Yubin","family":"Xia","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9720-0361","authenticated-orcid":false,"given":"Haibo","family":"Chen","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2320-0326","authenticated-orcid":false,"given":"Rong","family":"Zhao","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,6,20]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527405"},{"key":"e_1_3_3_1_3_2","unstructured":"Alibaba. 2024. Qwen\/Qwen2-0.5B. https:\/\/huggingface.co\/Qwen\/Qwen2-0.5B. Referenced January 2024."},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","unstructured":"Alon Amid David Biancolin Abraham Gonzalez Daniel Grubb Sagar Karandikar Harrison Liew Albert Magyar Howard Mao Albert Ou Nathan Pemberton Paul Rigge Colin Schmidt John Wright Jerry Zhao Yakun\u00a0Sophia Shao Krste Asanovi\u0107 and Borivoje Nikoli\u0107. 2020. Chipyard: Integrated Design Simulation and Implementation Framework for Custom SoCs. IEEE Micro 40 4 (2020) 10\u201321. 10.1109\/MM.2020.2996616","DOI":"10.1109\/MM.2020.2996616"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"Ardalan Amiri\u00a0Sani Kevin Boos Shaopu Qin and Lin Zhong. 2014. I\/O paravirtualization at the device file boundary. ACM SIGARCH Computer Architecture News 42 1 (2014) 319\u2013332.","DOI":"10.1145\/2654822.2541943"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/2594368.2594370"},{"key":"e_1_3_3_1_7_2","unstructured":"AWS. 2024. GNeuronCore-v2 Architecture. https:\/\/awsdocs-neuron.readthedocs-hosted.com\/en\/latest\/general\/arch\/neuron-hardware\/neuron-core-v2.html. Referenced January 2024."},{"key":"e_1_3_3_1_8_2","unstructured":"AWS. 2024. Use Amazon SageMaker Built-in Algorithms or Pre-trained Models. https:\/\/docs.aws.amazon.com\/sagemaker\/latest\/dg\/algos.html. Referenced January 2024."},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTERWKSP.2010.5613086"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"crossref","unstructured":"Paul Barham Boris Dragovic Keir Fraser Steven Hand Tim Harris Alex Ho Rolf Neugebauer Ian Pratt and Andrew Warfield. 2003. Xen and the art of virtualization. ACM SIGOPS operating systems review 37 5 (2003) 164\u2013177.","DOI":"10.1145\/1165389.945462"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","unstructured":"Dongwei Chen Dong Tong Chun Yang Jiangfang Yi and Xu Cheng. 2023. FlexPointer: Fast Address Translation Based on Range TLB and Tagged Pointers. ACM Trans. Archit. Code Optim. 20 2 Article 30 (March 2023) 24\u00a0pages. 10.1145\/3579854","DOI":"10.1145\/3579854"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037700"},{"key":"e_1_3_3_1_13_2","unstructured":"Tianqi Chen Mu Li Yutian Li Min Lin Naiyan Wang Minjie Wang Tianjun Xiao Bing Xu Chiyuan Zhang and Zheng Zhang. 2015. MXNet: A Flexible and Efficient Machine Learning Library for Heterogeneous Distributed Systems. arxiv:https:\/\/arXiv.org\/abs\/1512.01274\u00a0[cs.DC]"},{"key":"e_1_3_3_1_14_2","unstructured":"Alibaba Cloud. 2024. High GPU Utilization with cGPU. https:\/\/www.alibabacloud.com\/en\/solutions\/cgpu?_p_lc=1. Referenced January 2024."},{"key":"e_1_3_3_1_15_2","unstructured":"CNCF. 2024. HAMi: Heterogeneous AI Computing Virtualization Middleware. https:\/\/github.com\/Project-HAMi\/HAMi. Referenced November 2024."},{"key":"e_1_3_3_1_16_2","unstructured":"Jacob Devlin Ming-Wei Chang Kenton Lee and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1810.04805 (2018)."},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"crossref","unstructured":"Micah Dowty and Jeremy Sugerman. 2008. GPU virtualization on VMware\u2019s hosted I\/O architecture. ACM SIGOPS Oper. Syst. Rev. 43 (2008) 73\u201382. https:\/\/api.semanticscholar.org\/CorpusID:228328","DOI":"10.1145\/1618525.1618534"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-14122-5_44"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC.2011.6152718"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCS.2010.5547126"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"crossref","unstructured":"Erhu Feng Dahu Feng Dong Du Yubin Xia and Haibo Chen. 2024. sNPU: Trusted Execution Environments on Integrated NPUs. (2024).","DOI":"10.1109\/ISCA59077.2024.00057"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589348"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","unstructured":"Jayneel Gandhi Vasileios Karakostas Furkan Ayar Adri\u00e1n Cristal Mark\u00a0D. Hill Kathryn\u00a0S. McKinley Mario Nemirovsky Michael\u00a0M. Swift and Osman\u00a0S. \u00dcnsal. 2016. Range Translations for Fast Virtual Memory. IEEE Micro 36 3 (2016) 118\u2013126. 10.1109\/MM.2016.10","DOI":"10.1109\/MM.2016.10"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18074.2021.9586216"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15277-1_37"},{"key":"e_1_3_3_1_26_2","unstructured":"Google. 2024. Extract insights from images documents and videos. https:\/\/cloud.google.com\/vision. Referenced January 2024."},{"key":"e_1_3_3_1_27_2","unstructured":"Google. 2024. TPU\/TPU v6e. https:\/\/cloud.google.com\/tpu\/docs\/v6e. Referenced February 2025."},{"key":"e_1_3_3_1_28_2","unstructured":"Graphcore. 2024. Intelligence processing unit. https:\/\/www.graphcore.ai\/products\/ipu. Referenced January 2024."},{"key":"e_1_3_3_1_29_2","unstructured":"Graphcore. 2024. IPU\/ipu-programmers-guide. https:\/\/docs.graphcore.ai\/projects\/ipu-programmers-guide\/en\/latest\/programming_tools.html. Referenced February 2025."},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/1519138.1519141"},{"key":"e_1_3_3_1_31_2","volume-title":"2011 USENIX Annual Technical Conference (USENIX ATC 11)","author":"Gupta Vishakha","year":"2011","unstructured":"Vishakha Gupta, Karsten Schwan, Niraj Tolia, Vanish Talwar, and Parthasarathy Ranganathan. 2011. Pegasus: Coordinated scheduling for virtualized accelerator-based systems. In 2011 USENIX Annual Technical Conference (USENIX ATC 11)."},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"crossref","unstructured":"Peter\u00a0E Hart Nils\u00a0J Nilsson and Bertram Raphael. 1968. A formal basis for the heuristic determination of minimum cost paths. IEEE transactions on Systems Science and Cybernetics 4 2 (1968) 100\u2013107.","DOI":"10.1109\/TSSC.1968.300136"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"crossref","unstructured":"Kaiming He X. Zhang Shaoqing Ren and Jian Sun. 2015. Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2015) 770\u2013778. https:\/\/api.semanticscholar.org\/CorpusID:206594692","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_1_35_2","unstructured":"Andrew\u00a0G Howard Menglong Zhu Bo Chen Dmitry Kalenichenko Weijun Wang Tobias Weyand Marco Andreetto and Hartwig Adam. 2017. Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1704.04861 (2017)."},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2018.8621865"},{"key":"e_1_3_3_1_37_2","unstructured":"Bongjoon Hyun Youngeun Kwon Yujeong Choi John Kim and Minsoo Rhu. 2019. NeuMMU: Architectural Support for Efficient Address Translations in Neural Processing Units. Proceedings of the Twenty-Fifth International Conference on Architectural Support for Programming Languages and Operating Systems (2019). https:\/\/api.semanticscholar.org\/CorpusID:208139570"},{"key":"e_1_3_3_1_38_2","unstructured":"Intel. 2023. Intel Virtualization Technology for Directed I\/O Architecture Specification. https:\/\/cdrdv2-public.intel.com\/671081\/vt-directed-io-spec.pdf. Referenced April 2023."},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00014"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","DOI":"10.1145\/2304576.2304623"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"crossref","unstructured":"Seah Kim Jerry Zhao Krste Asanovi\u0107 Borivoje Nikoli\u0107 and Yakun\u00a0Sophia Shao. 2023. AuRORA: Virtualized Accelerator Orchestration for Multi-Tenant Workloads. 2022 55th IEEE\/ACM International Symposium on Microarchitecture (MICRO) (2023).","DOI":"10.1145\/3613424.3614280"},{"key":"e_1_3_3_1_43_2","unstructured":"Alex Krizhevsky Ilya Sutskever and Geoffrey\u00a0E Hinton. 2012. Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems 25 (2012)."},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"crossref","unstructured":"Yossi Kuperman Eyal Moscovici Joel Nider Razya Ladelsky Abel Gordon and Dan Tsafrir. 2016. Paravirtual remote i\/o. ACM SIGARCH Computer Architecture News 44 2 (2016) 49\u201365.","DOI":"10.1145\/2980024.2872378"},{"key":"e_1_3_3_1_45_2","doi-asserted-by":"publisher","unstructured":"K. Li H. Chen J. Sun and L. Shi. 2012. vCUDA: GPU-Accelerated High-Performance Computing in Virtual Machines. IEEE Trans. Comput. 61 06 (jun 2012) 804\u2013816. 10.1109\/TC.2011.112","DOI":"10.1109\/TC.2011.112"},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2011.88"},{"key":"e_1_3_3_1_47_2","doi-asserted-by":"publisher","DOI":"10.1109\/WAINA.2011.82"},{"key":"e_1_3_3_1_48_2","doi-asserted-by":"crossref","unstructured":"Sean Lie. 2023. Cerebras architecture deep dive: First look inside the hardware\/software co-design for deep learning. IEEE Micro 43 3 (2023) 18\u201330.","DOI":"10.1109\/MM.2023.3256384"},{"key":"e_1_3_3_1_49_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2016.76"},{"key":"e_1_3_3_1_50_2","unstructured":"Yiqi Liu Yuqi Xue Yu Cheng Lingxiao Ma Ziming Miao Jilong Xue and Jian Huang. 2024. Scaling Deep Learning Computation over the Inter-Core Connected Intelligence Processor. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.04808 (2024)."},{"key":"e_1_3_3_1_51_2","unstructured":"morenes. 2024. DCRA\/DCRA ae. https:\/\/github.com\/morenes\/dcra. Referenced February 2025."},{"key":"e_1_3_3_1_52_2","doi-asserted-by":"publisher","DOI":"10.1007\/11815921_17"},{"key":"e_1_3_3_1_53_2","unstructured":"NVIDIA. 2023. NVIDIA Multi-Instance GPU. https:\/\/www.nvidia.com\/en-us\/technologies\/multi-instance-gpu\/. Referenced April 2023."},{"key":"e_1_3_3_1_54_2","unstructured":"NVIDIA. 2024. MULTI-PROCESS SERVICE. https:\/\/docs.nvidia.com\/deploy\/pdf\/CUDA_Multi_Process_Service_Overview.pdf. Referenced January 2024."},{"key":"e_1_3_3_1_55_2","unstructured":"NVIDIA. 2024. Unlock Next Level Performance with Virtual GPUs. https:\/\/www.nvidia.com\/en-us\/data-center\/virtual-solutions\/. Referenced January 2024."},{"key":"e_1_3_3_1_56_2","unstructured":"OpenAI. 2024. Introducing ChatGPT. https:\/\/openai.com\/index\/chatgpt\/. Referenced January 2024."},{"key":"e_1_3_3_1_57_2","unstructured":"Marcelo Orenes-Vera Esin Tureci Margaret Martonosi and David Wentzlaff. 2024. DCRA: A Distributed Chiplet-based Reconfigurable Architecture for Irregular Applications. arxiv:https:\/\/arXiv.org\/abs\/2311.15443\u00a0[cs.AR] https:\/\/arxiv.org\/abs\/2311.15443"},{"key":"e_1_3_3_1_58_2","doi-asserted-by":"crossref","unstructured":"Pratyush Patel Esha Choukse Chaojie Zhang Aashaka Shah \u00cd\u00f1igo Goiri Saeed Maleki and Ricardo Bianchini. 2024. Splitwise: Efficient generative LLM inference using phase splitting. arxiv:https:\/\/arXiv.org\/abs\/2311.18677\u00a0[cs.AR]","DOI":"10.1109\/ISCA59077.2024.00019"},{"key":"e_1_3_3_1_59_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42614.2022.9731612"},{"key":"e_1_3_3_1_60_2","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC.2012.6507485"},{"key":"e_1_3_3_1_61_2","doi-asserted-by":"crossref","unstructured":"Kaspar Riesen and Horst Bunke. 2009. Approximate graph edit distance computation by means of bipartite graph matching. Image and Vision computing 27 7 (2009) 950\u2013959.","DOI":"10.1016\/j.imavis.2008.04.004"},{"key":"e_1_3_3_1_62_2","first-page":"21","volume-title":"MLG","author":"Riesen Kaspar","year":"2007","unstructured":"Kaspar Riesen, Stefan Fankhauser, and Horst Bunke. 2007. Speeding up graph edit distance computation with a bipartite heuristic.. In MLG. Citeseer, 21\u201324."},{"key":"e_1_3_3_1_63_2","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358302"},{"key":"e_1_3_3_1_64_2","doi-asserted-by":"crossref","unstructured":"Yusuke Suzuki Shinpei Kato Hiroshi Yamada and Kenji Kono. 2016. GPUvm: GPU Virtualization at the Hypervisor. IEEE Trans. Comput. 65 (2016) 2752\u20132766. https:\/\/api.semanticscholar.org\/CorpusID:6941728","DOI":"10.1109\/TC.2015.2506582"},{"key":"e_1_3_3_1_65_2","doi-asserted-by":"publisher","DOI":"10.1145\/945445.945466"},{"key":"e_1_3_3_1_66_2","doi-asserted-by":"crossref","unstructured":"Christian Szegedy Wei Liu Yangqing Jia Pierre Sermanet Scott Reed Dragomir Anguelov Dumitru Erhan Vincent Vanhoucke and Andrew Rabinovich. 2014. Going Deeper with Convolutions. arxiv:https:\/\/arXiv.org\/abs\/1409.4842\u00a0[cs.CV]","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_3_1_67_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_3_1_68_2","doi-asserted-by":"publisher","unstructured":"Emil Talpes Debjit\u00a0Das Sarma Ganesh Venkataramanan Peter Bannon Bill McGee Benjamin Floering Ankit Jalote Christopher Hsiong Sahil Arora Atchyuth Gorti and Gagandeep\u00a0S. Sachdev. 2020. Compute Solution for Tesla\u2019s Full Self-Driving Computer. IEEE Micro 40 2 (2020) 25\u201335. 10.1109\/MM.2020.2975764","DOI":"10.1109\/MM.2020.2975764"},{"key":"e_1_3_3_1_69_2","doi-asserted-by":"publisher","DOI":"10.1109\/HCS55958.2022.9895534"},{"key":"e_1_3_3_1_70_2","unstructured":"tenstorrent. 2024. Tenstorrent - Scalable and Efficient Hardware for Deep Learning. https:\/\/tenstorrent.com\/. Referenced January 2024."},{"key":"e_1_3_3_1_71_2","volume-title":"USENIX Annual Technical Conference","author":"Tian Kun","year":"2014","unstructured":"Kun Tian, Yaozu Dong, and David\u00a0J. Cowperthwaite. 2014. A Full GPU Virtualization Solution with Mediated Pass-Through. In USENIX Annual Technical Conference. https:\/\/api.semanticscholar.org\/CorpusID:12658735"},{"key":"e_1_3_3_1_72_2","unstructured":"Hugo Touvron Louis Martin Kevin\u00a0R. Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale Daniel\u00a0M. Bikel Lukas Blecher Cristian\u00a0Cant\u00f3n Ferrer Moya Chen Guillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller Cynthia Gao Vedanuj Goswami Naman Goyal Anthony\u00a0S. Hartshorn Saghar Hosseini Rui Hou Hakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel\u00a0M. Kloumann A.\u00a0V. Korenev Punit\u00a0Singh Koura Marie-Anne Lachaux Thibaut Lavril Jenya Lee Diana Liskovich Yinghai Lu Yuning Mao Xavier Martinet Todor Mihaylov Pushkar Mishra Igor Molybog Yixin Nie Andrew Poulton Jeremy Reizenstein Rashi Rungta Kalyan Saladi Alan Schelten Ruan Silva Eric\u00a0Michael Smith R. Subramanian Xia Tan Binh Tang Ross Taylor Adina Williams Jian\u00a0Xiang Kuan Puxin Xu Zhengxu Yan Iliyan Zarov Yuchen Zhang Angela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic Sergey Edunov and Thomas Scialom. 2023. Llama 2: Open Foundation and Fine-Tuned Chat Models. ArXiv abs\/2307.09288 (2023). https:\/\/api.semanticscholar.org\/CorpusID:259950998"},{"key":"e_1_3_3_1_73_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCSim.2016.7568395"},{"key":"e_1_3_3_1_74_2","doi-asserted-by":"crossref","unstructured":"Nandita Vijaykumar Kevin Hsieh Gennady Pekhimenko Samira\u00a0Manabi Khan Ashish Shrestha Saugata Ghose Adwait Jog Phillip\u00a0B. Gibbons and Onur Mutlu. 2016. Zorua: A holistic approach to resource virtualization in GPUs. 2016 49th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO) (2016) 1\u201314. https:\/\/api.semanticscholar.org\/CorpusID:2310493","DOI":"10.1109\/MICRO.2016.7783718"},{"key":"e_1_3_3_1_75_2","unstructured":"Junyang Wang Haiyang Xu Haitao Jia Xi Zhang Ming Yan Weizhou Shen Ji Zhang Fei Huang and Jitao Sang. 2024. Mobile-Agent-v2: Mobile Device Operation Assistant with Effective Navigation via Multi-Agent Collaboration. arxiv:https:\/\/arXiv.org\/abs\/2406.01014\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2406.01014"},{"key":"e_1_3_3_1_76_2","first-page":"35","volume-title":"14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17)","author":"Wei Michael","year":"2017","unstructured":"Michael Wei, Amy Tai, Christopher\u00a0J Rossbach, Ittai Abraham, Maithem Munshed, Medhavi Dhawan, Jim Stabile, Udi Wieder, Scott Fritchie, and Steven Swanson. 2017. { vCorfu} : A { Cloud-Scale} Object Store on a Shared Log. In 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17). 35\u201349."},{"key":"e_1_3_3_1_77_2","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid.2012.26"},{"key":"e_1_3_3_1_78_2","doi-asserted-by":"publisher","DOI":"10.1145\/3593856.3595912"},{"key":"e_1_3_3_1_79_2","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589059"},{"key":"e_1_3_3_1_80_2","unstructured":"Tsung\u00a0Tai Yeh Amit Sabne Putt Sakdhnagool Rudolf Eigenmann and Timothy\u00a0G. Rogers. 2017. Pagoda: Fine-Grained GPU Resource Virtualization for Narrow Tasks. Proceedings of the 22nd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming (2017). https:\/\/api.semanticscholar.org\/CorpusID:9810229"},{"key":"e_1_3_3_1_81_2","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378466"},{"key":"e_1_3_3_1_82_2","volume-title":"Symposium on Networked Systems Design and Implementation","author":"Zhang Kai","year":"2018","unstructured":"Kai Zhang, Bingsheng He, Jiayu Hu, Ze ke Wang, Bei Hua, Jiayi Meng, and Lishan Yang. 2018. G-NET: Effective GPU Sharing in NFV Systems. In Symposium on Networked Systems Design and Implementation. https:\/\/api.semanticscholar.org\/CorpusID:4493567"}],"event":{"name":"ISCA '25: Proceedings of the 52nd Annual International Symposium on Computer Architecture","location":"Tokyo Japan","acronym":"SIGARCH '25","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 52nd Annual International Symposium on Computer Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3695053.3731002","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,21]],"date-time":"2025-06-21T11:06:04Z","timestamp":1750503964000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3695053.3731002"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,20]]},"references-count":81,"alternative-id":["10.1145\/3695053.3731002","10.1145\/3695053"],"URL":"https:\/\/doi.org\/10.1145\/3695053.3731002","relation":{},"subject":[],"published":{"date-parts":[[2025,6,20]]},"assertion":[{"value":"2025-06-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}