{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,7]],"date-time":"2026-01-07T23:23:57Z","timestamp":1767828237069,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":102,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,6,20]],"date-time":"2025-06-20T00:00:00Z","timestamp":1750377600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2316694, 2402804, 2402805"],"award-info":[{"award-number":["2316694, 2402804, 2402805"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,21]]},"DOI":"10.1145\/3695053.3731040","type":"proceedings-article","created":{"date-parts":[[2025,6,20]],"date-time":"2025-06-20T16:43:11Z","timestamp":1750437791000},"page":"1064-1078","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["NetCrafter: Tailoring Network Traffic for Non-Uniform Bandwidth Multi-GPU Systems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-1925-8647","authenticated-orcid":false,"given":"Amel","family":"Fatima","sequence":"first","affiliation":[{"name":"University of Virginia, Charlottesville, Virginia, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-6195-118X","authenticated-orcid":false,"given":"Yang","family":"Yang","sequence":"additional","affiliation":[{"name":"University of Virginia, Charlottesville, Virginia, 
USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3532-6521","authenticated-orcid":false,"given":"Yifan","family":"Sun","sequence":"additional","affiliation":[{"name":"William & Mary, Williamsburg, Virginia, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1459-0852","authenticated-orcid":false,"given":"Rachata","family":"Ausavarungnirun","sequence":"additional","affiliation":[{"name":"MangoBoost Inc., Skokie, Illinois, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5525-7204","authenticated-orcid":false,"given":"Adwait","family":"Jog","sequence":"additional","affiliation":[{"name":"University of Virginia, Charlottesville, Virginia, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,6,20]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378468"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","unstructured":"Neha Agarwal and Thomas\u00a0F. Wenisch. 2017. Thermostat: Application-transparent Page Management for Two-tiered Main Memory. SIGPLAN Not. 52 4 (2017) 631\u2013644. 10.1145\/3093336.3037706","DOI":"10.1145\/3093336.3037706"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/HCS49909.2020.9220636"},{"key":"e_1_3_3_2_5_2","unstructured":"AMD. 2015. AMD APP SDK OpenCL Optimization Guide."},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","unstructured":"Akhil Arunkumar Evgeny Bolotin Benjamin Cho Ugljesa Milic Eiman Ebrahimi Oreste Villa Aamer Jaleel Carole-Jean Wu and David Nellans. 2017. MCM-GPU: Multi-Chip-Module GPUs for Continued Performance Scalability. ACM SIGARCH Computer Architecture News 45 (06 2017) 320\u2013332. 
10.1145\/3140659.3080231","DOI":"10.1145\/3140659.3080231"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607089"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3173162.3173169"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","unstructured":"Reza Azimi David\u00a0K. Tam Livio Soares and Michael Stumm. 2009. Enhancing operating system support for multicore processors by using hardware performance monitoring. SIGOPS Oper. Syst. Rev. 43 2 (apr 2009) 56\u201365. 10.1145\/1531793.1531803","DOI":"10.1145\/1531793.1531803"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00055"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3559009.3569649"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","unstructured":"Fran\u00e7ois Broquedis Nathalie Furmento Brice Goglin Pierre-Andr\u00e9 Wacrenier and Raymond Namyst. 2010. ForestGOMP: an efficient OpenMP environment for NUMA architectures. International Journal of Parallel Programming 38 (10 2010). 10.1007\/s10766-010-0136-3","DOI":"10.1007\/s10766-010-0136-3"},{"key":"e_1_3_3_2_13_2","volume-title":"PCI express system architecture","author":"Budruk Ravi","year":"2004","unstructured":"Ravi Budruk, Don Anderson, and Tom Shanley. 2004. PCI express system architecture. Addison-Wesley Professional."},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"crossref","unstructured":"Shuai Che Bradford\u00a0M. Beckmann Steven\u00a0K. Reinhardt and Kevin Skadron. 2013. Pannotia: Understanding irregular GPGPU graph applications. 2013 IEEE International Symposium on Workload Characterization (IISWC) (2013) 185\u2013195.","DOI":"10.1109\/IISWC.2013.6704684"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","unstructured":"Cen Chen Kenli Li Aijia Ouyang Zhuo Tang and Keqin Li. 2017. GPU-Accelerated Parallel Hierarchical Extreme Learning Machine on Flink for Big Data. 
IEEE Transactions on Systems Man and Cybernetics: Systems PP (04 2017) 1\u201314. 10.1109\/TSMC.2017.2690673","DOI":"10.1109\/TSMC.2017.2690673"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/1183401.1183451"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/1735688.1735702"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00029"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.23919\/ISC.2024.10528939"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/2451116.2451157"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCC.2010.114"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2013.6495009"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/3184407.3184423"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"crossref","unstructured":"Shi Dong and David\u00a0R. Kaeli. 2017. DNNMark: A Deep Neural Network Benchmark Suite for GPUs. Proceedings of the General Purpose GPUs (2017).","DOI":"10.1145\/3038228.3038239"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589080"},{"key":"e_1_3_3_2_26_2","unstructured":"Argonne Leadership\u00a0Computing Facility. 2022. Aurora. https:\/\/www.alcf.anl.gov\/support-center\/aurora-sunspot"},{"key":"e_1_3_3_2_27_2","unstructured":"Oak Ridge Leadership\u00a0Computing Facility. 2022. Frontier User Guide - System Overview. https:\/\/docs.olcf.ornl.gov\/systems\/frontier_user_guide.html#id2"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/264107.264205"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.1109\/DAC56929.2023.10247745"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA59077.2024.00065"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2017.36"},{"key":"e_1_3_3_2_32_2","unstructured":"Priya Goyal Piotr Doll\u00e1r Ross\u00a0B. 
Girshick Pieter Noordhuis Lukasz Wesolowski Aapo Kyrola Andrew Tulloch Yangqing Jia and Kaiming He. 2017. Accurate Large Minibatch SGD: Training ImageNet in 1 Hour. ArXiv abs\/1706.02677 (2017)."},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/InPar.2012.6339595"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1145\/3650200.3656591"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2011.59"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.1145\/3410463.3414623"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00047"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"publisher","unstructured":"Van Jacobson Robert Braden and David Borman. 1992. TCP Extensions for High Performance. RFC 1323 https:\/\/www.rfc-editor.org\/rfc\/rfc1323.txt. 10.17487\/RFC1323 Accessed: 2024-11-20.","DOI":"10.17487\/RFC1323"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"crossref","unstructured":"Colin\u00a0L. Jermain Graham\u00a0E. Rowlands Robert\u00a0A. Buhrman and Daniel\u00a0C. Ralph. 2015. GPU-accelerated micromagnetic simulations using cloud computing. Journal of Magnetism and Magnetic Materials 401 (2015) 320\u2013322.","DOI":"10.1016\/j.jmmm.2015.10.054"},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"publisher","unstructured":"Hai Jiang Yi Chen Zhi Qiao Tien-Hsiung Weng and Kuan-Ching Li. 2015. Scaling up MapReduce-based Big Data Processing on Multi-GPU systems. Cluster Computing 18 1 (mar 2015) 369\u2013383. 10.1007\/s10586-014-0400-1","DOI":"10.1007\/s10586-014-0400-1"},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","unstructured":"Adwait Jog Onur Kayiran Nachiappan Chidambaram\u00a0Nachiappan Asit\u00a0K. Mishra Mahmut\u00a0T. Kandemir Onur Mutlu Ravishankar Iyer and Chita\u00a0R. Das. 2013. OWL: cooperative thread array aware scheduling techniques for improving GPGPU performance(ASPLOS \u201913). Association for Computing Machinery New York NY USA 395\u2013406. 
10.1145\/2451116.2451158","DOI":"10.1145\/2451116.2451158"},{"key":"e_1_3_3_2_42_2","first-page":"437","volume-title":"2016 USENIX Annual Technical Conference (USENIX ATC 16)","author":"Kalia Anuj","year":"2016","unstructured":"Anuj Kalia, Michael Kaminsky, and David\u00a0G Andersen. 2016. Design guidelines for high performance { RDMA} systems. In 2016 USENIX Annual Technical Conference (USENIX ATC 16). 437\u2013450."},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00086"},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00047"},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","DOI":"10.23919\/DATE.2017.7927220"},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"publisher","unstructured":"Hyojong Kim Ramyad Hadidi Lifeng Nai Hyesoon Kim Nuwan Jayasena Yasuko Eckert Onur Kayiran and Gabriel Loh. 2018. CODA: Enabling Co-location of Computation and Data for Multiple GPU Systems. ACM Trans. Archit. Code Optim. 15 3 Article 32 (Sept. 2018) 23\u00a0pages. 10.1145\/3232521","DOI":"10.1145\/3232521"},{"key":"e_1_3_3_2_47_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00095"},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"publisher","unstructured":"Alex Krizhevsky Ilya Sutskever and Geoffrey\u00a0E. Hinton. 2017. ImageNet classification with deep convolutional neural networks. Commun. ACM 60 6 (may 2017) 84\u201390. 10.1145\/3065386","DOI":"10.1145\/3065386"},{"key":"e_1_3_3_2_49_2","unstructured":"Ya Le and Xuan Yang. 2015. Tiny imagenet visual recognition challenge. CS 231N 7 7 (2015) 3."},{"key":"e_1_3_3_2_50_2","unstructured":"Yann LeCun and Corinna Cortes. 2005. 
The mnist database of handwritten digits."},{"key":"e_1_3_3_2_51_2","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527420"},{"key":"e_1_3_3_2_52_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835937"},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480076"},{"key":"e_1_3_3_2_54_2","doi-asserted-by":"publisher","unstructured":"Ang Li Shuaiwen\u00a0Leon Song Jieyang Chen Jiajia Li Xu Liu Nathan\u00a0R. Tallent and Kevin\u00a0J. Barker. 2020. Evaluating Modern GPU Interconnect: PCIe NVLink NV-SLI NVSwitch and GPUDirect. IEEE Transactions on Parallel and Distributed Systems 31 1 (2020) 94\u2013110. 10.1109\/TPDS.2019.2928289","DOI":"10.1109\/TPDS.2019.2928289"},{"key":"e_1_3_3_2_55_2","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2018.8573483"},{"key":"e_1_3_3_2_56_2","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037709"},{"key":"e_1_3_3_2_57_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3614269"},{"key":"e_1_3_3_2_58_2","unstructured":"Bingyao Li Yueqi Wang Tianyu Wang Lieven Eeckhout Jun Yang Aamer Jaleel and Xulong Tang. 2024. Improving Multi-Instance GPU Efficiency via Sub-Entry Sharing TLB Design. arXiv preprint arXiv:2404.18361 (2024)."},{"key":"e_1_3_3_2_59_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10071054"},{"key":"e_1_3_3_2_60_2","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480083"},{"key":"e_1_3_3_2_61_2","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322259"},{"key":"e_1_3_3_2_62_2","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3124534"},{"key":"e_1_3_3_2_63_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10070949"},{"key":"e_1_3_3_2_64_2","doi-asserted-by":"publisher","DOI":"10.1145\/3230543.3230560"},{"key":"e_1_3_3_2_65_2","unstructured":"NVIDIA. 2016. NVIDIA Tesla P100: The Most Advanced Datacenter Accelerator Ever Built. 
https:\/\/images.nvidia.com\/content\/pdf\/tesla\/whitepaper\/pascalarchitecture-whitepaper.pdf Accessed: 2017-09-19."},{"key":"e_1_3_3_2_66_2","unstructured":"NVIDIA. 2017. NVIDIA DGX-1 System Architecture White paper."},{"key":"e_1_3_3_2_67_2","volume-title":"NVIDIA DGX-2","year":"2018","unstructured":"NVIDIA. 2018. NVIDIA DGX-2. https:\/\/www.nvidia.com\/en-us\/data-center\/dgx-2\/"},{"key":"e_1_3_3_2_68_2","unstructured":"NVIDIA Corporation. 2024. Performance Tuning for Mellanox Adapters. https:\/\/enterprise-support.nvidia.com\/s\/article\/performance-tuning-for-mellanox-adapters. Accessed: 2024-11-20."},{"key":"e_1_3_3_2_69_2","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507718"},{"key":"e_1_3_3_2_70_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO61859.2024.00029"},{"key":"e_1_3_3_2_71_2","unstructured":"PCI-SIG. 2017. PCI-SIG Releases PCIe\u00ae 4.0 Version 1.0. https:\/\/pcisig.com\/pci-sig-releases-pcie%C2%AE-40-version-10. Accessed: August 2 2024."},{"key":"e_1_3_3_2_72_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00036"},{"key":"e_1_3_3_2_73_2","doi-asserted-by":"publisher","unstructured":"Petar Radojkovi\u0107 Vladimir \u010cakarevi\u0107 Miquel Moret\u00f3 Javier Verd\u00fa Alex Pajuelo Francisco\u00a0J. Cazorla Mario Nemirovsky and Mateo Valero. 2012. Optimal task assignment in multithreaded processors: a statistical approach. SIGARCH Comput. Archit. News 40 1 (mar 2012) 235\u2013248. 
10.1145\/2189750.2151002","DOI":"10.1145\/2189750.2151002"},{"key":"e_1_3_3_2_74_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00054"},{"key":"e_1_3_3_2_75_2","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540717"},{"key":"e_1_3_3_2_76_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCC.2009.5202271"},{"key":"e_1_3_3_2_77_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC.2016.7761599"},{"key":"e_1_3_3_2_78_2","volume-title":"Symposium on Networked Systems Design and Implementation","author":"Sapio Amedeo","year":"2019","unstructured":"Amedeo Sapio, Marco Canini, Chen-Yu Ho, Jacob Nelson, Panos Kalnis, Changhoon Kim, Arvind Krishnamurthy, Masoud Moshref, Dan R.\u00a0K. Ports, and Peter Richt\u00e1rik. 2019. Scaling Distributed Machine Learning with In-Network Aggregation. In Symposium on Networked Systems Design and Implementation."},{"key":"e_1_3_3_2_79_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.1995.386535"},{"key":"e_1_3_3_2_80_2","doi-asserted-by":"publisher","DOI":"10.1109\/GLOCOM.2007.697"},{"key":"e_1_3_3_2_81_2","doi-asserted-by":"publisher","DOI":"10.1145\/3617232.3624868"},{"key":"e_1_3_3_2_82_2","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378493"},{"key":"e_1_3_3_2_83_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.1992.753306"},{"key":"e_1_3_3_2_84_2","doi-asserted-by":"publisher","unstructured":"C.\u00a0B. Stunkel R.\u00a0L. Graham G. Shainer M. Kagan S.\u00a0S. Sharkawi B. Rosenburg and G.\u00a0A. Chochia. 2020. The high-speed networks of the Summit and Sierra supercomputers. IBM Journal of Research and Development 64 3\/4 (2020) 3:1\u20133:10. 10.1147\/JRD.2020.2967330","DOI":"10.1147\/JRD.2020.2967330"},{"key":"e_1_3_3_2_85_2","unstructured":"Yifan Sun Nicolas\u00a0Bohm Agostini Dong Shi and David Kaeli. 2019. Summarizing CPU and GPU design trends with product data. 
arXiv preprint arXiv:1911.11313 (2019)."},{"key":"e_1_3_3_2_86_2","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322230"},{"key":"e_1_3_3_2_87_2","unstructured":"Yifan Sun Trinayan Baruah Saiful\u00a0A Mojumder Shi Dong Rafael Ubal Xiang Gong Shane Treadway Yuhui Bao Vincent Zhao Jos\u00e9\u00a0L Abell\u00e1n et\u00a0al. 2018. Mgsim+ mgmark: A framework for multi-gpu system research. arXiv preprint arXiv:1811.02884 (2018)."},{"key":"e_1_3_3_2_88_2","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2016.7581262"},{"key":"e_1_3_3_2_89_2","unstructured":"Konstantin Taranov Fabian Fischer and Torsten Hoefler. 2022. Efficient RDMA Communication Protocols. arXiv preprint arXiv:2212.09134 (2022)."},{"key":"e_1_3_3_2_90_2","unstructured":"Mellanox Technologies. [n. d.]. Performance Tuning Guidelines for Mellanox Network Adapters."},{"key":"e_1_3_3_2_91_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00055"},{"key":"e_1_3_3_2_92_2","doi-asserted-by":"publisher","unstructured":"Guan Wang Chuanqi Zang Lei Ju Mengying Zhao Xiaojun Cai and Zhiping Jia. 2018. Shared Last-Level Cache Management and Memory Scheduling for GPGPUs with Hybrid Main Memory. ACM Trans. Embed. Comput. Syst. 17 4 Article 77 (jul 2018) 25\u00a0pages. 10.1145\/3230643","DOI":"10.1145\/3230643"},{"key":"e_1_3_3_2_93_2","doi-asserted-by":"publisher","DOI":"10.1109\/NYSDS.2017.8085036"},{"key":"e_1_3_3_2_94_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00085"},{"key":"e_1_3_3_2_95_2","unstructured":"Ren Wu Shengen Yan Yi Shan Qingqing Dang and Gang Sun. 2015. Deep Image: Scaling up Image Recognition. 
ArXiv abs\/1501.02876 (2015)."},{"key":"e_1_3_3_2_96_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2015.7056023"},{"key":"e_1_3_3_2_97_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00035"},{"key":"e_1_3_3_2_98_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00035"},{"key":"e_1_3_3_2_99_2","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589078"},{"key":"e_1_3_3_2_100_2","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322235"},{"key":"e_1_3_3_2_101_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00082"},{"key":"e_1_3_3_2_102_2","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575745"},{"key":"e_1_3_3_2_103_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICPPW.2015.20"}],"event":{"name":"ISCA '25: Proceedings of the 52nd Annual International Symposium on Computer Architecture","location":"Tokyo Japan","acronym":"SIGARCH '25","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 52nd Annual International Symposium on Computer 
Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3695053.3731040","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3695053.3731040","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,21]],"date-time":"2025-06-21T10:58:37Z","timestamp":1750503517000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3695053.3731040"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,20]]},"references-count":102,"alternative-id":["10.1145\/3695053.3731040","10.1145\/3695053"],"URL":"https:\/\/doi.org\/10.1145\/3695053.3731040","relation":{},"subject":[],"published":{"date-parts":[[2025,6,20]]},"assertion":[{"value":"2025-06-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}