{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T16:30:07Z","timestamp":1759336207625,"version":"build-2065373602"},"publisher-location":"New York, NY, USA","reference-count":65,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,13]]},"DOI":"10.1145\/3731569.3764825","type":"proceedings-article","created":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T12:43:24Z","timestamp":1759322604000},"page":"341-358","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Fast End-to-End Performance Simulation of Accelerated Hardware-Software Stacks"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-6907-7489","authenticated-orcid":false,"given":"Jiacheng","family":"Ma","sequence":"first","affiliation":[{"name":"EPFL, Lausanne, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2098-3067","authenticated-orcid":false,"given":"Jonas","family":"Kaufmann","sequence":"additional","affiliation":[{"name":"Max Planck Institute for Software Systems (MPI-SWS), Saarbr\u00fccken, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-2452-6266","authenticated-orcid":false,"given":"Emilien","family":"Guandalino","sequence":"additional","affiliation":[{"name":"EPFL, Lausanne, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5599-8636","authenticated-orcid":false,"given":"Rishabh","family":"Iyer","sequence":"additional","affiliation":[{"name":"UC Berkeley, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8468-8409","authenticated-orcid":false,"given":"Thomas","family":"Bourgeat","sequence":"additional","affiliation":[{"name":"EPFL, Lausanne, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8107-6535","authenticated-orcid":false,"given":"George","family":"Candea","sequence":"additional","affiliation":[{"name":"EPFL, Lausanne, Switzerland"}]}],"member":"320","published-online":{"date-parts":[[2025,10,12]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Intl. Conf. on Advanced Computing and Communication Systems","author":"Adarsh P.","year":"2020","unstructured":"Adarsh, P., Rathi, P., and Kumar, M. Yolo v3-tiny: Object detection and recognition using one stage improved model. In Intl. Conf. on Advanced Computing and Communication Systems (2020)."},{"key":"e_1_3_2_1_2_1","volume-title":"Intl. Symp. on Computer Architecture","author":"Altaf M. S. B.","year":"2017","unstructured":"Altaf, M. S. B., and Wood, D. A. LogCA: A high-level performance model for hardware accelerators. In Intl. Symp. on Computer Architecture (2017)."},{"key":"e_1_3_2_1_3_1","volume-title":"AWS Nitro System. https:\/\/aws.amazon.com\/ec2\/nitro\/","author":"Amazon","year":"2017","unstructured":"Amazon. AWS Nitro System. https:\/\/aws.amazon.com\/ec2\/nitro\/, 2017."},{"key":"e_1_3_2_1_4_1","volume-title":"https:\/\/aws.amazon.com\/ai\/machine-learning\/inferentia\/","author":"Amazon","year":"2025","unstructured":"Amazon. Amazon Inferentia. https:\/\/aws.amazon.com\/ai\/machine-learning\/inferentia\/, 2025."},{"key":"e_1_3_2_1_5_1","volume-title":"https:\/\/aws.amazon.com\/ai\/machine-learning\/trainium\/","author":"Amazon","year":"2025","unstructured":"Amazon. Amazon Trainium. https:\/\/aws.amazon.com\/ai\/machine-learning\/trainium\/, 2025."},{"key":"e_1_3_2_1_6_1","volume-title":"Apache versatile tensor accelerator. https:\/\/tvm.apache.org\/docs\/v0.9.0\/topic\/vta","author":"Apache TVM","year":"2018","unstructured":"Apache TVM. Apache versatile tensor accelerator. https:\/\/tvm.apache.org\/docs\/v0.9.0\/topic\/vta, 2018."},{"key":"e_1_3_2_1_7_1","volume-title":"COTSon: Infrastructure for full system simulation. ACM SIGOPS Operating Systems Review","author":"Argollo E.","year":"2009","unstructured":"Argollo, E., Falc\u00f3n, A., Faraboschi, P., Monchiero, M., and Ortega, D. COTSon: Infrastructure for full system simulation. ACM SIGOPS Operating Systems Review (2009)."},{"key":"e_1_3_2_1_8_1","volume-title":"AXI4, AXI4-Lite, and AXI4-Stream","author":"Ltd","year":"2010","unstructured":"ARM Ltd. AMBA AXI and ACE protocol specification, AXI4, AXI4-Lite, and AXI4-Stream, 2010. Available from https:\/\/developer.arm.com\/architectures\/system-architectures\/amba."},{"key":"e_1_3_2_1_9_1","volume-title":"The gem5 simulator. SIGARCH Comput. Archit. News","author":"Binkert N. L.","year":"2011","unstructured":"Binkert, N. L., Beckmann, B. M., Black, G., Reinhardt, S. K., Saidi, A. G., Basu, A., Hestness, J., Hower, D., Krishna, T., Sardashti, S., Sen, R., Sewell, K., Altaf, M. S. B., Vaish, N., Hill, M. D., and Wood, D. A. The gem5 simulator. SIGARCH Comput. Archit. News (2011)."},{"key":"e_1_3_2_1_10_1","unstructured":"Cadence Design Systems Inc. Xcelium logic simulator. https:\/\/www.cadence.com\/en_US\/home\/tools\/system-design-and-verification\/simulation-and-testbench-verification\/xcelium-simulator.html 2017."},{"key":"e_1_3_2_1_11_1","volume-title":"Intl. Conf. for High Performance Computing, Networking, Storage and Analysis","author":"Carlson T. E.","year":"2011","unstructured":"Carlson, T. E., Heirman, W., and Eeckhout, L. Sniper: Exploring the level of abstraction for scalable and accurate parallel multi-core simulation. In Intl. Conf. for High Performance Computing, Networking, Storage and Analysis (2011)."},{"key":"e_1_3_2_1_12_1","volume-title":"Intl. Conf. on Very Large Databases","author":"Chiosa M.","year":"2022","unstructured":"Chiosa, M., Maschi, F., M\u00fcller, I., Alonso, G., and May, N. Hardware acceleration of compression and encryption in SAP HANA. In Intl. Conf. on Very Large Databases (2022)."},{"key":"e_1_3_2_1_13_1","volume-title":"Intl. Conf. on Embedded Computer Systems: Architectures, Modeling, and Simulation","author":"Cubero-Cascante J.","year":"2023","unstructured":"Cubero-Cascante, J., Zurstra\u00dfen, N., N\u00f6ller, J., Leupers, R., and Joseph, J. M. parti-gem5: gem5's timing mode parallelised. In Intl. Conf. on Embedded Computer Systems: Architectures, Modeling, and Simulation (2023)."},{"key":"e_1_3_2_1_14_1","volume-title":"Symp. on Principles and Practice of Parallel Computing","author":"Culler D. E.","year":"1993","unstructured":"Culler, D. E., Karp, R., Patterson, D., Sahay, A., Schauser, K. E., Santos, E., Subramonian, R., and von Eicken, T. LogP: Towards a realistic model of parallel computation. In Symp. on Principles and Practice of Parallel Computing (1993)."},{"key":"e_1_3_2_1_15_1","volume-title":"Symp. on Operating Systems Principles","author":"Curtsinger C.","year":"2015","unstructured":"Curtsinger, C., and Berger, E. D. Coz: Finding code that counts with causal profiling. In Symp. on Operating Systems Principles (2015)."},{"key":"e_1_3_2_1_16_1","volume-title":"Accelerating Facebook's infrastructure with application-specific hardware. https:\/\/engineering.fb.com\/2019\/03\/14\/data-center-engineering\/accelerating-infrastructure\/","author":"Facebook","year":"2019","unstructured":"Facebook. Accelerating Facebook's infrastructure with application-specific hardware. https:\/\/engineering.fb.com\/2019\/03\/14\/data-center-engineering\/accelerating-infrastructure\/, 2019."},{"key":"e_1_3_2_1_17_1","volume-title":"IEEE\/ACM Intl. Symp. on Microarchitecture","author":"Fung W. W.","year":"2007","unstructured":"Fung, W. W., Sham, I., Yuan, G., and Aamodt, T. M. Dynamic warp formation and scheduling for efficient GPU control flow. In IEEE\/ACM Intl. Symp. on Microarchitecture (2007)."},{"key":"e_1_3_2_1_18_1","volume-title":"ld.so(8) \u2013 dynamic linker\/loader. Linux man-pages project","author":"Project","year":"2025","unstructured":"GNU Project. ld.so(8) \u2013 dynamic linker\/loader. Linux man-pages project, 2025."},{"key":"e_1_3_2_1_19_1","volume-title":"ptrace(2). Linux man-pages project","author":"Project","year":"2025","unstructured":"GNU Project. ptrace(2). Linux man-pages project, 2025."},{"key":"e_1_3_2_1_20_1","volume-title":"https:\/\/github.com\/google\/HyperProtoBench","author":"Google","year":"2021","unstructured":"Google. HyperProtoBench. https:\/\/github.com\/google\/HyperProtoBench, 2021."},{"key":"e_1_3_2_1_21_1","unstructured":"Google. Protocol buffers. http:\/\/code.google.com\/p\/protobuf\/ 2021."},{"key":"e_1_3_2_1_22_1","volume-title":"IEEE\/ACM Intl. Symp. on Microarchitecture","author":"Guo Z.","year":"2023","unstructured":"Guo, Z., Lin, J., Bai, Y., Kim, D., Swift, M., Akella, A., and Liu, M. LogNIC: Ahigh-level performance model for SmartNICs. In IEEE\/ACM Intl. Symp. on Microarchitecture (2023)."},{"key":"e_1_3_2_1_23_1","volume-title":"Conf. on Computer Vision and Pattern Recognition","author":"He K.","year":"2016","unstructured":"He, K., Zhang, X., Ren, S., and Sun, J. Deep residual learning for image recognition. In Conf. on Computer Vision and Pattern Recognition (2016)."},{"key":"e_1_3_2_1_24_1","volume-title":"Infrastructure processing unit (Intel IPU) ASIC E2000. https:\/\/www.intel.de\/content\/www\/de\/de\/products\/network-io\/infrastructure-processing-units\/asic\/e2000-asic.html","author":"Intel","year":"2022","unstructured":"Intel. Infrastructure processing unit (Intel IPU) ASIC E2000. https:\/\/www.intel.de\/content\/www\/de\/de\/products\/network-io\/infrastructure-processing-units\/asic\/e2000-asic.html, 2022."},{"key":"e_1_3_2_1_25_1","volume-title":"QAT: Accelerating data compression and encryption. https:\/\/www.intel.com\/content\/www\/us\/en\/architecture-and-technology\/intel-quick-assist-technology-overview.html","author":"Intel","year":"2025","unstructured":"Intel. QAT: Accelerating data compression and encryption. https:\/\/www.intel.com\/content\/www\/us\/en\/architecture-and-technology\/intel-quick-assist-technology-overview.html, 2025."},{"key":"e_1_3_2_1_26_1","unstructured":"Jin H.-Q. Frumkin M. and Yan J. The OpenMP implementation of NAS parallel benchmarks and its performance. Tech. Rep. NAS-99-011 NASA Ames Research Center 1999."},{"key":"e_1_3_2_1_27_1","volume-title":"Intl. Symp. on Computer Architecture","author":"Jouppi N. P.","year":"2021","unstructured":"Jouppi, N. P., Yoon, D. H., Ashcraft, M., Gottscho, M., Jablin, T. B., Kurian, G., Laudon, J., Li, S., Ma, P. C., Ma, X., Norrie, T., Patil, N., Prasad, S., Young, C., Zhou, Z., and Patterson, D. A. Ten lessons from three generations shaped Google's TPUv4i (industrial product). In Intl. Symp. on Computer Architecture (2021)."},{"key":"e_1_3_2_1_28_1","volume-title":"Intl. Symp. on Computer Architecture","author":"Jouppi N. P.","year":"2017","unstructured":"Jouppi, N. P., Young, C., Patil, N., Patterson, D. A., Agrawal, G., Bajwa, R., Bates, S., Bhatia, S., Boden, N., Borchers, A., Boyle, R., Cantin, P., Chao, C., Clark, C., Coriell, J., Daley, M., Dau, M., Dean, J., Gelb, B., Ghaemmaghami, T. V., Gottipati, R., Gulland, W., Hagmann, R., Ho, C. R., Hogberg, D., Hu, J., Hundt, R., Hurt, D., Ibarz, J., Jaffey, A., Jaworski, A., Kaplan, A., Khaitan, H., Killebrew, D., Koch, A., Kumar, N., Lacy, S., Laudon, J., Law, J., Le, D., Leary, C., Liu, Z., Lucke, K., Lundin, A., MacKean, G., Maggiore, A., Mahony, M., Miller, K., Nagarajan, R., Narayanaswami, R., Ni, R., Nix, K., Norrie, T., Omernick, M., Penukonda, N., Phelps, A., Ross, J., Ross, M., Salek, A., Samadiani, E., Severn, C., Sizikov, G., Snelham, M., Souter, J., Steinberg, D., Swing, A., Tan, M., Thorson, G., Tian, B., Toma, H., Tuttle, E., Vasudevan, V., Walter, R., Wang, W., Wilcox, E., and Yoon, D. H. In-datacenter performance analysis of a tensor processing unit. In Intl. Symp. on Computer Architecture (2017)."},{"key":"e_1_3_2_1_29_1","volume-title":"Div2k JPEG image dataset. https:\/\/www.kaggle.com\/datasets\/mingyuouyang\/div2k-jpeg-0400","author":"Kaggle","year":"2024","unstructured":"Kaggle. Div2k JPEG image dataset. https:\/\/www.kaggle.com\/datasets\/mingyuouyang\/div2k-jpeg-0400, 2024."},{"key":"e_1_3_2_1_30_1","volume-title":"Flickr image dataset. https:\/\/www.kaggle.com\/datasets\/hsankesara\/flickr-image-dataset","author":"Kaggle","year":"2024","unstructured":"Kaggle. Flickr image dataset. https:\/\/www.kaggle.com\/datasets\/hsankesara\/flickr-image-dataset, 2024."},{"key":"e_1_3_2_1_31_1","volume-title":"IEEE\/ACM Intl. Symp. on Microarchitecture","author":"Karandikar S.","year":"2021","unstructured":"Karandikar, S., Leary, C., Kennelly, C., Zhao, J., Parimi, D., Nikolic, B., Asanovic, K., and Ranganathan, P. A hardware accelerator for protocol buffers. In IEEE\/ACM Intl. Symp. on Microarchitecture (2021)."},{"key":"e_1_3_2_1_32_1","volume-title":"Intl. Symp. on Computer Architecture","author":"Karandikar S.","year":"2018","unstructured":"Karandikar, S., Mao, H., Kim, D., Biancolin, D., Amid, A., Lee, D., Pemberton, N., Amaro, E., Schmidt, C., Chopra, A., Huang, Q., Kovacs, K., Nikolic, B., Katz, R. H., Bachrach, J., and Asanovic, K. FireSim: FPGA-accelerated cycle-exact scale-out system simulation in the public cloud. In Intl. Symp. on Computer Architecture (2018)."},{"key":"e_1_3_2_1_33_1","volume-title":"SimBricks: End-to-End Network System Evaluation with Modular Simulation. In ACM SIGCOMM Conf.","author":"Li H.","year":"2022","unstructured":"Li, H., Li, J., and Kaufmann, A. SimBricks: End-to-End Network System Evaluation with Modular Simulation. In ACM SIGCOMM Conf. (2022)."},{"key":"e_1_3_2_1_34_1","volume-title":"Intl. Conf. on Parallel Processing","author":"L\u00f3pez-Parad\u00eds G.","year":"2021","unstructured":"L\u00f3pez-Parad\u00eds, G., Armejach, A., and Moret\u00f3, M. gem5+rtl: A framework to enable RTL models inside a full-system simulator. In Intl. Conf. on Parallel Processing (2021)."},{"key":"e_1_3_2_1_35_1","volume-title":"The gem5 simulator: Version 20.0+","author":"Lowe-Power J.","year":"2020","unstructured":"Lowe-Power, J., Ahmad, A. M., Akram, A., Alian, M., Amslinger, R., Andreozzi, M., Armejach, A., Asmussen, N., Beckmann, B., Bharadwaj, S., Black, G., Bloom, G., Bruce, B. R., Carvalho, D. R., Castrillon, J., Chen, L., Derumigny, N., Diestelhorst, S., Elsasser, W., Escuin, C., Fariborz, M., Farmahini-Farahani, A., Fotouhi, P., Gambord, R., Gandhi, J., Gope, D., Grass, T., Gutierrez, A., Hanindhito, B., Hansson, A., Haria, S., Harris, A., Hayes, T., Herrera, A., Horsnell, M., Jafri, S. A. R., Jagtap, R., Jang, H., Jeyapaul, R., Jones, T. M., Jung, M., Kannoth, S., Khaleghzadeh, H., Kodama, Y., Krishna, T., Marinelli, T., Menard, C., Mondelli, A., Moreto, M., M\u00fcck, T., Naji, O., Nathella, K., Nguyen, H., Nikoleris, N., Olson, L. E., Orr, M., Pham, B., Prieto, P., Reddy, T., Roelke, A., Samani, M., Sandberg, A., Setoain, J., Shingarov, B., Sinclair, M. D., Ta, T., Thakur, R., Travaglini, G., Upton, M., Vaish, N., Vougioukas, I., Wang, W., Wang, Z., Wehn, N., Weis, C., Wood, D. A., Yoon, H., and \u00c9der F. Zulian. The gem5 simulator: Version 20.0+, 2020."},{"key":"e_1_3_2_1_36_1","volume-title":"LPN source code repository. https:\/\/github.com\/dslab-epfl\/lpn","author":"Ma J.","year":"2024","unstructured":"Ma, J., Iyer, R., Kashani, S., Emami, M., Bourgeat, T., and Candea, G. LPN source code repository. https:\/\/github.com\/dslab-epfl\/lpn, 2024."},{"key":"e_1_3_2_1_37_1","volume-title":"Symp. on Operating Sys. Design and Implem.","author":"Ma J.","year":"2024","unstructured":"Ma, J., Iyer, R., Kashani, S., Emami, M., Bourgeat, T., and Candea, G. Performance interfaces for hardware accelerators. In Symp. on Operating Sys. Design and Implem. (2024)."},{"key":"e_1_3_2_1_38_1","volume-title":"Simics: A full system simulation platform. Computer","author":"Magnusson P. S.","year":"2002","unstructured":"Magnusson, P. S., Christensson, M., Eskilson, J., Forsgren, D., H\u00e5llberg, G., H\u00f6gberg, J., Larsson, F., Moestedt, A., and Werner, B. Simics: A full system simulation platform. Computer (2002)."},{"key":"e_1_3_2_1_39_1","volume-title":"Intl. Conf. on Embedded Computer Systems: Architectures, Modeling, and Simulation","author":"Menard C.","year":"2017","unstructured":"Menard, C., Castrillon, J., Jung, M., and Wehn, N. System simulation with gem5 and SystemC: The keystone for full interoperability. In Intl. Conf. on Embedded Computer Systems: Architectures, Modeling, and Simulation (2017)."},{"key":"e_1_3_2_1_40_1","volume-title":"Our next-generation Meta training and inference accelerator. https:\/\/ai.meta.com\/blog\/next-generation-meta-training-inference-accelerator-AI-MTIA","author":"Meta AI.","year":"2024","unstructured":"Meta AI. Our next-generation Meta training and inference accelerator. https:\/\/ai.meta.com\/blog\/next-generation-meta-training-inference-accelerator-AI-MTIA, 2024."},{"key":"e_1_3_2_1_41_1","volume-title":"Enhancing infrastructure efficiency with Azure Boost DPU. https:\/\/techcommunity.microsoft.com\/blog\/azureinfrastructureblog\/enhancing-infrastructure-efficiency-with-azure-boost-dpu\/4298901","author":"Microsoft Azure","year":"2024","unstructured":"Microsoft Azure. Enhancing infrastructure efficiency with Azure Boost DPU. https:\/\/techcommunity.microsoft.com\/blog\/azureinfrastructureblog\/enhancing-infrastructure-efficiency-with-azure-boost-dpu\/4298901, 2024."},{"key":"e_1_3_2_1_42_1","volume-title":"Intl. Symp. on High-Performance Computer Architecture","author":"Miller J. E.","year":"2010","unstructured":"Miller, J. E., Kasture, H., Kurian, G., Gruenwald, C., Beckmann, N., Celio, C., Eastep, J., and Agarwal, A. Graphite: A distributed parallel simulator for multicores. In Intl. Symp. on High-Performance Computer Architecture (2010)."},{"key":"e_1_3_2_1_43_1","volume-title":"Intl. Symp. on Computer Architecture","author":"Nagendra N. P.","year":"2023","unstructured":"Nagendra, N. P., Godala, B. R., Chaturvedi, I., Patel, A., Kanev, S., Moseley, T., Stark, J., Pokam, G. A., Campanoni, S., and August, D. I. EMISSARY: Enhanced miss awareness replacement policy for L2 instruction caching. In Intl. Symp. on Computer Architecture (2023)."},{"key":"e_1_3_2_1_44_1","volume-title":"IEEE\/ACM Intl. Symp. on Microarchitecture","author":"Nayak N.","year":"2023","unstructured":"Nayak, N., Odemuyiwa, T. O., Ugare, S., Fletcher, C. W., Pellauer, M., and Emer, J. S. TeAAL: A declarative framework for modeling sparse tensor accelerators. In IEEE\/ACM Intl. Symp. on Microarchitecture (2023)."},{"key":"e_1_3_2_1_45_1","volume-title":"IEEE Hot Chips Symposium","author":"Norrie T.","year":"2020","unstructured":"Norrie, T., Patil, N., Yoon, D. H., Kurian, G., Li, S., Laudon, J., Young, C., Jouppi, N. P., and Patterson, D. A. Google's training chips revealed: TPUv2 and TPUv3. In IEEE Hot Chips Symposium (2020)."},{"key":"e_1_3_2_1_46_1","volume-title":"IEEE Intl. Symp. on Performance Analysis of Systems and Software","author":"Parashar A.","year":"2019","unstructured":"Parashar, A., Raina, P., Shao, Y. S., Chen, Y.-H., Ying, V. A., Mukkara, A., Venkatesan, R., Khailany, B., Keckler, S. W., and Emer, J. S. Timeloop: A systematic approach to DNN accelerator evaluation. In IEEE Intl. Symp. on Performance Analysis of Systems and Software (2019)."},{"key":"e_1_3_2_1_47_1","unstructured":"Performance interfaces (project website). https:\/\/dslab.epfl.ch\/research\/perf."},{"key":"e_1_3_2_1_48_1","volume-title":"Intl. Conf. on Architectural Support for Programming Languages and Operating Systems","author":"Ranganathan P.","year":"2021","unstructured":"Ranganathan, P., Stodolsky, D., Calow, J., Dorfman, J., Hechtman, M. G., Smullen, C., Kuusela, A., Laursen, A. J., Ramirez, A., Wijaya, A. A., Salek, A., Cheung, A., Gelb, B., Fosco, B., Kyaw, C. M., He, D., Munday, D. A., Wickeraad, D., Persaud, D., Stark, D., Walton, D., Indupalli, E., Perkins-Argueta, E., Lou, F., Wu, H. K., Chong, I. S., Jayaram, I., Feng, J., Maaninen, J., Lucke, K. A., Mahony, M., Wachsler, M. S., Tan, M., Penukonda, N., Dasharathi, N., Kongetira, P., Chauhan, P., Balasubramanian, R., Macias, R., Ho, R., Springer, R., Huffman, R. W., Foss, S., Bhatia, S., Gwin, S. J., Sekar, S. K., Sokolov, S. N., Muroor, S., Rautio, V.M., Ripley, Y., Hase, Y., and Li, Y. Warehouse-scale video acceleration: Co-design and deployment in the wild. In Intl. Conf. on Architectural Support for Programming Languages and Operating Systems (2021)."},{"key":"e_1_3_2_1_49_1","volume-title":"ACM SIGMETRICS Conf.","author":"Reinhardt S. K.","year":"1993","unstructured":"Reinhardt, S. K., Hill, M. D., Larus, J. R., Lebeck, A. R., Lewis, J. C., and Wood, D. A. The Wisconsin Wind Tunnel: Virtual prototyping of parallel computers. In ACM SIGMETRICS Conf. (1993)."},{"key":"e_1_3_2_1_50_1","volume-title":"Intl. Symp. on Computer Architecture","author":"Sanchez D.","year":"2013","unstructured":"Sanchez, D., and Kozyrakis, C. Zsim: Fast and accurate microarchitectural simulation of thousand-core systems. In Intl. Symp. on Computer Architecture (2013)."},{"key":"e_1_3_2_1_51_1","volume-title":"Scheduling extension framework. https:\/\/github.com\/sched-ext\/scx","author":"SCX","year":"2024","unstructured":"SCX: Scheduling extension framework. https:\/\/github.com\/sched-ext\/scx, 2024."},{"key":"e_1_3_2_1_52_1","volume-title":"IEEE\/ACM Intl. Symp. on Microarchitecture","author":"Shao Y. S.","year":"2016","unstructured":"Shao, Y. S., Xi, S. L., Srinivasan, V., Wei, G.-Y., and Brooks, D. Co-designing accelerators and SoC interfaces using gem5-Aladdin. In IEEE\/ACM Intl. Symp. on Microarchitecture (2016)."},{"key":"e_1_3_2_1_53_1","author":"Sofia A. T.","year":"2020","unstructured":"Sofia, A. T., Klein, M., Stilwell, B. D., Weishaupt, S., Chen, Q. Y., and John, R. W. S. Integration of z15 processor-based DEFLATE acceleration into IBM z\/OS. IBM J. Res. Dev. (2020).","journal-title":"J. Res. Dev. ("},{"key":"e_1_3_2_1_54_1","volume-title":"Intl. Conf. on Architectural Support for Programming Languages and Operating Systems","author":"Sriraman A.","year":"2020","unstructured":"Sriraman, A., and Dhanotia, A. Accelerometer: Understanding acceleration opportunities for data center overheads at hyperscale. In Intl. Conf. on Architectural Support for Programming Languages and Operating Systems (2020)."},{"key":"e_1_3_2_1_55_1","volume-title":"VCS: Functional verification solution. https:\/\/www.synopsys.com\/verification\/simulation\/vcs.html","author":"Synopsys","year":"2024","unstructured":"Synopsys, Inc. VCS: Functional verification solution. https:\/\/www.synopsys.com\/verification\/simulation\/vcs.html, 2024."},{"key":"e_1_3_2_1_56_1","volume-title":"Intl. Conf. on Architectural Support for Programming Languages and Operating Systems","author":"Tan Z.","year":"2015","unstructured":"Tan, Z., Qian, Z., Chen, X., Asanovic, K., and Patterson, D. DIABLO: A warehouse-scale computer network simulator using FPGAs. In Intl. Conf. on Architectural Support for Programming Languages and Operating Systems (2015)."},{"key":"e_1_3_2_1_57_1","volume-title":"Design Automation Conf.","author":"Tan Z.","year":"2010","unstructured":"Tan, Z., Waterman, A., Avizienis, R., Lee, Y., Cook, H., Patterson, D., and Asanovi\u0107, K. RAMP gold: An FPGA-based architecture simulator for multiprocessors. In Design Automation Conf. (2010)."},{"key":"e_1_3_2_1_58_1","volume-title":"Intl. Conf. on Emerging Networking Experiments and Technologies","author":"Tazaki H.","year":"2013","unstructured":"Tazaki, H., Uarbani, F., Mancini, E., Lacage, M., Camara, D., Turletti, T., and Dabbous, W. Direct code execution: Revisiting library OS architecture for reproducible network experiments. In Intl. Conf. on Emerging Networking Experiments and Technologies (2013)."},{"key":"e_1_3_2_1_59_1","volume-title":"CFS scheduler. https:\/\/docs.kernel.org\/scheduler\/sched-design-CFS.html","author":"The Linux","year":"2025","unstructured":"The Linux kernel development community. CFS scheduler. https:\/\/docs.kernel.org\/scheduler\/sched-design-CFS.html, 2025."},{"key":"e_1_3_2_1_60_1","volume-title":"High-throughput JPEG decoder. https:\/\/github.com\/ultraembedded\/core_jpeg","author":"Ultra-Embedded","year":"2020","unstructured":"Ultra-Embedded. High-throughput JPEG decoder. https:\/\/github.com\/ultraembedded\/core_jpeg, 2020."},{"key":"e_1_3_2_1_61_1","unstructured":"Veripool. The Verilator simulator. https:\/\/www.veripool.org\/verilator\/."},{"key":"e_1_3_2_1_62_1","volume-title":"Intl. Symp. on High-Performance Computer Architecture","author":"Villa O.","year":"2021","unstructured":"Villa, O., Lustig, D., Yan, Z., Bolotin, E., Fu, Y., Chatterjee, N., Jiang, N., and Nellans, D. Need for speed: Experiences building a trustworthy system-level GPU simulator. In Intl. Symp. on High-Performance Computer Architecture (2021)."},{"key":"e_1_3_2_1_63_1","volume-title":"Cascade Lake microarchitectures. https:\/\/en.wikichip.org\/wiki\/intel\/microarchitectures\/cascade_lake","author":"Wikichip","year":"2024","unstructured":"Wikichip. Cascade Lake microarchitectures. https:\/\/en.wikichip.org\/wiki\/intel\/microarchitectures\/cascade_lake, 2024."},{"key":"e_1_3_2_1_64_1","volume-title":"Skylake microarchitectures. https:\/\/en.wikichip.org\/wiki\/intel\/microarchitectures\/skylake_(client)#Pipeline","author":"Wikichip","year":"2024","unstructured":"Wikichip. Skylake microarchitectures. https:\/\/en.wikichip.org\/wiki\/intel\/microarchitectures\/skylake_(client)#Pipeline, 2024."},{"key":"e_1_3_2_1_65_1","volume-title":"Intl. Symp. on Computer Architecture","author":"Zhang N.","year":"2024","unstructured":"Zhang, N., Lacouture, R., Sohn, G., Mure, P., Zhang, Q., Kjolstad, F., and Olukotun, K. The dataflow abstract machine simulator framework. In Intl. Symp. on Computer Architecture (2024)."}],"event":{"name":"SOSP '25: ACM SIGOPS 31st Symposium on Operating Systems Principles","location":"Lotte Hotel World Seoul Republic of Korea","acronym":"SOSP '25","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems","USENIX"]},"container-title":["Proceedings of the ACM SIGOPS 31st Symposium on Operating Systems Principles"],"original-title":[],"deposited":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T12:54:56Z","timestamp":1759323296000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731569.3764825"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,12]]},"references-count":65,"alternative-id":["10.1145\/3731569.3764825","10.1145\/3731569"],"URL":"https:\/\/doi.org\/10.1145\/3731569.3764825","relation":{},"subject":[],"published":{"date-parts":[[2025,10,12]]},"assertion":[{"value":"2025-10-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}