{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T15:47:07Z","timestamp":1772725627826,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,3,9]],"date-time":"2020-03-09T00:00:00Z","timestamp":1583712000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100006133","name":"Advanced Research Projects Agency - Energy","doi-asserted-by":"publisher","award":["DE-AR0000849"],"award-info":[{"award-number":["DE-AR0000849"]}],"id":[{"id":"10.13039\/100006133","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,3,9]]},"DOI":"10.1145\/3373376.3378455","type":"proceedings-article","created":{"date-parts":[[2020,3,13]],"date-time":"2020-03-13T22:37:01Z","timestamp":1584139021000},"page":"715-731","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":16,"title":["FirePerf"],"prefix":"10.1145","author":[{"given":"Sagar","family":"Karandikar","sequence":"first","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]},{"given":"Albert","family":"Ou","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]},{"given":"Alon","family":"Amid","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]},{"given":"Howard","family":"Mao","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]},{"given":"Randy","family":"Katz","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]},{"given":"Borivoje","family":"Nikoli\u0107","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]},{"given":"Krste","family":"Asanovi\u0107","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2020,3,13]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2018. Kendryte K210 Announcement. https:\/\/cnrv.io\/bi-week-rpts\/ 2018-09--16.  2018. Kendryte K210 Announcement. https:\/\/cnrv.io\/bi-week-rpts\/ 2018-09--16."},{"key":"e_1_3_2_1_2_1","unstructured":"2019. FireSim: Easy-to-use Scalable FPGA-accelerated Cycle-accurate Hardware Simulation in the Cloud. https:\/\/github.com\/firesim\/firesim.  2019. FireSim: Easy-to-use Scalable FPGA-accelerated Cycle-accurate Hardware Simulation in the Cloud. https:\/\/github.com\/firesim\/firesim."},{"key":"e_1_3_2_1_3_1","unstructured":"2019. Network Maximum Transmission Unit (MTU) for Your EC2 Instance. https:\/\/docs.aws.amazon.com\/AWSEC2\/latest\/UserGuide\/ network_mtu.html.  2019. Network Maximum Transmission Unit (MTU) for Your EC2 Instance. https:\/\/docs.aws.amazon.com\/AWSEC2\/latest\/UserGuide\/ network_mtu.html."},{"key":"e_1_3_2_1_4_1","unstructured":"2019. strace: strace is a diagnostic debugging and instructional userspace utility for Linux. https:\/\/github.com\/strace\/strace.  2019. strace: strace is a diagnostic debugging and instructional userspace utility for Linux. https:\/\/github.com\/strace\/strace."},{"key":"e_1_3_2_1_5_1","volume-title":"Addressing the Challenges of Synchronization\/Communication and Debugging Support in Hardware\/Software Cosimulation. In 21st International Conference on VLSI Design (VLSID 2008","author":"Agrawal B.","year":"2008","unstructured":"B. Agrawal , T. Sherwood , C. Shin , and S. Yoon . 2008 . Addressing the Challenges of Synchronization\/Communication and Debugging Support in Hardware\/Software Cosimulation. In 21st International Conference on VLSI Design (VLSID 2008 ). 354--361. https:\/\/doi.org\/10. 1109\/VLSI. 2008 .74 B. Agrawal, T. Sherwood, C. Shin, and S. Yoon. 2008. Addressing the Challenges of Synchronization\/Communication and Debugging Support in Hardware\/Software Cosimulation. In 21st International Conference on VLSI Design (VLSID 2008). 354--361. https:\/\/doi.org\/10. 1109\/VLSI.2008.74"},{"key":"e_1_3_2_1_7_1","volume-title":"DAC Design Automation Conference","author":"Bachrach J.","year":"2012","unstructured":"J. Bachrach , H. Vo , B. Richards , Y. Lee , A. Waterman , R. Aviienis , J. Wawrzynek , and K. Asanovic . 2012. Chisel: Constructing hardware in a Scala embedded language . In DAC Design Automation Conference 2012 . 1212--1221. https:\/\/doi.org\/10.1145\/2228360.2228584 10.1145\/2228360.2228584 J. Bachrach, H. Vo, B. Richards, Y. Lee, A. Waterman, R. Aviienis, J. Wawrzynek, and K. Asanovic. 2012. Chisel: Constructing hardware in a Scala embedded language. In DAC Design Automation Conference 2012. 1212--1221. https:\/\/doi.org\/10.1145\/2228360.2228584"},{"key":"e_1_3_2_1_8_1","unstructured":"Jeff Barr. 2018. New C5n Instances with 100 Gbps Networking. https:\/\/aws.amazon.com\/blogs\/aws\/new-c5n-instances-with-100- gbps-networking\/.  Jeff Barr. 2018. New C5n Instances with 100 Gbps Networking. https:\/\/aws.amazon.com\/blogs\/aws\/new-c5n-instances-with-100- gbps-networking\/."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3015146"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289602.3293894"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2006.82"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2005.35"},{"key":"e_1_3_2_1_13_1","unstructured":"Brendan Gregg. 2019. Flame Graphs. http:\/\/www.brendangregg.com\/ flamegraphs.html.  Brendan Gregg. 2019. Flame Graphs. http:\/\/www.brendangregg.com\/ flamegraphs.html."},{"key":"e_1_3_2_1_14_1","unstructured":"Brendan Gregg. 2019. FlameGraph: Stack trace visualizer. https: \/\/github.com\/brendangregg\/FlameGraph.  Brendan Gregg. 2019. FlameGraph: Stack trace visualizer. https: \/\/github.com\/brendangregg\/FlameGraph."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.36"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/1534916.1534925"},{"key":"e_1_3_2_1_18_1","volume-title":"Slides from Linux Kongress","author":"De Melo Arnaldo Carvalho","unstructured":"Arnaldo Carvalho De Melo . 2010. The New Linux perf Tools . In Slides from Linux Kongress , Vol. 18 . Arnaldo Carvalho De Melo. 2010. The New Linux perf Tools. In Slides from Linux Kongress, Vol. 18."},{"key":"e_1_3_2_1_19_1","volume-title":"RPPM: Rapid Performance Prediction of Multithreaded Workloads on Multicore Processors. In 2019 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS). 257--267","author":"Pestel S. De","year":"2019","unstructured":"S. De Pestel , S. Van den Steen, S. Akram, and L. Eeckhout. 2019 . RPPM: Rapid Performance Prediction of Multithreaded Workloads on Multicore Processors. In 2019 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS). 257--267 . https: \/\/doi.org\/10.1109\/ISPASS. 2019 .00038 10.1109\/ISPASS.2019.00038 S. De Pestel, S. Van den Steen, S. Akram, and L. Eeckhout. 2019. RPPM: Rapid Performance Prediction of Multithreaded Workloads on Multicore Processors. In 2019 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS). 257--267. https: \/\/doi.org\/10.1109\/ISPASS.2019.00038"},{"key":"e_1_3_2_1_20_1","unstructured":"DWARF Debugging Information Format Committee. 2017. DWARF Debugging Information Format Version 5. Standard. http:\/\/www. dwarfstd.org\/doc\/DWARF5.pdf  DWARF Debugging Information Format Committee. 2017. DWARF Debugging Information Format Version 5. Standard. http:\/\/www. dwarfstd.org\/doc\/DWARF5.pdf"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.5555\/1855084"},{"key":"e_1_3_2_1_22_1","unstructured":"ESnet\/LBNL. 2019. iPerf - The ultimate speed test tool for TCP UDP and SCTP. https:\/\/iperf.fr\/.  ESnet\/LBNL. 2019. iPerf - The ultimate speed test tool for TCP UDP and SCTP. https:\/\/iperf.fr\/."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2014.41"},{"key":"e_1_3_2_1_24_1","volume-title":"Azure Accelerated Networking: SmartNICs in the Public Cloud. In 15th USENIX Symposium on Networked Systems Design and Implementation (NSDI 18)","author":"Firestone Daniel","year":"2018","unstructured":"Daniel Firestone , AndrewPutnam, Sambhrama Mundkur , Derek Chiou , Alireza Dabagh , Mike Andrewartha , Hari Angepat , Vivek Bhanu , Adrian Caulfield , Eric Chung , Harish Kumar Chandrappa , Somesh Chaturmohta , Matt Humphrey , Jack Lavier , Norman Lam , Fengfen Liu , Kalin Ovtcharov , Jitu Padhye , Gautham Popuri , Shachar Raindel , Tejas Sapre , Mark Shaw , Gabriel Silva , Madhan Sivakumar , Nisheeth Srivastava , Anshuman Verma , Qasim Zuhair , Deepak Bansal , Doug Burger , Kushagra Vaid , David A. Maltz , and Albert Greenberg . 2018 . Azure Accelerated Networking: SmartNICs in the Public Cloud. In 15th USENIX Symposium on Networked Systems Design and Implementation (NSDI 18) . USENIX Association, Renton, WA, 51--66. https: \/\/www.usenix.org\/conference\/nsdi18\/presentation\/firestone Daniel Firestone, AndrewPutnam, Sambhrama Mundkur, Derek Chiou, Alireza Dabagh, Mike Andrewartha, Hari Angepat, Vivek Bhanu, Adrian Caulfield, Eric Chung, Harish Kumar Chandrappa, Somesh Chaturmohta, Matt Humphrey, Jack Lavier, Norman Lam, Fengfen Liu, Kalin Ovtcharov, Jitu Padhye, Gautham Popuri, Shachar Raindel, Tejas Sapre, Mark Shaw, Gabriel Silva, Madhan Sivakumar, Nisheeth Srivastava, Anshuman Verma, Qasim Zuhair, Deepak Bansal, Doug Burger, Kushagra Vaid, David A. Maltz, and Albert Greenberg. 2018. Azure Accelerated Networking: SmartNICs in the Public Cloud. In 15th USENIX Symposium on Networked Systems Design and Implementation (NSDI 18). USENIX Association, Renton, WA, 51--66. https: \/\/www.usenix.org\/conference\/nsdi18\/presentation\/firestone"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/2909476"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD.2017.8203780"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00034"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750392"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00014"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2019.2910175"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/FPL.2018.00021"},{"issue":"8","key":"e_1_3_2_1_33_1","first-page":"1","article-title":"The Hwacha Vector-Fetch Architecture Manual","volume":"3","author":"Lee Yunsup","year":"2015","unstructured":"Yunsup Lee , Colin Schmidt , Albert Ou , Andrew Waterman , and Krste Asanovi?. 2015 . The Hwacha Vector-Fetch Architecture Manual , Version 3 . 8 . 1 . Technical Report UCB\/EECS-2015--262. EECS Department, University of California, Berkeley. Yunsup Lee, Colin Schmidt, Albert Ou, Andrew Waterman, and Krste Asanovi?. 2015. The Hwacha Vector-Fetch Architecture Manual, Version 3.8.1. Technical Report UCB\/EECS-2015--262. EECS Department, University of California, Berkeley.","journal-title":"Version"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00056"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/166955.167023"},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage, and Analysis","author":"McCalpin John D.","year":"2018","unstructured":"John D. McCalpin . 2018 . HPL and DGEMM Performance Variability on the Xeon Platinum 8160 Processor . In Proceedings of the International Conference for High Performance Computing, Networking, Storage, and Analysis ( Dallas, Texas) (SC '18). IEEE Press, Piscataway, NJ, USA, Article 18, 13 pages. https:\/\/doi.org\/10.1109\/SC. 2018.00021 10.1109\/SC.2018.00021 John D. McCalpin. 2018. HPL and DGEMM Performance Variability on the Xeon Platinum 8160 Processor. In Proceedings of the International Conference for High Performance Computing, Networking, Storage, and Analysis (Dallas, Texas) (SC '18). IEEE Press, Piscataway, NJ, USA, Article 18, 13 pages. https:\/\/doi.org\/10.1109\/SC.2018.00021"},{"key":"e_1_3_2_1_37_1","volume-title":"8th Network and Parallel Computing (NPC) (Network and Parallel Computing)","author":"Moseley Tipp","unstructured":"Tipp Moseley , Neil Vachharajani , and William Jalby . 2011. Hardware Performance Monitoring for the Rest of Us: A Position and Survey . In 8th Network and Parallel Computing (NPC) (Network and Parallel Computing) , Erik Altman and Weisong Shi (Eds.), Vol. LNCS-6985 . Springer , Changsha? China , 293--312. https:\/\/doi.org\/10.1007\/978--3- 642--24403--2_23 Part 8: Session 8: Microarchitecture. 10.1007\/978--3- Tipp Moseley, Neil Vachharajani, and William Jalby. 2011. Hardware Performance Monitoring for the Rest of Us: A Position and Survey. In 8th Network and Parallel Computing (NPC) (Network and Parallel Computing), Erik Altman and Weisong Shi (Eds.), Vol. LNCS-6985. Springer, Changsha? China, 293--312. https:\/\/doi.org\/10.1007\/978--3- 642--24403--2_23 Part 8: Session 8: Microarchitecture."},{"key":"e_1_3_2_1_38_1","volume-title":"Automation and Test in Europe Conference and Exhibition. 120--125 suppl. https: \/\/doi.org\/10","author":"Moussa I.","year":"2003","unstructured":"I. Moussa , T. Grellier , and G. Nguyen . 2003. Exploring SW performance using SoC transaction-level modeling. In 2003 Design , Automation and Test in Europe Conference and Exhibition. 120--125 suppl. https: \/\/doi.org\/10 .1109\/DATE. 2003 .1186682 10.1109\/DATE.2003.1186682 I. Moussa, T. Grellier, and G. Nguyen. 2003. Exploring SW performance using SoC transaction-level modeling. In 2003 Design, Automation and Test in Europe Conference and Exhibition. 120--125 suppl. https: \/\/doi.org\/10.1109\/DATE.2003.1186682"},{"key":"e_1_3_2_1_39_1","volume-title":"Automation Test in Europe Conference Exhibition. 1--6. https:\/\/doi.org\/10","author":"Ogras U. Y.","year":"2007","unstructured":"U. Y. Ogras and R. Marculescu . 2007. Analytical Router Modeling for Networks-on-Chip Performance Analysis. In 2007 Design , Automation Test in Europe Conference Exhibition. 1--6. https:\/\/doi.org\/10 .1109\/ DATE. 2007 .364440 U. Y. Ogras and R. Marculescu. 2007. Analytical Router Modeling for Networks-on-Chip Performance Analysis. In 2007 Design, Automation Test in Europe Conference Exhibition. 1--6. https:\/\/doi.org\/10.1109\/ DATE.2007.364440"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2011.5749747"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/781027.781076"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.23919\/DATE.2017.7926956"},{"key":"e_1_3_2_1_43_1","volume-title":"Hardware\/Software Co-Simulation. In 31st Design Automation Conference. 439--440","author":"Rowson J. A.","year":"1994","unstructured":"J. A. Rowson . 1994 . Hardware\/Software Co-Simulation. In 31st Design Automation Conference. 439--440 . https:\/\/doi.org\/10.1109\/DAC.1994. 204143 10.1109\/DAC.1994 J. A. Rowson. 1994. Hardware\/Software Co-Simulation. In 31st Design Automation Conference. 439--440. https:\/\/doi.org\/10.1109\/DAC.1994. 204143"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/1391469.1391543"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783762"},{"key":"e_1_3_2_1_46_1","unstructured":"SiFive. 2018. SiFive HiFive Unleashed Getting Started Guide. https:\/\/sifive.cdn.prismic.io\/sifive\/fa3a584a-a02f-4fda-b758- a2def05f49f9_hifive-unleashed-getting-started-guide-v1p1.pdf.  SiFive. 2018. SiFive HiFive Unleashed Getting Started Guide. https:\/\/sifive.cdn.prismic.io\/sifive\/fa3a584a-a02f-4fda-b758- a2def05f49f9_hifive-unleashed-getting-started-guide-v1p1.pdf."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/1837274.1837390"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/1216919.1216936"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2015.7056063"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/871656.859629"},{"key":"e_1_3_2_1_51_1","volume-title":"SC '08: Proceedings of the 2008 ACM\/IEEE Conference on Supercomputing. 1--12","author":"Yoshino T.","year":"2008","unstructured":"T. Yoshino , Y. Sugawara , K. Inagami , J. Tamatsukuri , M. Inaba , and K. Hiraki . 2008. Performance optimization of TCP\/IP over 10 Gigabit Ethernet by precise instrumentation . In SC '08: Proceedings of the 2008 ACM\/IEEE Conference on Supercomputing. 1--12 . https:\/\/doi.org\/10. 1109\/SC. 2008 .5215913 T. Yoshino, Y. Sugawara, K. Inagami, J. Tamatsukuri, M. Inaba, and K. Hiraki. 2008. Performance optimization of TCP\/IP over 10 Gigabit Ethernet by precise instrumentation. In SC '08: Proceedings of the 2008 ACM\/IEEE Conference on Supercomputing. 1--12. https:\/\/doi.org\/10. 1109\/SC.2008.5215913"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/2897937.2897977"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.5555\/998680.1006720"}],"event":{"name":"ASPLOS '20: Architectural Support for Programming Languages and Operating Systems","location":"Lausanne Switzerland","acronym":"ASPLOS '20","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGOPS ACM Special Interest Group on Operating Systems","SIGARCH ACM Special Interest Group on Computer Architecture","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the Twenty-Fifth International Conference on Architectural Support for Programming Languages and Operating Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3373376.3378455","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3373376.3378455","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3373376.3378455","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:32:59Z","timestamp":1750199579000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3373376.3378455"}},"subtitle":["FPGA-Accelerated Full-System Hardware\/Software Performance Profiling and Co-Design"],"short-title":[],"issued":{"date-parts":[[2020,3,9]]},"references-count":50,"alternative-id":["10.1145\/3373376.3378455","10.1145\/3373376"],"URL":"https:\/\/doi.org\/10.1145\/3373376.3378455","relation":{},"subject":[],"published":{"date-parts":[[2020,3,9]]},"assertion":[{"value":"2020-03-13","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}