{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T16:33:33Z","timestamp":1773246813510,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,10,12]],"date-time":"2019-10-12T00:00:00Z","timestamp":1570838400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100002790","name":"Natural Sciences and Engineering Research Council of Canada","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002790","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000028","name":"Semiconductor Research Corporation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000028","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,10,12]]},"DOI":"10.1145\/3352460.3358292","type":"proceedings-article","created":{"date-parts":[[2019,10,11]],"date-time":"2019-10-11T11:16:45Z","timestamp":1570792605000},"page":"940-953","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":28,"title":["\u03bcIR -An intermediate representation for transforming and optimizing the microarchitecture of application accelerators"],"prefix":"10.1145","author":[{"given":"Amirali","family":"Sharifian","sequence":"first","affiliation":[{"name":"Simon Fraser University"}]},{"given":"Reza","family":"Hojabr","sequence":"additional","affiliation":[{"name":"Simon Fraser University"}]},{"given":"Navid","family":"Rahimi","sequence":"additional","affiliation":[{"name":"Simon Fraser University"}]},{"given":"Sihao","family":"Liu","sequence":"additional","affiliation":[{"name":"University of California, Los 
Angeles"}]},{"given":"Apala","family":"Guha","sequence":"additional","affiliation":[{"name":"Simon Fraser University"}]},{"given":"Tony","family":"Nowatzki","sequence":"additional","affiliation":[{"name":"University of California, Los Angeles"}]},{"given":"Arrvindh","family":"Shriraman","sequence":"additional","affiliation":[{"name":"Simon Fraser University"}]}],"member":"320","published-online":{"date-parts":[[2019,10,12]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Catapult High-Level Synthesis. https:\/\/www.mentor.com\/hls-lp\/catapult-high-level-synthesis\/."},{"key":"e_1_3_2_1_2_1","unstructured":"Enabling rapid design space exploration and prototyping of dnn accelerators. http:\/\/pwp.gatech.edu\/ece-synergy\/wp-content\/uploads\/sites\/332\/2019\/02\/2_NNDataflowAnalysis.pdf."},{"key":"e_1_3_2_1_3_1","unstructured":"Mlir primer: A compiler infrastructure for the end of moore\u2019s law. https:\/\/github.com\/tensorflow\/mlir."},{"key":"e_1_3_2_1_4_1","unstructured":"Specification for the firrtl language. https:\/\/github.com\/freechipsproject\/firrtl\/blob\/master\/spec\/spec.pdf."},{"key":"e_1_3_2_1_5_1","unstructured":"Vivado Design Suite. https:\/\/www.xilinx.com\/products\/design-tools\/vivado.html."},{"key":"e_1_3_2_1_6_1","article-title":"Executing a program on the MIT tagged-token dataflow architecture","author":"Nikhil Arvind","year":"1990","unstructured":"Arvind and Rishiyur S. Nikhil. Executing a program on the MIT tagged-token dataflow architecture. IEEE Trans. Computers, 1990.","journal-title":"IEEE Trans. Computers"},{"key":"e_1_3_2_1_7_1","unstructured":"Jonathan Bachrach Huy Vo Brian Richards Yunsup Lee Andrew Waterman Rimas Avizienis John Wawrzynek and Krste Asanovic. Chisel: Constructing hardware in a scala embedded language. 
https:\/\/github.com\/freechipsproject\/chisel3."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/2436256.2436271"},{"key":"e_1_3_2_1_9_1","unstructured":"Mihai Budiu and Seth Copen Goldstein. Pegasus: An efficient intermediate representation. Technical Report CMU-CS-02-107 Carnegie Mellon University May 2002."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CASES.2013.6662524"},{"key":"e_1_3_2_1_11_1","volume-title":"The renewed case for the reduced instruction set computer: Avoiding isa bloat with macro-op fusion for risc-v. arXiv preprint arXiv:1607.02318","author":"Celio Christopher","year":"2016","unstructured":"Christopher Celio, Palmer Dabbelt, David A Patterson, and Krste Asanovi\u0107. The renewed case for the reduced instruction set computer: Avoiding isa bloat with macro-op fusion for risc-v. arXiv preprint arXiv:1607.02318, 2016."},{"key":"e_1_3_2_1_12_1","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Q. Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. TVM: an automated end-to-end optimizing compiler for deep learning. In 13th USENIX Symposium on Operating Systems Design and Implementation, 2018."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2017.2720623"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2011.2110592"},{"key":"e_1_3_2_1_15_1","volume-title":"Cody Hao Yu, and Peng Zhang. Automated accelerator generation and optimization with composable, parallel and pipeline architecture","author":"Cong Jason","year":"2018","unstructured":"Jason Cong, Peng Wei, Cody Hao Yu, and Peng Zhang. Automated accelerator generation and optimization with composable, parallel and pipeline architecture. 
2018."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/106972.106990"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.5555\/1025123.1025812"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/MDT.2006.134"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/2370816.2370883"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/FPL.2014.6927454"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155623"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/2601097.2601174"},{"key":"e_1_3_2_1_23_1","volume-title":"PROC of the 12th HPCA","author":"Hu S","year":"2006","unstructured":"S Hu, I Kim, M H Lipasti, and J E Smith. An approach for implementing efficient superscalar CISC processors. In PROC of the 12th HPCA, 2006."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD.2017.8203780"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3174243.3174264"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3078155.3078163"},{"key":"e_1_3_2_1_27_1","volume-title":"Version 4.5 with SPIR-V","author":"Kessenich John","year":"2016","unstructured":"John Kessenich, Graham Sellers, and Dave Shreiner. OpenGL Programming Guide: The Official Guide to Learning OpenGL, Version 4.5 with SPIR-V. Addison-Wesley Professional, 2016."},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the PLDI","author":"Koeplinger David","year":"2018","unstructured":"David Koeplinger, Matthew Feldman, Raghu Prabhakar, Yaqi Zhang, Stefan Hadjis, Ruben Fiszel, Tian Zhao, Luigi Nardi, Ardavan Pedram, Christos Kozyrakis, and Kunle Olukotun. Spatial: A language and compiler for application accelerators. In Proceedings of the PLDI, 2018."},{"key":"e_1_3_2_1_29_1","first-page":"115","volume-title":"Kunle Olukotun. Automatic Generation of Efficient Accelerators for Reconfigurable Hardware. In Proc. 
of the 43rd ISCA","author":"Koeplinger David","year":"2016","unstructured":"David Koeplinger, Raghu Prabhakar, Yaqi Zhang, Christina Delimitrou, Christos Kozyrakis, and Kunle Olukotun. Automatic Generation of Efficient Accelerators for Reconfigurable Hardware. In Proc. of the 43rd ISCA, pages 115--127, 2016."},{"key":"e_1_3_2_1_30_1","volume-title":"Proc. of the 23rd PPOPP","author":"Kotsifakou Maria","year":"2018","unstructured":"Maria Kotsifakou, Prakalp Srivastava, Matthew D. Sinclair, Rakesh Komuravelli, Vikram Adve, and Sarita Adve. Hpvm: Heterogeneous parallel virtual machine. In Proc. of the 23rd PPOPP, 2018."},{"key":"e_1_3_2_1_31_1","volume-title":"Proc. of FPGA","author":"Lai Yi-Hsiang","year":"2019","unstructured":"Yi-Hsiang Lai, Yuze Chi, Yuwei Hu, Jie Wang, Cody Hao Yu, Yuan Zhou, Jason Cong, and Zhiru Zhang. Heterocl: A multi-paradigm programming infrastructure for software-defined reconfigurable computing. In Proc. of FPGA, 2019."},{"key":"e_1_3_2_1_32_1","unstructured":"Maysam Lavasani. Generating irregular data-stream accelerators: methodology and applications. PhD thesis 2015."},{"key":"e_1_3_2_1_33_1","volume-title":"Feb","author":"Leary Chris","year":"2017","unstructured":"Chris Leary and Todd Wang. Xla: Tensorflow, compiled! TensorFlow Dev Summit, Feb 2017."},{"key":"e_1_3_2_1_34_1","volume-title":"The Cilk++ concurrency platform. The Journal of Supercomputing, 51(3):244--257","author":"Leiserson Charles E","year":"2010","unstructured":"Charles E Leiserson. The Cilk++ concurrency platform. The Journal of Supercomputing, 51(3):244--257, 2010."},{"key":"e_1_3_2_1_35_1","first-page":"280","volume-title":"Christopher Batten. PyMTL: A Unified Framework for Vertically Integrated Computer Architecture Research. In Proc. of the 47th MICRO","author":"Lockhart Derek","year":"2014","unstructured":"Derek Lockhart, Gary Zibrat, and Christopher Batten. PyMTL: A Unified Framework for Vertically Integrated Computer Architecture Research. In Proc. 
of the 47th MICRO, pages 280--292, 2014."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2016.7446050"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2015.2513673"},{"key":"e_1_3_2_1_38_1","volume-title":"July","author":"Noronha D. H.","year":"2018","unstructured":"D. H. Noronha, B. Salehpour, and S. J. E. Wilton. LeFlow: Enabling Flexible FPGA High-Level Synthesis of Tensorflow Deep Neural Networks. ArXiv e-prints, July 2018."},{"key":"e_1_3_2_1_39_1","unstructured":"Louis-Noel Pouchet and Uday Bondugula. Polybench 3.2. 2013. http:\/\/www.cse.ohio-state.edu\/~pouchet\/software\/polybench."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/2872362.2872415"},{"key":"e_1_3_2_1_41_1","volume-title":"Proc. of the 44th ISCA","author":"Prabhakar Raghu","year":"2017","unstructured":"Raghu Prabhakar, Yaqi Zhang, David Koeplinger, Matt Feldman, Tian Zhao, Stefan Hadjis, Ardavan Pedram, Christos Kozyrakis, and Kunle Olukotun. Plasticine: A reconfigurable architecture for parallel patterns. In Proc. of the 44th ISCA, 2017."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3107953"},{"key":"e_1_3_2_1_43_1","volume-title":"Putnam. FPGAs in the Datacenter - Combining the Worlds of Hardware and Software Development. ACM Great Lakes Symposium on VLSI","author":"Andrew","year":"2017","unstructured":"Andrew Putnam. FPGAs in the Datacenter - Combining the Worlds of Hardware and Software Development. ACM Great Lakes Symposium on VLSI, 2017."},{"key":"e_1_3_2_1_44_1","volume-title":"Proc. of PLDI","author":"Ragan-Kelley Jonathan","year":"2013","unstructured":"Jonathan Ragan-Kelley, Connelly Barnes, Andrew Adams, Sylvain Paris, Fr\u00e9do Durand, and Saman P Amarasinghe. Halide - a language and compiler for optimizing parallelism, locality, and recomputation in image processing pipelines. In Proc. 
of PLDI, 2013."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2014.6983050"},{"key":"e_1_3_2_1_46_1","first-page":"1","volume-title":"Proc. of CODES+ISSS","author":"Reiche Oliver","year":"2014","unstructured":"Oliver Reiche, Moritz Schmid, Frank Hannig, Richard Membarth, and J\u00fcrgen Teich. Code generation from a domain-specific language for C-based HLS of hardware accelerators. In Proc. of CODES+ISSS, pages 1--10, New York, New York, USA, 2014. ACM Press."},{"key":"e_1_3_2_1_47_1","volume-title":"November","author":"Rong Hongbo","year":"2017","unstructured":"Hongbo Rong. Programmatic Control of a Compiler for Generating High-performance Spatial Hardware. In arXiv.org, November 2017."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/DSD.2010.52"},{"key":"e_1_3_2_1_49_1","volume-title":"Intermediate Representation. In Proc. of PPOPP","author":"Schardl Tao B","year":"2017","unstructured":"Tao B Schardl, William S Moses, and Charles E Leiserson. Tapir - Embedding Fork-Join Parallelism into LLVM's Intermediate Representation. In Proc. of PPOPP, 2017."},{"key":"e_1_3_2_1_50_1","first-page":"68","volume-title":"Proc. of ACM SIGPLAN Notices","author":"Srivastava Prakalp","year":"2018","unstructured":"Prakalp Srivastava, Rakesh Komuravelli, Sarita Adve, Maria Kotsifakou, Matthew D Sinclair, and Vikram Adve. HPVM: heterogeneous parallel virtual machine. In Proc. of ACM SIGPLAN Notices, pages 68--80. ACM, March 2018."},{"key":"e_1_3_2_1_51_1","unstructured":"James Stanier and Des Watson. Intermediate representations in imperative compilers: A survey. ACM Comput. Surv."},{"key":"e_1_3_2_1_52_1","author":"Sujeeth Arvind K","year":"2014","unstructured":"Arvind K Sujeeth, Kevin J Brown, HyoukJoong Lee, Tiark Rompf, Hassan Chafi, Martin Odersky, and Kunle Olukotun. Delite - A Compiler Architecture for Performance-Oriented Embedded Domain-Specific Languages. ACM Trans. Embedded Comput. 
Syst., 2014.","journal-title":"ACM Trans. Embedded Comput. Syst."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3033019.3033027"},{"key":"e_1_3_2_1_54_1","volume-title":"A dataflow compiler substrate","author":"Traub Ken","year":"1991","unstructured":"Ken Traub, James Hicks, and Shail Aditya. A dataflow compiler substrate. 1991."},{"key":"e_1_3_2_1_55_1","volume-title":"Design of FPGA-Based Computing Systems with OpenCL.","author":"Waidyasooriya Hasitha Muthumala","year":"2017","unstructured":"Hasitha Muthumala Waidyasooriya, Masanori Hariyama, and Kunio Uchiyama. FPGA-Oriented Parallel Programming. In Design of FPGA-Based Computing Systems with OpenCL. October 2017."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2014.18"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00015"}],"event":{"name":"MICRO '52: The 52nd Annual IEEE\/ACM International Symposium on Microarchitecture","location":"Columbus OH USA","acronym":"MICRO '52","sponsor":["SIGMICRO ACM Special Interest Group on Microarchitectural Research and Processing","IEEE CS"]},"container-title":["Proceedings of the 52nd Annual IEEE\/ACM International Symposium on 
Microarchitecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3352460.3358292","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3352460.3358292","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,29]],"date-time":"2025-07-29T22:28:47Z","timestamp":1753828127000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3352460.3358292"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,10,12]]},"references-count":57,"alternative-id":["10.1145\/3352460.3358292","10.1145\/3352460"],"URL":"https:\/\/doi.org\/10.1145\/3352460.3358292","relation":{},"subject":[],"published":{"date-parts":[[2019,10,12]]},"assertion":[{"value":"2019-10-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}