{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,6]],"date-time":"2026-06-06T01:12:56Z","timestamp":1780708376077,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":33,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,2,17]],"date-time":"2021-02-17T00:00:00Z","timestamp":1613520000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Intel Corporation and NSF (National Science Foundation)","award":["CCF-1723773"],"award-info":[{"award-number":["CCF-1723773"]}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CCF-1937599"],"award-info":[{"award-number":["CCF-1937599"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,2,17]]},"DOI":"10.1145\/3431920.3439301","type":"proceedings-article","created":{"date-parts":[[2021,2,20]],"date-time":"2021-02-20T23:15:47Z","timestamp":1613862947000},"page":"116-126","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":65,"title":["HBM Connect: High-Performance HLS Interconnect for FPGA HBM"],"prefix":"10.1145","author":[{"given":"Young-kyu","family":"Choi","sequence":"first","affiliation":[{"name":"University of California, Los Angeles, Los Angeles, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuze","family":"Chi","sequence":"additional","affiliation":[{"name":"University of California, Los Angeles, Los Angeles, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Weikang","family":"Qiao","sequence":"additional","affiliation":[{"name":"University of California, Los Angeles, Los Angeles, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Nikola","family":"Samardzic","sequence":"additional","affiliation":[{"name":"University of California, Los Angeles, Los Angeles, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jason","family":"Cong","sequence":"additional","affiliation":[{"name":"University of California, Los Angeles, Los Angeles, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2021,2,17]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"ARM. 2011. https:\/\/developer.arm.com\/docs\/ihi0022\/dAMBA AXI and ACE Protocol Specification AXI3 AXI4 and AXI4-Lite ACE and ACE-Lite. www.arm.com  ARM. 2011. https:\/\/developer.arm.com\/docs\/ihi0022\/dAMBA AXI and ACE Protocol Specification AXI3 AXI4 and AXI4-Lite ACE and ACE-Lite. www.arm.com"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCSE.2010.135"},{"key":"e_1_3_2_2_3_1","volume-title":"Proc. ACM\/SIGDA Int. Symp. Field-Programmable Gate Arrays. 240--249","author":"Chen R.","unstructured":"R. Chen , S. Siriyal , and V. Prasanna . 2015. https:\/\/dl.acm.org\/doi\/abs\/10.1145\/2684746.2689068Energy and memory efficient mapping of bitonic sorting on FPGA . In Proc. ACM\/SIGDA Int. Symp. Field-Programmable Gate Arrays. 240--249 . R. Chen, S. Siriyal, and V. Prasanna. 2015. https:\/\/dl.acm.org\/doi\/abs\/10.1145\/2684746.2689068Energy and memory efficient mapping of bitonic sorting on FPGA. In Proc. ACM\/SIGDA Int. Symp. Field-Programmable Gate Arrays. 240--249."},{"key":"e_1_3_2_2_4_1","volume-title":"FPGA HBM: Benchmarking and bandwidth optimization. ArXiv Preprint","author":"Choi Y.","year":"2020","unstructured":"Y. Choi , Y. Chi , J. Wang , L. Guo , and J. Cong . 2020 . When HLS meets FPGA HBM: Benchmarking and bandwidth optimization. ArXiv Preprint (2020). https:\/\/arxiv.org\/abs\/2010.06075 Y. Choi, Y. Chi, J. Wang, L. Guo, and J. Cong. 2020. When HLS meets FPGA HBM: Benchmarking and bandwidth optimization. ArXiv Preprint (2020). https:\/\/arxiv.org\/abs\/2010.06075"},{"key":"e_1_3_2_2_5_1","volume-title":"Proc. Ann. Design Automation Conf. 109--114","author":"Choi Y.","unstructured":"Y. Choi , J. Cong , Z. Fang , Y. Hao , G. Reinman , and P. Wei . 2016. http:\/\/dl.acm.org\/citation.cfm?id=2897972 A quantitative analysis on microarchitectures of modern CPU-FPGA platform . In Proc. Ann. Design Automation Conf. 109--114 . Y. Choi, J. Cong, Z. Fang, Y. Hao, G. Reinman, and P. Wei. 2016. http:\/\/dl.acm.org\/citation.cfm?id=2897972 A quantitative analysis on microarchitectures of modern CPU-FPGA platform. In Proc. Ann. Design Automation Conf. 109--114."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3294054"},{"key":"e_1_3_2_2_7_1","volume-title":"Proc. IEEE\/ACM Int. Conf. Computer-Aided Design. 691--698","author":"Choi Y.","unstructured":"Y. Choi , P. Zhang , P. Li , and J. Cong . 2017. https:\/\/ieeexplore.ieee.org\/document\/8203844HLScope+: Fast and accurate performance estimation for FPGA HLS . In Proc. IEEE\/ACM Int. Conf. Computer-Aided Design. 691--698 . Y. Choi, P. Zhang, P. Li, and J. Cong. 2017. https:\/\/ieeexplore.ieee.org\/document\/8203844HLScope+: Fast and accurate performance estimation for FPGA HLS. In Proc. IEEE\/ACM Int. Conf. Computer-Aided Design. 691--698."},{"key":"e_1_3_2_2_8_1","volume-title":"IEEE Ann. Int. Symp. Field-Programmable Custom Computing Machines. 93--96","author":"Cong J.","unstructured":"J. Cong , Z. Fang , M. Lo , H. Wang , J. Xu , and S. Zhang . 2018. https:\/\/ieeexplore.ieee.org\/abstract\/document\/8457638 Understanding performance differences of FPGAs and GPUs . In IEEE Ann. Int. Symp. Field-Programmable Custom Computing Machines. 93--96 . J. Cong, Z. Fang, M. Lo, H. Wang, J. Xu, and S. Zhang. 2018. https:\/\/ieeexplore.ieee.org\/abstract\/document\/8457638 Understanding performance differences of FPGAs and GPUs. In IEEE Ann. Int. Symp. Field-Programmable Custom Computing Machines. 93--96."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2659000"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2009.179"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"crossref","unstructured":"W. J. Dally and C. L. Seitz. 1987. https:\/\/ieeexplore.ieee.org\/document\/1676939 Deadlock-free message routing in multiprocessor interconnection networks. IEEE Trans. Computers Vol. C-36 5 (May 1987) 547--553.  W. J. Dally and C. L. Seitz. 1987. https:\/\/ieeexplore.ieee.org\/document\/1676939 Deadlock-free message routing in multiprocessor interconnection networks. IEEE Trans. Computers Vol. C-36 5 (May 1987) 547--553.","DOI":"10.1109\/TC.1987.1676939"},{"key":"e_1_3_2_2_12_1","volume-title":"Int. Conf. Formal Methods and Models for Co-Design .","author":"Fleming K.","unstructured":"K. Fleming , M. King , and M. C. Ng . 2008. https:\/\/ieeexplore.ieee.org\/abstract\/document\/4547704High-throughput pipelined mergesort . In Int. Conf. Formal Methods and Models for Co-Design . K. Fleming, M. King, and M. C. Ng. 2008. https:\/\/ieeexplore.ieee.org\/abstract\/document\/4547704High-throughput pipelined mergesort. In Int. Conf. Formal Methods and Models for Co-Design ."},{"key":"e_1_3_2_2_13_1","unstructured":"Intel. 2020 a. https:\/\/www.intel.com\/content\/dam\/www\/programmable\/us\/en\/pdfs\/literature\/ug\/ug-20031.pdfHigh Bandwidth Memory (HBM2) Interface Intel FPGA IP User Guide. https:\/\/www.intel.com\/  Intel. 2020 a. https:\/\/www.intel.com\/content\/dam\/www\/programmable\/us\/en\/pdfs\/literature\/ug\/ug-20031.pdfHigh Bandwidth Memory (HBM2) Interface Intel FPGA IP User Guide. https:\/\/www.intel.com\/"},{"key":"e_1_3_2_2_14_1","unstructured":"Intel. 2020 b. https:\/\/www.intel.com\/content\/www\/us\/en\/programmable\/documentation\/nik1412467993397.htmlAvalon Interface Specifications. https:\/\/www.intel.com\/  Intel. 2020 b. https:\/\/www.intel.com\/content\/www\/us\/en\/programmable\/documentation\/nik1412467993397.htmlAvalon Interface Specifications. https:\/\/www.intel.com\/"},{"key":"e_1_3_2_2_15_1","unstructured":"JEDEC. 2020. High Bandwidth Memory (HBM) DRAM. https:\/\/www.jedec.org\/standards-documents\/docs\/jesd235a  JEDEC. 2020. High Bandwidth Memory (HBM) DRAM. https:\/\/www.jedec.org\/standards-documents\/docs\/jesd235a"},{"key":"e_1_3_2_2_16_1","volume-title":"Proc. IEEE Int. Memory Workshop. 1--4.","author":"Jun H.","unstructured":"H. Jun , J. Cho , K. Lee , H. Son , K. Kim , H. Jin , and K. Kim . 2017. https:\/\/ieeexplore.ieee.org\/abstract\/document\/7939084 HBM (High Bandwidth Memory) DRAM technology and architecture . In Proc. IEEE Int. Memory Workshop. 1--4. H. Jun, J. Cho, K. Lee, H. Son, K. Kim, H. Jin, and K. Kim. 2017. https:\/\/ieeexplore.ieee.org\/abstract\/document\/7939084 HBM (High Bandwidth Memory) DRAM technology and architecture. In Proc. IEEE Int. Memory Workshop. 1--4."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2018.2834439"},{"key":"e_1_3_2_2_18_1","unstructured":"R. Li H. Huang Z. Wang Z. Shao X. Liao and H. Jin. 2020. Optimizing memory performance of Xilinx FPGAs under Vitis. ArXiv Preprint (2020). https:\/\/arxiv.org\/abs\/2010.08916  R. Li H. Huang Z. Wang Z. Shao X. Liao and H. Jin. 2020. Optimizing memory performance of Xilinx FPGAs under Vitis. ArXiv Preprint (2020). https:\/\/arxiv.org\/abs\/2010.08916"},{"key":"e_1_3_2_2_19_1","volume-title":"Proc. ACM\/SIGDA Int. Symp. Field-Programmable Gate Arrays .","author":"Lu A.","unstructured":"A. Lu , Z. Fang , W. Liu , and L. Shannon . 2021. Demystifying the memory system of modern datacenter FPGAs for software programmers through microbenchmarking . In Proc. ACM\/SIGDA Int. Symp. Field-Programmable Gate Arrays . A. Lu, Z. Fang, W. Liu, and L. Shannon. 2021. Demystifying the memory system of modern datacenter FPGAs for software programmers through microbenchmarking. In Proc. ACM\/SIGDA Int. Symp. Field-Programmable Gate Arrays ."},{"key":"e_1_3_2_2_20_1","volume-title":"Proc. Int. Conf. Architectural Support for Programming Languages and Operating Systems. 167--181","author":"Miao H.","unstructured":"H. Miao , M. Jeon , G. Pekhimenko , K. S. McKinley , and F. X. Lin . 2019. https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3297858.3304031 Strea-HBM: Stream analytics on high bandwidth hybrid memory . In Proc. Int. Conf. Architectural Support for Programming Languages and Operating Systems. 167--181 . H. Miao, M. Jeon, G. Pekhimenko, K. S. McKinley, and F. X. Lin. 2019. https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3297858.3304031 Strea-HBM: Stream analytics on high bandwidth hybrid memory. In Proc. Int. Conf. Architectural Support for Programming Languages and Operating Systems. 167--181."},{"key":"e_1_3_2_2_21_1","volume-title":"Proc. Workshop on Memory Systems Performance and Correctness. 1--10","author":"Molka D.","unstructured":"D. Molka , D. Hackenberg , and R. Sch\u00f6ne . 2014. https:\/\/dl.acm.org\/doi\/abs\/10.1145\/2618128.2618129 Main memory and cache performance of Intel Sandy Bridge and AMD Bulldozer . In Proc. Workshop on Memory Systems Performance and Correctness. 1--10 . D. Molka, D. Hackenberg, and R. Sch\u00f6ne. 2014. https:\/\/dl.acm.org\/doi\/abs\/10.1145\/2618128.2618129 Main memory and cache performance of Intel Sandy Bridge and AMD Bulldozer. In Proc. Workshop on Memory Systems Performance and Correctness. 1--10."},{"key":"e_1_3_2_2_22_1","volume-title":"Y. T. Liew, K. Srivatsan, D. Moss, S. Subhaschandra, and G. Boudoukh.","author":"Nurvitadhi E.","year":"2017","unstructured":"E. Nurvitadhi , G. Venkatesh , J. Sim , D. Marr , R. Huang , J. Ong Gee Hock , Y. T. Liew, K. Srivatsan, D. Moss, S. Subhaschandra, and G. Boudoukh. 2017 . https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3020078.3021740Can FPGAs beat GPUs in accelerating next-generation deep neural networks?. In Proc. ACM\/SIGDA Int. Symp. Field-Programmable Gate Arrays . 5--14. E. Nurvitadhi, G. Venkatesh, J. Sim, D. Marr, R. Huang, J. Ong Gee Hock, Y. T. Liew, K. Srivatsan, D. Moss, S. Subhaschandra, and G. Boudoukh. 2017. https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3020078.3021740Can FPGAs beat GPUs in accelerating next-generation deep neural networks?. In Proc. ACM\/SIGDA Int. Symp. Field-Programmable Gate Arrays. 5--14."},{"key":"e_1_3_2_2_23_1","unstructured":"Nvidia. 2020. Nvidia Titan V. https:\/\/www.nvidia.com\/en-us\/titan\/titan-v\/  Nvidia. 2020. Nvidia Titan V. https:\/\/www.nvidia.com\/en-us\/titan\/titan-v\/"},{"key":"e_1_3_2_2_24_1","first-page":"11","article-title":"https:\/\/ieeexplore.ieee.org\/abstract\/document\/1336763 Performance and area modeling of complete FPGA designs in the presence of loop transformations","volume":"53","author":"Park J.","year":"2004","unstructured":"J. Park , P. Diniz , and K. Shayee . 2004 . https:\/\/ieeexplore.ieee.org\/abstract\/document\/1336763 Performance and area modeling of complete FPGA designs in the presence of loop transformations . IEEE Trans. Computers , Vol. 53 , 11 (Sept. 2004), 1420--1435. J. Park, P. Diniz, and K. Shayee. 2004. https:\/\/ieeexplore.ieee.org\/abstract\/document\/1336763 Performance and area modeling of complete FPGA designs in the presence of loop transformations. IEEE Trans. Computers, Vol. 53, 11 (Sept. 2004), 1420--1435.","journal-title":"IEEE Trans. Computers"},{"key":"e_1_3_2_2_25_1","volume-title":"IEEE Ann. Int. Symp. Field-Programmable Custom Computing Machines. 197--204","author":"Saitoh M.","unstructured":"M. Saitoh , E. A. Elsayed , T. V. Chu , S. Mashimo , and K. Kise . 2018. https:\/\/ieeexplore.ieee.org\/abstract\/document\/8457653 A high-performance and cost-effective hardware merge sorter without feedback datapath . In IEEE Ann. Int. Symp. Field-Programmable Custom Computing Machines. 197--204 . M. Saitoh, E. A. Elsayed, T. V. Chu, S. Mashimo, and K. Kise. 2018. https:\/\/ieeexplore.ieee.org\/abstract\/document\/8457653 A high-performance and cost-effective hardware merge sorter without feedback datapath. In IEEE Ann. Int. Symp. Field-Programmable Custom Computing Machines. 197--204."},{"key":"e_1_3_2_2_26_1","volume-title":"Ann. Int. Symp. Comput. Architecture. 282--294","author":"Samardzic N.","unstructured":"N. Samardzic , W. Qiao , V. Aggarwal , M. F. Chang , and J. Cong . 2020. https:\/\/www.iscaconf.org\/isca2020\/papers\/466100a282.pdf Bonsai: High-performance adaptive merge tree sorting . In Ann. Int. Symp. Comput. Architecture. 282--294 . N. Samardzic, W. Qiao, V. Aggarwal, M. F. Chang, and J. Cong. 2020. https:\/\/www.iscaconf.org\/isca2020\/papers\/466100a282.pdf Bonsai: High-performance adaptive merge tree sorting. In Ann. Int. Symp. Comput. Architecture. 282--294."},{"key":"e_1_3_2_2_27_1","volume-title":"IEEE Ann. Int. Symp. Field-Programmable Custom Computing Machines .","author":"Wang Z.","unstructured":"Z. Wang , H. Huang , J. Zhang , and G. Alonso . 2020. https:\/\/wangzeke.github.io\/doc\/shuhai_fccm_20.pdf Shuhai: Benchmarking High Bandwidth Memory on FPGAs . In IEEE Ann. Int. Symp. Field-Programmable Custom Computing Machines . Z. Wang, H. Huang, J. Zhang, and G. Alonso. 2020. https:\/\/wangzeke.github.io\/doc\/shuhai_fccm_20.pdf Shuhai: Benchmarking High Bandwidth Memory on FPGAs. In IEEE Ann. Int. Symp. Field-Programmable Custom Computing Machines ."},{"key":"e_1_3_2_2_28_1","unstructured":"Xilinx. 2020 a. Alveo U280 Data Center Accelerator Card User Guide. https:\/\/www.xilinx.com\/support\/documentation\/boards_and_kits\/accelerator-cards\/ug1314-u280-reconfig-accel.pdf  Xilinx. 2020 a. Alveo U280 Data Center Accelerator Card User Guide. https:\/\/www.xilinx.com\/support\/documentation\/boards_and_kits\/accelerator-cards\/ug1314-u280-reconfig-accel.pdf"},{"key":"e_1_3_2_2_29_1","unstructured":"Xilinx. 2020 b. Alveo U50 Data Center Accelerator Card User Guide. https:\/\/www.xilinx.com\/support\/documentation\/boards_and_kits\/accelerator-cards\/ug1371-u50-reconfig-accel.pdf  Xilinx. 2020 b. Alveo U50 Data Center Accelerator Card User Guide. https:\/\/www.xilinx.com\/support\/documentation\/boards_and_kits\/accelerator-cards\/ug1371-u50-reconfig-accel.pdf"},{"key":"e_1_3_2_2_30_1","unstructured":"Xilinx. 2020 c. AXI High Bandwidth Memory Controller v1.0. https:\/\/www.xilinx.com\/support\/documentation\/ip_documentation\/hbm\/v1_0\/pg276-axi-hbm.pdf  Xilinx. 2020 c. AXI High Bandwidth Memory Controller v1.0. https:\/\/www.xilinx.com\/support\/documentation\/ip_documentation\/hbm\/v1_0\/pg276-axi-hbm.pdf"},{"key":"e_1_3_2_2_31_1","unstructured":"Xilinx. 2020 d. https:\/\/www.xilinx.com\/support\/documentation\/sw_manuals\/xilinx2019_2\/ug902-vivado-high-level-synthesis.pdfVivado High-level Synthesis (UG902). https:\/\/www.xilinx.com\/  Xilinx. 2020 d. https:\/\/www.xilinx.com\/support\/documentation\/sw_manuals\/xilinx2019_2\/ug902-vivado-high-level-synthesis.pdfVivado High-level Synthesis (UG902). https:\/\/www.xilinx.com\/"},{"key":"e_1_3_2_2_32_1","unstructured":"Xilinx. 2020 e. https:\/\/www.xilinx.com\/support\/documentation\/user_guides\/ug573-ultrascale-memory-resources.pdfUltraScale Architecture Memory Resources (UG573). https:\/\/www.xilinx.com\/  Xilinx. 2020 e. https:\/\/www.xilinx.com\/support\/documentation\/user_guides\/ug573-ultrascale-memory-resources.pdfUltraScale Architecture Memory Resources (UG573). https:\/\/www.xilinx.com\/"},{"key":"e_1_3_2_2_33_1","unstructured":"Xilinx. 2020 f. Vitis Unified Software Platform. https:\/\/www.xilinx.com\/products\/design-tools\/vitis\/vitis-platform.html  Xilinx. 2020 f. Vitis Unified Software Platform. https:\/\/www.xilinx.com\/products\/design-tools\/vitis\/vitis-platform.html"}],"event":{"name":"FPGA '21: The 2021 ACM\/SIGDA International Symposium on Field Programmable Gate Arrays","location":"Virtual Event USA","acronym":"FPGA '21","sponsor":["SIGDA ACM Special Interest Group on Design Automation"]},"container-title":["The 2021 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3431920.3439301","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3431920.3439301","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3431920.3439301","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:31:31Z","timestamp":1750195891000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3431920.3439301"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,2,17]]},"references-count":33,"alternative-id":["10.1145\/3431920.3439301","10.1145\/3431920"],"URL":"https:\/\/doi.org\/10.1145\/3431920.3439301","relation":{},"subject":[],"published":{"date-parts":[[2021,2,17]]},"assertion":[{"value":"2021-02-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}