{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T08:30:14Z","timestamp":1773304214362,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","license":[{"start":{"date-parts":[[2018,11,5]],"date-time":"2018-11-05T00:00:00Z","timestamp":1541376000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100002418","name":"Intel Corporation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100002418","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2018,11,5]]},"DOI":"10.1145\/3240765.3240850","type":"proceedings-article","created":{"date-parts":[[2018,11,6]],"date-time":"2018-11-06T13:36:57Z","timestamp":1541511417000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":88,"title":["SODA"],"prefix":"10.1145","author":[{"given":"Yuze","family":"Chi","sequence":"first","affiliation":[{"name":"University of California"}]},{"given":"Jason","family":"Cong","sequence":"additional","affiliation":[{"name":"University of California"}]},{"given":"Peng","family":"Wei","sequence":"additional","affiliation":[{"name":"University of California"}]},{"given":"Peipei","family":"Zhou","sequence":"additional","affiliation":[{"name":"University of California"}]}],"member":"320","published-online":{"date-parts":[[2018,11,5]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.5555\/1717197"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/2897937.2897972"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/2593069.2593090"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2015.2488491"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2011.2110592"},{"key":"e_1_3_2_1_6_1","volume-title":"Cody Hao Yu, and Peipei Zhou","author":"Cong Jason","year":"2018","unstructured":"Jason Cong, Peng Wei, Cody Hao Yu, and Peipei Zhou. 2018. Latte: Locality Aware Transformation for High-Level Synthesis. In FCCM. 125--128."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","unstructured":"Jason Cong Peng Zhang and Yi Zou. 2012. Optimizing Memory Hierarchy Allocation with Loop Transformations for High-level Synthesis. In DAC. 1233--1238. 10.1145\/2228360.2228586","DOI":"10.1145\/2228360.2228586"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.5555\/1413370.1413375"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","unstructured":"Juan Escobedo and Mingjie Lin. 2018. Graph-Theoretically Optimal Memory Banking for Stencil-Based Computing Kernels. In FPGA. 199--208. 10.1145\/3174243.3174251","DOI":"10.1145\/3174243.3174251"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF01407835"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2601097.2601174"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","unstructured":"Gopalakrishna Hegde and Nachiket Kapre. 2015. Energy-Efficient Acceleration of OpenCV Saliency Computation Using Soft Vector Processors. In FCCM. 76--83. 10.1109\/FCCM.2015.39","DOI":"10.1109\/FCCM.2015.39"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","unstructured":"Justin Holewinski Louis-No\u00ebl Pouchet and P. Sadayappan. 2012. High-Performance Code Generation for Stencil Computations on GPU Architectures. In ICS. 311--320. 10.1145\/2304576.2304619","DOI":"10.1145\/2304576.2304619"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","unstructured":"Sriram Krishnamoorthy Muthu Baskaran Uday Bondhugula J. Ramanujam Atanas Rountev and P. Sadayappan. 2007. Effective Automatic Parallelization of Stencil Computations. In PLDI. 235--244. 10.1145\/1250734.1250761","DOI":"10.1145\/1250734.1250761"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Shih-Wei Liao Sheng-Jun Tsai Chieh-Hsun Yang and Chen-Kang Lo. 2016. Locality-Aware Scheduling for Stencil Code in Halide. In ICPPW. 72--77.","DOI":"10.1109\/ICPPW.2016.26"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2063384.2063398"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/2966986.2966995"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","unstructured":"Louis-No\u00ebl Pouchet Peng Zhang P. Sadayappan and Jason Cong. 2013. Polyhedral-Based Data Reuse Optimization for Configurable Computing. In FPGA. 29--38. 10.1145\/2435264.2435273","DOI":"10.1145\/2435264.2435273"},{"key":"e_1_3_2_1_19_1","unstructured":"Jing Pu Steven Bell Xuan Yang Jeff Setter Stephen Richardson Jonathan Ragan-Kelley and Mark Horowitz. 2016. Programming Heterogeneous Systems from an Image Processing DSL. (2016) 12 pages."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2185520.2185528"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","unstructured":"Oliver Reiche M. Akif Ozkan Richard Membarth J\u00fcrgen Teich and Frank Hannig. 2017. Generating FPGA-based Image Processing Accelerators with Hipacc: (Invited paper). In ICCAD. 1026--1033.","DOI":"10.5555\/3199700.3199842"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","unstructured":"Gerald Roth John Mellor-Crummey Ken Kennedy and R Gregg Brickner. 1997. Compiling Stencils in High Performance Fortran. In SC. 1--20. 10.1145\/509593.509605","DOI":"10.1145\/509593.509605"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2013.51"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"Muhammad Shafiq Miquel Pericas Raul de la Cruz Mauricio Araya-Polo Nacho Navarro and Eduard Ayguad\u00e9. 2009. Exploiting Memory Customization in FPGA for 3D Stencil Computations. In FPT. 38--45.","DOI":"10.1109\/FPT.2009.5377644"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","unstructured":"Greg Stitt Abhay Gupta Madison N. Emas David Wilson and Austin Baylis. 2018. Scalable Window Generation for the Intel Broadwell + Arria 10 and High-Bandwidth FPGA Systems. In FPGA. 173--182. 10.1145\/3174243.3174262","DOI":"10.1145\/3174243.3174262"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","unstructured":"Kevin Stock Martin Kong Tobias Grosser Louis-No\u00ebl Pouchet Fabrice Rastello J. Ramanujam and P. Sadayappan. 2014. A Framework for Enhancing Data Reuse via Associative Reordering. In PLDI. 65--76. 10.1145\/2594291.2594342","DOI":"10.1145\/2594291.2594342"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/1989493.1989508"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3061639.3062185"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"Markus Wittmann Georg Hager and Gerhard Wellein. 2010. Multicore-Aware Parallel Temporal Blocking of Stencil Codes for Shared and Distributed Memory. In IPDPSW. 1--7.","DOI":"10.1109\/IPDPSW.2010.5470813"},{"key":"e_1_3_2_1_31_1","volume-title":"Computation Theory of Cellular Automata. Communications in Mathematical Physics","author":"Wolfram Stephen","year":"1984","unstructured":"Stephen Wolfram. 1984. Computation Theory of Cellular Automata. Communications in Mathematical Physics (1984)."},{"key":"e_1_3_2_1_32_1","unstructured":"Xilinx. 2017. Vivado Design Suite: AXI Reference Guide (UG1037). https:\/\/www.xilinx.com\/support\/documentation\/ip_documentation\/axi_ref_guide\/latest\/ug1037-vivado-axi-reference-guide.pdf"},{"key":"e_1_3_2_1_33_1","unstructured":"Xilinx. 2018. Vivado High-Level Synthesis. https:\/\/www.xilinx.com\/products\/design-tools\/vivado\/integration\/esl-design.html"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","unstructured":"Hamid Reza Zohouri Artur Podobas and Satoshi Matsuoka. 2018. Combined Spatial and Temporal Blocking for High-Performance Stencil Computation on FPGAs Using OpenCL. In FPGA. 153--162. 10.1145\/3174243.3174248","DOI":"10.1145\/3174243.3174248"}],"event":{"name":"ICCAD '18: IEEE\/ACM INTERNATIONAL CONFERENCE ON COMPUTER-AIDED DESIGN","location":"San Diego California","acronym":"ICCAD '18","sponsor":["IEEE-EDS Electronic Devices Society","IEEE CAS","IEEE CEDA"]},"container-title":["Proceedings of the International Conference on Computer-Aided Design"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3240765.3240850","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3240765.3240850","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3240765.3240850","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T00:57:33Z","timestamp":1750208253000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3240765.3240850"}},"subtitle":["stencil with optimized dataflow architecture"],"short-title":[],"issued":{"date-parts":[[2018,11,5]]},"references-count":34,"alternative-id":["10.1145\/3240765.3240850","10.1145\/3240765"],"URL":"https:\/\/doi.org\/10.1145\/3240765.3240850","relation":{},"subject":[],"published":{"date-parts":[[2018,11,5]]},"assertion":[{"value":"2018-11-05","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}