{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,23]],"date-time":"2025-03-23T19:10:24Z","timestamp":1742757024367,"version":"3.40.2"},"publisher-location":"Berlin, Heidelberg","reference-count":16,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642283642"},{"type":"electronic","value":"9783642283659"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-28365-9_3","type":"book-chapter","created":{"date-parts":[[2012,3,22]],"date-time":"2012-03-22T20:56:38Z","timestamp":1332449798000},"page":"26-39","source":"Crossref","is-referenced-by-count":11,"title":["Domain-Specific Language and Compiler for Stencil Computation on FPGA-Based Systolic Computational-Memory Array"],"prefix":"10.1007","author":[{"given":"Wang","family":"Luzhou","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kentaro","family":"Sano","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Satoru","family":"Yamamoto","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"3_CR1","unstructured":"Boost C++ Library, http:\/\/www.boost.org"},{"key":"3_CR2","doi-asserted-by":"crossref","unstructured":"Chamberlain, B.L., Snyder, L.: Array language support for parallel sparse computation. In: Proceedings of the 15th International Conference on Supercomputing, pp. 133\u2013145 (June 2001)","DOI":"10.1145\/377792.377820"},{"key":"3_CR3","doi-asserted-by":"crossref","unstructured":"Datta, K., Murphy, M., Volkov, V., Williams, S., Carter, J., Oliker, L., Patterson, D., Shalf, J., Yelick, K.: Stencil computation optimization and auto-tuning on state-of-the-art multicore architectures. In: Proceedings of the 2008 ACM\/IEEE Conference on Supercomputing, pp. 1\u201312 (November 2008)","DOI":"10.1109\/SC.2008.5222004"},{"issue":"1","key":"3_CR4","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1109\/54.748803","volume":"16","author":"D.G. Elliott","year":"1999","unstructured":"Elliott, D.G., Stumm, M., Snelgrove, W., Cojocaru, C., Mckenzie, R.: Computational ram: Implementing processors in memory. Design & Test of Computers\u00a016(1), 32\u201341 (1999)","journal-title":"Design & Test of Computers"},{"key":"3_CR5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-97651-3","volume-title":"Computational Methods for Fluid Dynamics","author":"J.H. Ferziger","year":"1996","unstructured":"Ferziger, J.H., Peri\u0107, M.: Computational Methods for Fluid Dynamics. Springer, Heidelberg (1996)"},{"key":"3_CR6","unstructured":"Hageman, L.A., Young, D.M.: Applied Iterative Methods. Academic Press (1981)"},{"issue":"1","key":"3_CR7","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1109\/MC.1982.1653825","volume":"15","author":"H.T. Kung","year":"1982","unstructured":"Kung, H.T.: Why systolic architecture? Computer\u00a015(1), 37\u201346 (1982)","journal-title":"Computer"},{"key":"3_CR8","doi-asserted-by":"crossref","unstructured":"Luzhou, W., Sano, K., Yamamoto, S.: Local-and-global stall mechanism for systolic computational-memory array on extensible multi-fpga system. In: Proceedings of the International Conference on Field-Programmable Technology (FPT 2010), pp. 102\u2013109 (December 2010)","DOI":"10.1109\/FPT.2010.5681763"},{"key":"3_CR9","unstructured":"Mycroft, D.O.A.: Efficient and correct stencil computation via pattern matching and static typing. In: Proceedings of IFIP Working Conference on Domain-Specific Languages (September 2011) (to appear)"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Sano, K., Iizuka, T., Yamamoto, S.: Systolic architecture for computational fluid dynamics on FPGAs. In: Proceedings of the 15th Annual IEEE Symposium on Field-Programmable Custom Computing Machines (FCCM), pp. 107\u2013116 (April 2007)","DOI":"10.1109\/FCCM.2007.20"},{"key":"3_CR11","doi-asserted-by":"crossref","unstructured":"Sano, K., Luzhou, W., Hatsuda, Y., Iizuka, T., Yamamoto, S.: FPGA-array with bandwidth-reduction mechanism for scalable and power-efficient numerical simulations based on finite difference methods. ACM Transactions on Reconfigurable Technology and Systems\u00a03(4) (November 2010), doi:10.1145\/1862648.1862651","DOI":"10.1145\/1862648.1862651"},{"key":"3_CR12","doi-asserted-by":"crossref","unstructured":"Tang, Y., Chowdhury, R., Kuszmaul, B.C., Luk, C.K., Leiserson, C.E.: The pochoir stencil compiler. In: Proceedings of the 23th ACM Symposium on Parallelism in Algorithms and Architectures (June 2011)","DOI":"10.1145\/1989493.1989508"},{"key":"3_CR13","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1007935215591","volume":"17","author":"J. Teich","year":"1997","unstructured":"Teich, J., Thiele, L.: Partitioning processor arrays under resource constrains. Journal of VLSI Signal Processing\u00a017, 5\u201320 (1997)","journal-title":"Journal of VLSI Signal Processing"},{"key":"3_CR14","doi-asserted-by":"crossref","unstructured":"Underwood, K.D., Hemmert, K.S.: Closing the gap: CPU and FPGA trends in sustainable floating-point blas performance. In: Proceedings of the IEEE Symposium on Field-Programmable Custom Computing Machines, pp. 219\u2013228 (2004)","DOI":"10.1109\/FCCM.2004.21"},{"issue":"4","key":"3_CR15","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1145\/1498765.1498785","volume":"52","author":"S. Williams","year":"2009","unstructured":"Williams, S., Waterman, A., Patterson, D.: Roofline: an insightful visual performance model for multicore architectures. Communications of the ACM\u00a052(4), 65\u201376 (2009)","journal-title":"Communications of the ACM"},{"issue":"4","key":"3_CR16","doi-asserted-by":"publisher","first-page":"452","DOI":"10.1109\/71.97902","volume":"2","author":"M.E. Wolf","year":"1991","unstructured":"Wolf, M.E., Lam, M.S.: A loop transformation theory and an algorithm to maximize parallelism. IEEE Transactions on Parallel and Distributed Systems\u00a02(4), 452\u2013471 (1991)","journal-title":"IEEE Transactions on Parallel and Distributed Systems"}],"container-title":["Lecture Notes in Computer Science","Reconfigurable Computing: Architectures, Tools and Applications"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-28365-9_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,23]],"date-time":"2025-03-23T18:53:01Z","timestamp":1742755981000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-28365-9_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642283642","9783642283659"],"references-count":16,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-28365-9_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}