{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T08:03:12Z","timestamp":1777449792066,"version":"3.51.4"},"reference-count":29,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J. Comput. Sci. Technol."],"published-print":{"date-parts":[[2018,1]]},"DOI":"10.1007\/s11390-017-1748-5","type":"journal-article","created":{"date-parts":[[2018,1,30]],"date-time":"2018-01-30T02:44:48Z","timestamp":1517280288000},"page":"116-130","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["A Pipelining Loop Optimization Method for Dataflow Architecture"],"prefix":"10.1007","volume":"33","author":[{"given":"Xu","family":"Tan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiao-Chun","family":"Ye","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiao-Wei","family":"Shen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuan-Chao","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Da","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lunkai","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wen-Ming","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dong-Rui","family":"Fan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhi-Min","family":"Tang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,1,26]]},"reference":[{"issue":"1","key":"1748_CR1","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1109\/MC.2012.34","volume":"45","author":"M Tolentino","year":"2012","unstructured":"Tolentino M, Cameron K W. The optimist, the pessimist, and the global race to exascale in 20 megawatts. Computer, 2012, 45(1): 95-97.","journal-title":"Computer"},{"issue":"2","key":"1748_CR2","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1109\/MSPEC.2011.5693074","volume":"48","author":"P Kogge","year":"2011","unstructured":"Kogge P. The tops in flops. IEEE Spectrum, 2011, 48(2): 48-54.","journal-title":"IEEE Spectrum"},{"key":"1748_CR3","unstructured":"Kogge P, Bergman K, Borkar S et al. ExaScale computing study: Technology challenges in achieving exascale systems. Technical Report TR-2008-13, Defense Advanced Research Projects Agency Information Processing Technigues Office, 2008. http:\/\/www.citeulike.org\/group\/11430\/article\/6638217 , Dec. 2017."},{"key":"1748_CR4","doi-asserted-by":"crossref","unstructured":"Milutinovi V, Salom J, Trifunovic N, Giorgi R. Guide to DataFlow Supercomputing: Basic Concepts, Case Studies, and a Detailed Example. Springer, 2015.","DOI":"10.1007\/978-3-319-16229-4"},{"key":"1748_CR5","doi-asserted-by":"crossref","unstructured":"Dennis J B. First version of a data flow procedure language. In Proc. the Programming Symp., April 1974, pp.362-376.","DOI":"10.1007\/3-540-06859-7_145"},{"key":"1748_CR6","doi-asserted-by":"crossref","unstructured":"Oriato D, Tilbury S, Marrocu M, Pusceddu G. Acceleration of a meteorological limited area model with dataflow engines. In Proc. Symp. Application Accelerators in High Performance Computing, July 2012, pp.129-132.","DOI":"10.1109\/SAAHPC.2012.8"},{"key":"1748_CR7","doi-asserted-by":"crossref","unstructured":"Pratas F, Oriato D, Pell O, Mata R A, Sousa L. Accelerating the computation of induced dipoles for molecular mechanics with dataflow engines. In Proc. the 21st IEEE Annual Int. Symp. Field-Programmable Custom Computing Machines, April 2013, pp.177-180.","DOI":"10.1109\/FCCM.2013.34"},{"issue":"1","key":"1748_CR8","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1109\/MM.2013.111","volume":"34","author":"HH Fu","year":"2014","unstructured":"Fu H H, Gan L, Clapp R G et al. Scaling reverse time migration performance through reconfigurable dataflow engines. IEEE Micro, 2014, 34(1): 30-40.","journal-title":"IEEE Micro"},{"key":"1748_CR9","unstructured":"Ackerman W B, Dennis J B. VAL\u2013A value-oriented algorithmic language: Preliminary reference manual. Technical Report TR-218, Computation Structure Group, Laboratory for Computer Science, MIT, 1979. http:\/\/citeseerxist.psu.edu\/showciting?cid=928490 , Dec. 2017."},{"issue":"7","key":"1748_CR10","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1109\/MC.2004.65","volume":"37","author":"D Burger","year":"2004","unstructured":"Burger D, Keckler S W, McKinley K S, Dahlin M, John L K, Lin C, Moore C R, Burrill J, McDonald R G, Yoder W. Scaling to the end of silicon with edge architectures. Computer, 2004, 37(7): 44-55.","journal-title":"Computer"},{"key":"1748_CR11","unstructured":"Arvind N, Gostelow K, Plouffe W. An asynchronous programming language and computing machine. Technical Report TR114a, Department of Information and Computer Science, University of California, 1978."},{"issue":"3","key":"1748_CR12","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1109\/12.48862","volume":"39","author":"K Arvind","year":"1990","unstructured":"Arvind K, Nikhil R S. Executing a program on the MIT tagged-token dataflow architecture. IEEE Trans. Computers, 1990, 39(3): 300-318.","journal-title":"IEEE Trans. Computers"},{"key":"1748_CR13","doi-asserted-by":"crossref","unstructured":"Swanson S, Schwerin A, Mercaldi M et al. The wavescalar architecture. ACM Trans. Computer Systems, 2007, 25(2): Article No. 4.","DOI":"10.1145\/1233307.1233308"},{"key":"1748_CR14","doi-asserted-by":"crossref","unstructured":"Zuckerman S, Suetterlein J, Knauerhase R, Gao G R. Position paper: Using a \u201ccodelet\u201d program execution model for exascale machines. In Proc. the 1st Int. Workshop on Adaptive Self-Tuning Computing Systems for the Exaflop Era, June 2011, pp.64-69.","DOI":"10.1145\/2000417.2000424"},{"key":"1748_CR15","doi-asserted-by":"crossref","unstructured":"Suettlerlein J, Zuckerman S, Gao G R. An implementation of the codelet model. In Proc. the 19th Int. Conf. Parallel Processing, August 2013, pp.633-644.","DOI":"10.1007\/978-3-642-40047-6_63"},{"issue":"4","key":"1748_CR16","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1109\/MCSE.2012.78","volume":"14","author":"O Pell","year":"2012","unstructured":"Pell O, Averbukh V. Maximum performance computing with dataflow engines. Computing in Science & Engineering, 2012, 14(4): 98-103.","journal-title":"Computing in Science & Engineering"},{"issue":"3","key":"1748_CR17","doi-asserted-by":"publisher","first-page":"205","DOI":"10.1145\/2678373.2665703","volume":"42","author":"D Voitsechov","year":"2014","unstructured":"Voitsechov D, Etsion Y. Single-graph multiple flows: Energy efficient design alternative for GPGPUs. ACM SIGARCH Computer Architecture News, 2014, 42(3): 205-216.","journal-title":"ACM SIGARCH Computer Architecture News"},{"issue":"1","key":"1748_CR18","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1145\/2465.2468","volume":"28","author":"JR Gurd","year":"1985","unstructured":"Gurd J R, Kirkham C C, Watson I. The Manchester prototype dataflow computer. Communications of the ACM, 1985, 28(1): 34-52.","journal-title":"Communications of the ACM"},{"issue":"1","key":"1748_CR19","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1007\/s11390-017-1703-5","volume":"32","author":"XW Shen","year":"2017","unstructured":"Shen X W, Ye X C, Tan X, Wang D, Zhang L K, Li W M, Zhang Z M, Fan D R, Sun N H. An efficient network-onchip router for dataflow architecture. Journal of Computer Science and Technology, 2017, 32(1): 11-25.","journal-title":"Journal of Computer Science and Technology"},{"issue":"1","key":"1748_CR20","doi-asserted-by":"publisher","first-page":"145","DOI":"10.1007\/s11390-017-1747-6","volume":"33","author":"X Tan","year":"2018","unstructured":"Tan X, Shen X W, Ye X C, Wang D, Fan D R, Zhang L K, Li W M, Zhang Z M, Tang Z M. A non-stop double buffering mechanism for dataflow architecture. Journal of Computer Science and Technology, 2018, 33(1): 145-157.","journal-title":"Journal of Computer Science and Technology"},{"key":"1748_CR21","doi-asserted-by":"crossref","unstructured":"Ye X C, Fan D R, Sun N H et al. SimICT: A fast and flexible framework for performance and power evaluation of large-scale architecture. In Proc. Int. Symp. Low Power Electronics and Design, September 2013, pp.273-278.","DOI":"10.1109\/ISLPED.2013.6629308"},{"key":"1748_CR22","doi-asserted-by":"crossref","unstructured":"Nguyen A, Satish N, Chhugani J et al. 3.5-D blocking optimization for stencil computations on modern CPUs and GPUs. In Proc. ACM\/IEEE Int. Conf. for High Performance Computing Networking Storage and Analysis, Nov. 2010.","DOI":"10.1109\/SC.2010.2"},{"issue":"11","key":"1748_CR23","doi-asserted-by":"publisher","first-page":"2045","DOI":"10.1109\/TPDS.2011.311","volume":"23","author":"J Kurzak","year":"2012","unstructured":"Kurzak J, Tomov S, Dongarra J. Autotuning GEMM kernels for the Fermi GPU. IEEE Trans. Parallel and Distributed Systems, 2012, 23(11): 2045-2057.","journal-title":"IEEE Trans. Parallel and Distributed Systems"},{"issue":"3","key":"1748_CR24","doi-asserted-by":"publisher","first-page":"417","DOI":"10.1109\/TPDS.2012.160","volume":"24","author":"YP Zhang","year":"2013","unstructured":"Zhang Y P, Mueller F. Autogeneration and autotuning of 3D stencil codes on homogeneous and heterogeneous GPU clusters. IEEE Trans. Parallel and Distributed Systems, 2013, 24(3): 417-427.","journal-title":"IEEE Trans. Parallel and Distributed Systems"},{"key":"1748_CR25","doi-asserted-by":"crossref","unstructured":"del Mundo C, Feng W C. Towards a performance-portable FFT library for heterogeneous computing. In Proc. the 11th ACM Conf. Computing Frontiers, May 2014, Article No.11.","DOI":"10.1145\/2597917.2597943"},{"key":"1748_CR26","doi-asserted-by":"crossref","unstructured":"Li S, Ahn J H, Strong R D, Brockman J B, Tullsen D M, Jouppi N P. McPAT: An integrated power, area, and timing modeling framework for multicore and manycore architectures. In Proc. the 42nd Annual IEEE\/ACM Int. Symp. Microarchitecture, Dec. 2009, pp.469-480.","DOI":"10.1145\/1669112.1669172"},{"key":"1748_CR27","doi-asserted-by":"crossref","unstructured":"Naffziger S. High-performance processors in a power-limited world. In Proc. Symp. VLSI Circuits Digest of Technical Papers, June 2006, pp.93-97.","DOI":"10.1109\/VLSIC.2006.1705327"},{"key":"1748_CR28","doi-asserted-by":"crossref","unstructured":"Solinas M, Badia R M, Bodin F et al. The TERAFLUX project: Exploiting the dataflow paradigm in next generation teradevices. In Proc. Euromicro Conf. Digital System Design, September 2013, pp.272-279.","DOI":"10.1109\/DSD.2013.39"},{"key":"1748_CR29","doi-asserted-by":"crossref","unstructured":"Carter N P, Agrawal A, Borkar S et al. Runnemede: An architecture for ubiquitous high-performance computing. In Proc. the 19th IEEE Int. Symp. High Performance Computer Architecture, February 2013, pp.198-209.","DOI":"10.1109\/HPCA.2013.6522319"}],"container-title":["Journal of Computer Science and Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11390-017-1748-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11390-017-1748-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11390-017-1748-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,10,9]],"date-time":"2019-10-09T23:18:56Z","timestamp":1570663136000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11390-017-1748-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,1]]},"references-count":29,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2018,1]]}},"alternative-id":["1748"],"URL":"https:\/\/doi.org\/10.1007\/s11390-017-1748-5","relation":{},"ISSN":["1000-9000","1860-4749"],"issn-type":[{"value":"1000-9000","type":"print"},{"value":"1860-4749","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,1]]},"assertion":[{"value":"4 September 2016","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 April 2017","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 January 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}