{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T07:48:21Z","timestamp":1743752901424},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2022,7,1]],"date-time":"2022-07-01T00:00:00Z","timestamp":1656633600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,7,1]],"date-time":"2022-07-01T00:00:00Z","timestamp":1656633600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J. Comput. Sci. Technol."],"published-print":{"date-parts":[[2022,7]]},"DOI":"10.1007\/s11390-020-0555-6","type":"journal-article","created":{"date-parts":[[2022,8,13]],"date-time":"2022-08-13T02:02:45Z","timestamp":1660356165000},"page":"942-959","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Accelerating Data Transfer in Dataflow Architectures Through a Look-Ahead Acknowledgment 
Mechanism"],"prefix":"10.1007","volume":"37","author":[{"given":"Yu-Jing","family":"Feng","sequence":"first","affiliation":[]},{"given":"De-Jian","family":"Li","sequence":"additional","affiliation":[]},{"given":"Xu","family":"Tan","sequence":"additional","affiliation":[]},{"given":"Xiao-Chun","family":"Ye","sequence":"additional","affiliation":[]},{"given":"Dong-Rui","family":"Fan","sequence":"additional","affiliation":[]},{"given":"Wen-Ming","family":"Li","sequence":"additional","affiliation":[]},{"given":"Da","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Hao","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Zhi-Min","family":"Tang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,7,30]]},"reference":[{"key":"555_CR1","doi-asserted-by":"publisher","unstructured":"Dennis J B. Retrospective: A preliminary architecture for a basic data-flow processor. In Proc. the 25 Years of the International Symposia on Computer Architecture, August 1998, pp.2-4. https:\/\/doi.org\/10.1145\/285930.285932.","DOI":"10.1145\/285930.285932"},{"issue":"3","key":"555_CR2","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1109\/12.48862","volume":"39","author":"NRS Arvind","year":"1990","unstructured":"Arvind, Nikhil R S. Executing a program on the MIT tagged-token dataflow architecture. IEEE Transactions on Computers, 1990, 39(3): 300-318. https:\/\/doi.org\/10.1109\/12.48862.","journal-title":"IEEE Transactions on Computers"},{"key":"555_CR3","doi-asserted-by":"publisher","unstructured":"Sankaralingam K, Nagarajan R, Liu H, Kim C, Huh J, Burger D, Keckler S W, Moore C R. Exploiting ILP, TLP, and DLP with the polymorphous TRIPS architecture. In Proc. the 30th Annual International Symposium on Computer Architecture, June 2003, pp.422-433. 
https:\/\/doi.org\/10.1109\/ISCA.2003.1207019.","DOI":"10.1109\/ISCA.2003.1207019"},{"key":"555_CR4","doi-asserted-by":"publisher","unstructured":"Swanson S, Michelson K, Schwerin A, Oskin M. WaveScalar. In Proc. the 36th Annual IEEE\/ACM International Symposium on Microarchitecture, December 2003, pp.291-302. https:\/\/doi.org\/10.1109\/MICRO.2003.1253203.","DOI":"10.1109\/MICRO.2003.1253203"},{"key":"555_CR5","doi-asserted-by":"publisher","unstructured":"Pratas F, Oriato D, Pell O, Mata R A, Sousa L. Accelerating the computation of induced dipoles for molecular mechanics with dataflow engines. In Proc. the 21st IEEE Annual International Symposium on Field-Programmable Custom Computing Machines, April 2013, pp.177-180. https:\/\/doi.org\/10.1109\/FCCM.2013.34.","DOI":"10.1109\/FCCM.2013.34"},{"issue":"1","key":"555_CR6","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1109\/MM.2013.111","volume":"34","author":"H Fu","year":"2014","unstructured":"Fu H, Gan L, Clapp R G, Ruan H, Pell O, Mencer O, Flynn M, Huang X, Yang G. Scaling reverse time migration performance through reconfigurable dataflow engines. IEEE Micro, 2014, 34(1): 30-40. https:\/\/doi.org\/10.1109\/MM.2013.111.","journal-title":"IEEE Micro"},{"key":"555_CR7","doi-asserted-by":"publisher","unstructured":"Coons K E, Chen X, Burger D, McKinley K S, Kushwaha S K. A spatial path scheduling algorithm for EDGE architectures. In Proc. the 12th International Conference on Architectural Support for Programming Languages and Operating Systems, October 2006, pp.129-140. https:\/\/doi.org\/10.1145\/1168857.1168875.","DOI":"10.1145\/1168857.1168875"},{"key":"555_CR8","doi-asserted-by":"publisher","unstructured":"Liu D, Yin S, Liu L, Wei S. Polyhedral model based mapping optimization of loop nests for CGRAs. In Proc. the 50th ACM\/EDAC\/IEEE Design Automation Conference, May 29-June 7, 2013, Article No.19. 
https:\/\/doi.org\/10.1145\/2463209.2488757.","DOI":"10.1145\/2463209.2488757"},{"issue":"6","key":"555_CR9","doi-asserted-by":"publisher","first-page":"495","DOI":"10.1145\/2499370.2462163","volume":"48","author":"T Nowatzki","year":"2013","unstructured":"Nowatzki T, Sartin-Tarm M, De Carli L, Sankaralingam K, Estan C, Robatmili B. A general constraint-centric scheduling framework for spatial architectures. ACM SIGPLAN Notices, 2013, 48(6): 495-506. https:\/\/doi.org\/10.1145\/2499370.2462163.","journal-title":"ACM SIGPLAN Notices"},{"key":"555_CR10","doi-asserted-by":"publisher","unstructured":"Nowatzki T, Gangadhar V, Sankaralingam K. Exploring the potential of heterogeneous von Neumann\/dataflow execution models. In Proc. the 42nd Annual International Symposium on Computer Architecture, June 2015, pp.298-310. https:\/\/doi.org\/10.1145\/2749469.2750380.","DOI":"10.1145\/2749469.2750380"},{"key":"555_CR11","doi-asserted-by":"publisher","unstructured":"Sankaralingam K, Nagarajan R, McDonald R et al. Distributed microarchitectural protocols in the TRIPS prototype processor. In Proc. the 39th Annual IEEE\/ACM International Symposium on Microarchitecture, December 2006, pp.480-491. https:\/\/doi.org\/10.1109\/MICRO.2006.19.","DOI":"10.1109\/MICRO.2006.19"},{"key":"555_CR12","unstructured":"Putnam A, Swanson S, Mercaldi M, Michelson K, Petersen A, Schwerin A, Oskin M, Eggers S. The microarchitecture of a pipelined WaveScalar processor: An RTL-based study. Technical Report, University of Washington, 2004. http:\/\/cseweb.ucsd.edu\/swanson\/papers\/TR-2004-11-02.pdf, Sept. 2020."},{"key":"555_CR13","doi-asserted-by":"crossref","unstructured":"Shimada T, Hiraki K, Nishida K, Sekiguchi S. Evaluation of a prototype data flow processor of the SIGMA-1 for scientific computations. In Proc. 
the 13th Annual International Symposium on Computer Architecture, June 1986, pp.226-234.","DOI":"10.1145\/17356.17383"},{"key":"555_CR14","doi-asserted-by":"publisher","unstructured":"Papadopoulos G M, Culler D E. Monsoon: An explicit token-store architecture. In Proc. the 25 Years of the International Symposia on Computer Architecture, August 1998, pp.398-407. https:\/\/doi.org\/10.1145\/285930.285999.","DOI":"10.1145\/285930.285999"},{"issue":"5","key":"555_CR15","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1109\/MM.2012.51","volume":"32","author":"V Govindaraju","year":"2012","unstructured":"Govindaraju V, Ho C H, Nowatzki T, Chhugani J, Satish N, Sankaralingam K, Kim C. DySER: Unifying functionality and parallelism specialization for energy-efficient computing. IEEE Micro, 2012, 32(5): 38-51. https:\/\/doi.org\/10.1109\/MM.2012.51.","journal-title":"IEEE Micro"},{"issue":"1","key":"555_CR16","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1007\/s11390-017-1703-5","volume":"32","author":"X Shen","year":"2017","unstructured":"Shen X, Ye X, Tan X, Wang D, Zhang L, Li W, Zhang Z, Fan D. An efficient network-on-chip router for dataflow architecture. Journal of Computer Science and Technology, 2017, 32(1): 11-25. https:\/\/doi.org\/10.1007\/s11390-017-1703-5.","journal-title":"Journal of Computer Science and Technology"},{"issue":"11","key":"555_CR17","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1145\/1168918.1168876","volume":"41","author":"M Mercaldi","year":"2006","unstructured":"Mercaldi M, Swanson S, Petersen A, Putnam A, Schwerin A, Oskin M, Eggers S J. Instruction scheduling for a tiled dataflow architecture. ACM SIGPLAN Notices, 2006, 41(11): 141-150. https:\/\/doi.org\/10.1145\/1168918.1168876.","journal-title":"ACM SIGPLAN Notices"},{"key":"555_CR18","doi-asserted-by":"publisher","unstructured":"Voitsechov D, Etsion Y. Single-graph multiple flows: Energy efficient design alternative for GPGPUs. In Proc. 
the 41st ACM\/IEEE Annual International Symposium on Computer Architecture, June 2014, pp.205-216. https:\/\/doi.org\/10.1109\/ISCA.2014.6853234.","DOI":"10.1109\/ISCA.2014.6853234"},{"issue":"1","key":"555_CR19","doi-asserted-by":"publisher","first-page":"6","DOI":"10.1109\/MC.1984.1658927","volume":"17","author":"JKF Lee","year":"1984","unstructured":"Lee J K F, Smith A J. Branch prediction strategies and branch target buffer design. Computer, 1984, 17(1): 6-22. https:\/\/doi.org\/10.1109\/MC.1984.1658927.","journal-title":"Computer"},{"key":"555_CR20","doi-asserted-by":"publisher","unstructured":"Ye X, Fan D, Sun N, Tang S, Zhang M, Zhang H. SimICT: A fast and flexible framework for performance and power evaluation of large-scale architecture. In Proc. the 2013 International Symposium on Low Power Electronics and Design, September 2013, pp.273-278. https:\/\/doi.org\/10.1109\/ISLPED.2013.6629308.","DOI":"10.1109\/ISLPED.2013.6629308"},{"key":"555_CR21","doi-asserted-by":"publisher","unstructured":"Han R, Lu X Y, Xu J T. On Big Data Benchmarking. In Big Data Benchmarks, Performance Optimization, and Emerging Hardware, Zhan J, Han R, Weng C (eds.), Springer, 2014, pp.3-18. https:\/\/doi.org\/10.1007\/978-3-319-13021-7_1.","DOI":"10.1007\/978-3-319-13021-7_1"},{"issue":"3","key":"555_CR22","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1145\/268806.268810","volume":"25","author":"D Burger","year":"1997","unstructured":"Burger D, Austin T M. The SimpleScalar tool set, version 2.0. SIGARCH Comput. Archit. News, 1997, 25(3): 13-25. https:\/\/doi.org\/10.1145\/268806.268810.","journal-title":"SIGARCH Comput. Archit. News"},{"issue":"11","key":"555_CR23","doi-asserted-by":"publisher","first-page":"2045","DOI":"10.1109\/TPDS.2011.311","volume":"23","author":"J Kurzak","year":"2012","unstructured":"Kurzak J, Tomov S, Dongarra J. Autotuning GEMM kernels for the Fermi GPU. IEEE Transactions on Parallel and Distributed Systems, 2012, 23(11): 2045-2057. 
https:\/\/doi.org\/10.1109\/TPDS.2011.311.","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"555_CR24","doi-asserted-by":"publisher","unstructured":"Del Mundo C, Feng W. Towards a performance-portable FFT library for heterogeneous computing. In Proc. the 11th ACM Conference on Computing Frontiers, May 2014, Article No. 11. https:\/\/doi.org\/10.1145\/2597917.2597943.","DOI":"10.1145\/2597917.2597943"},{"key":"555_CR25","doi-asserted-by":"publisher","unstructured":"Holewinski J, Pouchet L N, Sadayappan P. High-performance code generation for stencil computations on GPU architectures. In Proc. the 26th ACM International Conference on Supercomputing, June 2012, pp.311-320. https:\/\/doi.org\/10.1145\/2304576.2304619.","DOI":"10.1145\/2304576.2304619"},{"key":"555_CR26","unstructured":"Stratton J A, Rodrigues C, Sung I, Obeid N, Chang L, Anssari N, Liu G D, Hwu W W. Parboil: A revised benchmark suite for scientific and commercial throughput computing. Technical Report, University of Illinois at Urbana-Champaign, 2012. http:\/\/impact.crhc.illinois.edu\/Shared\/Docs\/impact-12-01.parboil.pdf, Sept. 2020."},{"key":"555_CR27","doi-asserted-by":"publisher","unstructured":"Siehl K, Zhao X. Supporting energy-efficient computing on heterogeneous CPU-GPU architectures. In Proc. the 5th IEEE International Conference on Future Internet of Things and Cloud, August 2017, pp.134-141. https:\/\/doi.org\/10.1109\/FiCloud.2017.46.","DOI":"10.1109\/FiCloud.2017.46"},{"key":"555_CR28","doi-asserted-by":"publisher","unstructured":"Burtscher M, Zecena I, Zong Z. Measuring GPU power with the K20 built-in sensor. In Proc. the 7th Workshop on General Purpose Processing Using GPUs, March 2014, pp.28-36. 
https:\/\/doi.org\/10.1145\/2588768.2576783.","DOI":"10.1145\/2588768.2576783"}],"container-title":["Journal of Computer Science and Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11390-020-0555-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11390-020-0555-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11390-020-0555-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,13]],"date-time":"2022-08-13T02:13:30Z","timestamp":1660356810000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11390-020-0555-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7]]},"references-count":28,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2022,7]]}},"alternative-id":["555"],"URL":"https:\/\/doi.org\/10.1007\/s11390-020-0555-6","relation":{},"ISSN":["1000-9000","1860-4749"],"issn-type":[{"value":"1000-9000","type":"print"},{"value":"1860-4749","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,7]]},"assertion":[{"value":"15 April 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 December 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 July 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}