{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,4,2]],"date-time":"2022-04-02T08:00:30Z","timestamp":1648886430471},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2010,12,11]],"date-time":"2010-12-11T00:00:00Z","timestamp":1292025600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2012,3]]},"DOI":"10.1007\/s11227-010-0522-z","type":"journal-article","created":{"date-parts":[[2010,12,10]],"date-time":"2010-12-10T14:47:59Z","timestamp":1291992479000},"page":"1229-1251","source":"Crossref","is-referenced-by-count":0,"title":["Optimizing modulo scheduling to achieve reuse and\u00a0concurrency for stream processors"],"prefix":"10.1007","volume":"59","author":[{"given":"Li","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jingling","family":"Xue","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xuejun","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2010,12,11]]},"reference":[{"key":"522_CR1","unstructured":"AMD (2006) AMD FireStream Stream Processor. http:\/\/atiamdcom\/products\/streamprocessor\/specshtml"},{"key":"522_CR2","doi-asserted-by":"crossref","first-page":"73","DOI":"10.1145\/774789.774805","volume-title":"CODES \u201902: proceedings of the tenth international symposium on hardware\/software codesign","author":"R Banakar","year":"2002","unstructured":"Banakar R, Steinke S, Lee BS, Balakrishnan M, Marwedel P (2002) Scratchpad memory: design alternative for cache on-chip memory in embedded systems. In: CODES \u201902: proceedings of the tenth international symposium on hardware\/software codesign. ACM Press, New York, pp 73\u201378"},{"key":"522_CR3","first-page":"1","volume-title":"SC \u201908: proceedings of the 2008 ACM\/IEEE conference on supercomputing","author":"KJ Barker","year":"2008","unstructured":"Barker KJ, Davis K, Hoisie A, Kerbyson DJ, Lang M, Pakin S, Sancho JC (2008) Entering the petaflop era: the architecture and performance of roadrunner. In: SC \u201908: proceedings of the 2008 ACM\/IEEE conference on supercomputing, pp 1\u201311"},{"issue":"3","key":"522_CR4","doi-asserted-by":"crossref","first-page":"777","DOI":"10.1145\/1015706.1015800","volume":"23","author":"I Buck","year":"2004","unstructured":"Buck I, Foley T, Horn D, Sugerman J, Fatahalian K, Houston M, Hanrahan P (2004) Brook for gpus: stream computing on graphics hardware. ACM Trans Graph 23(3):777\u2013786","journal-title":"ACM Trans Graph"},{"key":"522_CR5","first-page":"11","volume-title":"MoBS \u201905: workshop on modeling, benchmarking, and simulation","author":"J Cuvillo","year":"2005","unstructured":"Cuvillo J, Zhu W, Ziang H, Gao G (2005) FAST: a functionally accurate simulation toolset for the Cyclops64 cellular architecture. In: MoBS \u201905: workshop on modeling, benchmarking, and simulation. ACM Press, New York, pp 11\u201320"},{"key":"522_CR6","doi-asserted-by":"crossref","first-page":"35","DOI":"10.1145\/1048935.1050187","volume-title":"SC\u00a0\u201903: proceedings of the 2003 ACM\/IEEE conference on supercomputing","author":"WJ Dally","year":"2003","unstructured":"Dally WJ, Labonte F, Das A, Hanrahan P et al (2003) Merrimac: supercomputing with streams. In: SC\u00a0\u201903: proceedings of the 2003 ACM\/IEEE conference on supercomputing. IEEE Computer Society, Washington, pp 35\u201342"},{"key":"522_CR7","doi-asserted-by":"crossref","first-page":"33","DOI":"10.1145\/1152154.1152164","volume-title":"PACT \u201906: proceedings of the 15th international conference on parallel architectures and compilation techniques","author":"A Das","year":"2006","unstructured":"Das A, Dally WJ, Mattson P (2006) Compiling for stream processing. In: PACT \u201906: proceedings of the 15th international conference on parallel architectures and compilation techniques. ACM Press, New York, pp 33\u201342"},{"key":"522_CR8","doi-asserted-by":"crossref","first-page":"115","DOI":"10.1007\/s11227-008-0208-y","volume":"48","author":"G Dimitroulakos","year":"2009","unstructured":"Dimitroulakos G, Kostaras N, Galanis MD, Goutis CE (2009) Compiler assisted architectural exploration framework for coarse grained reconfigurable arrays. J Supercomput 48:115\u2013151","journal-title":"J Supercomput"},{"key":"522_CR9","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1145\/224538.224542","volume-title":"ICS \u201995: proceedings of the 9th international conference on supercomputing","author":"AE Eichenberger","year":"1995","unstructured":"Eichenberger AE, Davidson ES, Abraham SG (1995) Optimum modulo schedules for minimum register requirements. In: ICS \u201995: proceedings of the 9th international conference on supercomputing. ACM Press, New York, pp 31\u201340"},{"key":"522_CR10","doi-asserted-by":"crossref","first-page":"343","DOI":"10.1109\/MICRO.2005.32","volume-title":"MICRO 38: proceedings of the 38th annual IEEE\/ACM international symposium on microarchitecture","author":"J Gummaraju","year":"2005","unstructured":"Gummaraju J, Rosenblum M (2005) Stream programming on general-purpose processors. In: MICRO 38: proceedings of the 38th annual IEEE\/ACM international symposium on microarchitecture. IEEE Computer Society, Washington, pp 343\u2013354"},{"key":"522_CR11","doi-asserted-by":"crossref","first-page":"297","DOI":"10.1145\/1346281.1346319","volume-title":"ASPLOS XIII: proceedings of the 13th international conference on architectural support for programming languages and operating systems","author":"J Gummaraju","year":"2008","unstructured":"Gummaraju J, Coburn J, Turner Y, Rosenblum M (2008) Streamware: programming general-purpose multicore processors using streams. In: ASPLOS XIII: proceedings of the 13th international conference on architectural support for programming languages and operating systems. ACM Press, New York, pp 297\u2013307"},{"key":"522_CR12","doi-asserted-by":"crossref","first-page":"114","DOI":"10.1145\/1375581.1375596","volume-title":"PLDI \u201908: proceedings of the 2008 ACM SIGPLAN conference on programming language design and implementation","author":"M Kudlur","year":"2008","unstructured":"Kudlur M, Mahlke S (2008) Orchestrating the execution of stream programs on multicore platforms. In: PLDI \u201908: proceedings of the 2008 ACM SIGPLAN conference on programming language design and implementation. ACM Press, New York, pp 114\u2013124"},{"key":"522_CR13","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1109\/PACT.2004.1342560","volume-title":"PACT \u201904: proceedings of the 13th international conference on parallel architectures and compilation techniques","author":"F Labonte","year":"2004","unstructured":"Labonte F, Mattson P, Thies W, Buck I, Kozyrakis C, Horowitz M (2004) The stream virtual machine. In: PACT \u201904: proceedings of the 13th international conference on parallel architectures and compilation techniques, pp 267\u2013277"},{"key":"522_CR14","doi-asserted-by":"crossref","first-page":"327","DOI":"10.1109\/MICRO.1995.476842","volume-title":"MICRO-28: proceedings of the 28th annual international symposium on microarchitecture","author":"DM Lavery","year":"1995","unstructured":"Lavery DM, Hwu WMW (1995) Unrolling-based optimizations for modulo scheduling. In: MICRO-28: proceedings of the 28th annual international symposium on microarchitecture, pp 327\u2013337"},{"key":"522_CR15","doi-asserted-by":"crossref","first-page":"358","DOI":"10.1145\/1250662.1250707","volume-title":"ISCA \u201907: proceedings of the 34th annual international symposium on computer architecture","author":"J Leverich","year":"2007","unstructured":"Leverich J, Arakida H, Solomatnikov A, Firoozshahian A, Horowitz M, Kozyrakis C (2007) Comparing memory systems for chip multiprocessors. In: ISCA \u201907: proceedings of the 34th annual international symposium on computer architecture. ACM Press, New York, pp 358\u2013368"},{"key":"522_CR16","doi-asserted-by":"crossref","first-page":"66","DOI":"10.1007\/s11227-008-0192-2","volume":"45","author":"H Li","year":"2008","unstructured":"Li H, Zhang C, Li L, Ren J (2008) Transform coding on programmable stream processors. J Supercomput 45:66\u201387","journal-title":"J Supercomput"},{"key":"522_CR17","doi-asserted-by":"crossref","first-page":"80","DOI":"10.1109\/PACT.1996.554030","volume-title":"PACT \u201996: proceedings of the 1996 conference on parallel architectures and compilation techniques","author":"J Llosa","year":"1996","unstructured":"Llosa J (1996) Swing modulo scheduling: a lifetime-sensitive approach. In: PACT \u201996: proceedings of the 1996 conference on parallel architectures and compilation techniques. IEEE Computer Society, Washington, pp 80\u201386"},{"key":"522_CR18","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/1362622.1362647","volume-title":"SC \u201907: proceedings of the 2007 ACM\/IEEE conference on supercomputing","author":"J Makino","year":"2007","unstructured":"Makino J, Hiraki K, Inaba M (2007) GRAPE-DR: 2-Pflops massively-parallel computer with 512-core, 512-Gflops processor chips for scientific computing. In: SC \u201907: proceedings of the 2007 ACM\/IEEE conference on supercomputing. ACM Press, New York, pp 1\u201311"},{"key":"522_CR19","unstructured":"NVIDIA (2009) CUDA Architecture Overview. http:\/\/developerdownloadnvidiacom\/compute\/cuda\/docs\/CUDA_Architecture_Overviewpdf"},{"key":"522_CR20","doi-asserted-by":"crossref","first-page":"295","DOI":"10.1109\/ICCD.2002.1106785","volume-title":"ICCD \u201902 proceedings of the 2002 IEEE international conference on computer design: VLSI in computers and processors","author":"JD Owens","year":"2002","unstructured":"Owens JD, Kapasi UJ, Mattson P, Towles B, Serebrin B, Rixner S, Dally WJ (2002) Media processing applications on the imagine stream processor. In: ICCD \u201902 proceedings of the 2002 IEEE international conference on computer design: VLSI in computers and processors, Freiburg, Germany. IEEE Computer Society, Washington, pp 295\u2013302"},{"key":"522_CR21","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1145\/192724.192731","volume-title":"MICRO-27: proceedings of the 27th annual international symposium on microarchitecture","author":"BR Rau","year":"1994","unstructured":"Rau BR (1994) Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO-27: proceedings of the 27th annual international symposium on microarchitecture, pp 63\u201374"},{"key":"522_CR22","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/1542452.1542454","volume-title":"LCTES \u201909: proceedings of the 2009 ACM SIGPLAN\/SIGBED conference on languages, compilers, and tools for embedded systems","author":"EJ Stotzer","year":"2009","unstructured":"Stotzer EJ, Leiss EL (2009) Modulo scheduling without overlapped lifetimes. In: LCTES \u201909: proceedings of the 2009 ACM SIGPLAN\/SIGBED conference on languages, compilers, and tools for embedded systems. ACM Press, New York, pp 1\u201310"},{"key":"522_CR23","unstructured":"Thies W, Karczmarek M, Gordon M, Maze D, Wong J, Ho H, Brown M, Amarasinghe S (2001) StreamIt: a compiler for streaming applications. MIT-LCS Technical Memo TM-622"},{"key":"522_CR24","doi-asserted-by":"crossref","first-page":"161","DOI":"10.1145\/1375657.1375679","volume-title":"LCTES \u201908: proceedings of the 2008 ACM SIGPLAN-SIGBED conference on languages, compilers, and tools for embedded systems","author":"L Wang","year":"2008","unstructured":"Wang L, Yang X, Xue J, Deng Y, Yan X, Tang T, Nguyen QH (2008) Optimizing scientific application loops on stream processors. In: LCTES \u201908: proceedings of the 2008 ACM SIGPLAN-SIGBED conference on languages, compilers, and tools for embedded systems. ACM Press, New York, pp 161\u2013170"},{"key":"522_CR25","first-page":"1112","volume-title":"DATE \u201910: proceedings of the conference on design, automation and test in Europe","author":"L Wang","year":"2010","unstructured":"Wang L, Yang X, Xue J (2010) Reuse-aware modulo scheduling for stream processors. In: DATE \u201910: proceedings of the conference on design, automation and test in Europe, pp 1112\u20131117"},{"key":"522_CR26","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1145\/1128022.1128027","volume-title":"CF \u201906: proceedings of the 3rd conference on computing frontiers","author":"S Williams","year":"2006","unstructured":"Williams S, Shalf J, Oliker L, Kamil S, Husbands P, Yelick K (2006) The potential of the cell processor for scientific computing. In: CF \u201906: proceedings of the 3rd conference on computing frontiers. ACM Press, New York, pp 9\u201320"},{"key":"522_CR27","doi-asserted-by":"crossref","first-page":"30","DOI":"10.1145\/113445.113449","volume-title":"PLDI \u201991: proceedings of the 1991 conference on programming language design and implementation","author":"ME Wolf","year":"1991","unstructured":"Wolf ME, Lam MS (1991) A data locality optimizing algorithm. In: PLDI \u201991: proceedings of the 1991 conference on programming language design and implementation, pp 30\u201344"},{"key":"522_CR28","first-page":"16","volume-title":"LCPC \u201997: proceedings of the 10th workshop on languages and compilers for parallel computing","author":"J Xue","year":"1997","unstructured":"Xue J, Huang CH (1997) Reuse-driven tiling for data locality. In: LCPC \u201997: proceedings of the 10th workshop on languages and compilers for parallel computing. Springer, Berlin, pp 16\u201333"},{"key":"522_CR29","doi-asserted-by":"crossref","first-page":"210","DOI":"10.1145\/1250662.1250689","volume-title":"ISCA \u201907: proceedings of the 34th annual international symposium on computer architecture","author":"X Yang","year":"2007","unstructured":"Yang X, Yan X, Xing Z, Deng Y, Jiang J, Zhang Y (2007) A 64-bit stream processor architecture for scientific applications. In: ISCA \u201907: proceedings of the 34th annual international symposium on computer architecture. ACM Press, New York, pp 210\u2013219"},{"key":"522_CR30","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1007\/s11227-008-0186-0","volume":"47","author":"X Yang","year":"2009","unstructured":"Yang X, Du J, Yan X, Deng Y (2009) Matrix-based streamization approach for improving locality and parallelism on ft64 stream processor. J Supercomput 47:171\u2013197","journal-title":"J Supercomput"},{"issue":"11","key":"522_CR31","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/1839667.1839673","volume":"7","author":"X Yang","year":"2010","unstructured":"Yang X, Zhang Y, Lu X, Xue J, Rogers I, Li G, Wang G, Fang X (2010) Exploiting the reuse supplied by loop-dependent stream references for stream processors. ACM Trans Archit Code Optim 7(11):1\u201335","journal-title":"ACM Trans Archit Code Optim"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-010-0522-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-010-0522-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-010-0522-z","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,6]],"date-time":"2019-06-06T23:45:40Z","timestamp":1559864740000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-010-0522-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,12,11]]},"references-count":31,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2012,3]]}},"alternative-id":["522"],"URL":"https:\/\/doi.org\/10.1007\/s11227-010-0522-z","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2010,12,11]]}}}