{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T17:47:23Z","timestamp":1725558443934},"publisher-location":"Berlin, Heidelberg","reference-count":31,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642133732"},{"type":"electronic","value":"9783642133749"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010]]},"DOI":"10.1007\/978-3-642-13374-9_7","type":"book-chapter","created":{"date-parts":[[2010,6,10]],"date-time":"2010-06-10T07:15:59Z","timestamp":1276154159000},"page":"95-110","source":"Crossref","is-referenced-by-count":0,"title":["Mapping Streaming Languages to General Purpose Processors through Vectorization"],"prefix":"10.1007","author":[{"given":"Raymond","family":"Manley","sequence":"first","affiliation":[]},{"given":"David","family":"Gregg","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"7_CR1","doi-asserted-by":"crossref","unstructured":"Owens, J.D., Rixner, S., Kapasi, U.J., Mattson, P., Towles, B., Serebrin, B., Dally, W.J.: Media processing applications on the imagine stream processor. In: International Conference on Computer Design, p. 295 (2002)","DOI":"10.1109\/ICCD.2002.1106785"},{"key":"7_CR2","volume-title":"ISCA 2004: Proceedings of the 31st annual international symposium on Computer architecture","author":"M.B. Taylor","year":"2004","unstructured":"Taylor, M.B., Lee, W., Miller, J., Wentzlaff, D., Bratt, I., Greenwald, B., Hoffmann, H., Johnson, P., Kim, J., Psota, J., Saraf, A., Shnidman, N., Strumpen, V., Frank, M., Amarasinghe, S., Agarwal, A.: Evaluation of the raw microprocessor: An exposed-wire-delay architecture for ILP and streams. In: ISCA 2004: Proceedings of the 31st annual international symposium on Computer architecture, Washington, DC, USA, vol.\u00a02. IEEE Computer Society, Los Alamitos (2004)"},{"issue":"2","key":"7_CR3","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1109\/40.918001","volume":"21","author":"B. Khailany","year":"2001","unstructured":"Khailany, B., Dally, W.J., Kapasi, U.J., Mattson, P., Namkoong, J., Owens, J.D., Towles, B., Chang, A., Rixner, S.: Imagine: Media processing with streams. IEEE Micro\u00a021(2), 35\u201346 (2001)","journal-title":"IEEE Micro"},{"key":"7_CR4","unstructured":"Zhang, X.D.: A streaming computation framework for the cell processor. M. eng. thesis, Massachusetts Institute of Technology, Cambridge, MA (August 2007)"},{"key":"7_CR5","doi-asserted-by":"crossref","unstructured":"Zhang, X.D., Li, Q.J., Rabbah, R., Amarasinghe, S.: A lightweight streaming layer for multicore execution. In: Workshop on Design, Architecture and Simulation of Chip Multi-Processors, Chicago, IL (December 2007)","DOI":"10.1145\/1399972.1399978"},{"key":"7_CR6","unstructured":"Amarasinghe, S.: StreamIt A Programming Language for the Era of Multicores (November 2006)"},{"key":"7_CR7","doi-asserted-by":"publisher","first-page":"777","DOI":"10.1145\/1186562.1015800","volume-title":"SIGGRAPH 2004: ACM SIGGRAPH 2004 Papers","author":"I. Buck","year":"2004","unstructured":"Buck, I., Foley, T., Horn, D., Sugerman, J., Fatahalian, K., Houston, M., Hanrahan, P.: Brook for GPUs: stream computing on graphics hardware. In: SIGGRAPH 2004: ACM SIGGRAPH 2004 Papers, pp. 777\u2013786. ACM, New York (2004)"},{"key":"7_CR8","doi-asserted-by":"crossref","first-page":"343","DOI":"10.1109\/MICRO.2005.32","volume-title":"MICRO 38: Proceedings of the 38th annual IEEE\/ACM International Symposium on Microarchitecture","author":"J. Gummaraju","year":"2005","unstructured":"Gummaraju, J., Rosenblum, M.: Stream programming on general-purpose processors. In: MICRO 38: Proceedings of the 38th annual IEEE\/ACM International Symposium on Microarchitecture, Washington, DC, USA, pp. 343\u2013354. IEEE Computer Society, Los Alamitos (2005)"},{"key":"7_CR9","first-page":"3","volume-title":"PACT 2007: Proceedings of the 16th International Conference on Parallel Architecture and Compilation Techniques","author":"J. Gummaraju","year":"2007","unstructured":"Gummaraju, J., Erez, M., Coburn, J., Rosenblum, M., Dally, W.J.: Architectural support for the stream execution model on general-purpose processors. In: PACT 2007: Proceedings of the 16th International Conference on Parallel Architecture and Compilation Techniques, Washington, DC, USA, pp. 3\u201312. IEEE Computer Society, Los Alamitos (2007)"},{"issue":"8","key":"7_CR10","doi-asserted-by":"publisher","first-page":"1015","DOI":"10.1109\/TC.2003.1223637","volume":"52","author":"D. Talla","year":"2003","unstructured":"Talla, D., John, L.K., Burger, D.: Bottlenecks in multimedia processing with SIMD style extensions and architectural enhancements. IEEE Trans. Comput.\u00a052(8), 1015\u20131031 (2003)","journal-title":"IEEE Trans. Comput."},{"key":"7_CR11","doi-asserted-by":"publisher","first-page":"114","DOI":"10.1145\/1375581.1375596","volume-title":"PLDI 2008: Proceedings of the 2008 ACM SIGPLAN conference on Programming language design and implementation","author":"M. Kudlur","year":"2008","unstructured":"Kudlur, M., Mahlke, S.: Orchestrating the execution of stream programs on multicore platforms. In: PLDI 2008: Proceedings of the 2008 ACM SIGPLAN conference on Programming language design and implementation, pp. 114\u2013124. ACM, New York (2008)"},{"issue":"2","key":"7_CR12","doi-asserted-by":"publisher","first-page":"297","DOI":"10.1145\/1353535.1346319","volume":"42","author":"J. Gummaraju","year":"2008","unstructured":"Gummaraju, J., Coburn, J., Turner, Y., Rosenblum, M.: Streamware: programming general-purpose multicore processors using streams. SIGOPS Oper. Syst. Rev.\u00a042(2), 297\u2013307 (2008)","journal-title":"SIGOPS Oper. Syst. Rev."},{"key":"7_CR13","doi-asserted-by":"publisher","first-page":"196","DOI":"10.1109\/CGO.2006.13","volume-title":"CGO 2006: Proceedings of the International Symposium on Code Generation and Optimization","author":"S. wei Liao","year":"2006","unstructured":"wei Liao, S., Du, Z., Wu, G., Lueh, G.Y.: Data and computation transformations for brook streaming applications on multiprocessors. In: CGO 2006: Proceedings of the International Symposium on Code Generation and Optimization, Washington, DC, USA, pp. 196\u2013207. IEEE Computer Society, Los Alamitos (2006)"},{"key":"7_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1007\/3-540-45937-5_14","volume-title":"Compiler Construction","author":"W. Thies","year":"2002","unstructured":"Thies, W., Karczmarek, M., Amarasinghe, S.P.: Streamit: A language for streaming applications. In: Horspool, R.N. (ed.) CC 2002. LNCS, vol.\u00a02304, pp. 179\u2013196. Springer, Heidelberg (2002)"},{"key":"7_CR15","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1145\/1152154.1152164","volume-title":"PACT 2006: Proceedings of the 15th international conference on Parallel architectures and compilation techniques","author":"A. Das","year":"2006","unstructured":"Das, A., Dally, W.J., Mattson, P.: Compiling for stream processing. In: PACT 2006: Proceedings of the 15th international conference on Parallel architectures and compilation techniques, pp. 33\u201342. ACM, New York (2006)"},{"issue":"2","key":"7_CR16","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1007\/s10766-005-3590-6","volume":"33","author":"S. Amarasinghe","year":"2005","unstructured":"Amarasinghe, S., Gordon, M.I., Karczmarek, M., Lin, J., Maze, D., Rabbah, R.M., Thies, W.: Language and compiler design for streaming applications. Int. J. Parallel Program.\u00a033(2), 261\u2013278 (2005)","journal-title":"Int. J. Parallel Program."},{"key":"7_CR17","unstructured":"Advanced Micro Devices, Inc.: AMD Brook+ (November 2007), http:\/\/ati.amd.com\/technology\/streamcomputing\/AMD-Brookplus.pdf"},{"key":"7_CR18","unstructured":"Nuzman, D., Zaks, A.: Autovectorization in GCC - two years later. In: GCC Summit (June 2006)"},{"key":"7_CR19","unstructured":"Naishlos, D.: Autovectorization in GCC. In: GCC Summit (June 2004)"},{"key":"7_CR20","unstructured":"Intel Corp.: Intel(R) C++ Compiler Intrinsics Reference (2007) ftp:\/\/download.intel.com\/support\/performancetools\/c\/linux\/v9\/intref_cls.pdf"},{"key":"7_CR21","unstructured":"Intel Corp.: Intel(R) 64 and IA-32 Architectures Optimization Reference Manual (2007), http:\/\/www.intel.com\/design\/processor\/manuals\/248966.pdf"},{"key":"7_CR22","unstructured":"Mucci, P.J.: PapiEx - Execute arbitrary application and measure hardware performance counters with PAPI (2009), http:\/\/icl.cs.utk.edu\/~mucci\/papiex\/"},{"issue":"6","key":"7_CR23","doi-asserted-by":"publisher","first-page":"132","DOI":"10.1145\/1133255.1133997","volume":"41","author":"D. Nuzman","year":"2006","unstructured":"Nuzman, D., Rosen, I., Zaks, A.: Auto-vectorization of interleaved data for SIMD. SIGPLAN Not.\u00a041(6), 132\u2013143 (2006)","journal-title":"SIGPLAN Not."},{"key":"7_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1007\/978-3-540-89740-8_2","volume-title":"Languages and Compilers for Parallel Computing","author":"J. Stratton","year":"2008","unstructured":"Stratton, J., Stone, S., mei Hwu, W.: MCUDA: An efficient implementation of CUDA kernels for multi-core CPUs. In: Amaral, J.N. (ed.) LCPC 2008. LNCS, vol.\u00a05335, pp. 16\u201330. Springer, Heidelberg (2008)"},{"key":"7_CR25","unstructured":"RapidMind: RapidMind Development Platform (May 2008), http:\/\/www.sharcnet.ca\/events\/ssgc2008\/presentations\/2008-05-27%20RapidMind%20SHARCnet.pdf"},{"key":"7_CR26","doi-asserted-by":"publisher","first-page":"347","DOI":"10.1023\/A:1007507005174","volume":"28","author":"A. Krall","year":"2000","unstructured":"Krall, A., Lelait, S.: Compilation techniques for multimedia processors. International Journal of Parallel Programming\u00a028, 347\u2013361 (2000)","journal-title":"International Journal of Parallel Programming"},{"key":"7_CR27","doi-asserted-by":"publisher","first-page":"491","DOI":"10.1145\/29873.29875","volume":"9","author":"R. Allen","year":"1987","unstructured":"Allen, R., Kennedy, K.: Automatic translation of FORTRAN programs to vector form. ACM Transactions on Programming Languages and Systems\u00a09, 491\u2013542 (1987)","journal-title":"ACM Transactions on Programming Languages and Systems"},{"key":"7_CR28","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"420","DOI":"10.1007\/978-3-540-24644-2_27","volume-title":"Languages and Compilers for Parallel Computing","author":"G. Ren","year":"2004","unstructured":"Ren, G., Wu, P., Padua, D.: A preliminary study on the vectorization of multimedia applications for multimedia extensions. In: Rauchwerger, L. (ed.) LCPC 2003. LNCS, vol.\u00a02958, pp. 420\u2013435. Springer, Heidelberg (2004)"},{"key":"7_CR29","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1109\/MICRO.2005.20","volume-title":"MICRO 38: Proceedings of the 38th annual IEEE\/ACM International Symposium on Microarchitecture","author":"S. Larsen","year":"2005","unstructured":"Larsen, S., Rabbah, R., Amarasinghe, S.: Exploiting vector parallelism in software pipelined loops. In: MICRO 38: Proceedings of the 38th annual IEEE\/ACM International Symposium on Microarchitecture, Washington, DC, USA, pp. 119\u2013129. IEEE Computer Society, Los Alamitos (2005)"},{"key":"7_CR30","doi-asserted-by":"publisher","first-page":"281","DOI":"10.1109\/CGO.2006.25","volume-title":"CGO 2006: Proceedings of the International Symposium on Code Generation and Optimization","author":"D. Nuzman","year":"2006","unstructured":"Nuzman, D., Henderson, R.: Multi-platform auto-vectorization. In: CGO 2006: Proceedings of the International Symposium on Code Generation and Optimization, Washington, DC, USA, pp. 281\u2013294. IEEE Computer Society, Los Alamitos (2006)"},{"key":"7_CR31","unstructured":"Intel Corp.: Intel(R) Advanced Vector Extensions Programming Reference (2008), http:\/\/softwarecommunity.intel.com\/isn\/downloads\/intelavx\/Intel-AVX-Programming-Reference-319433003.pdf"}],"container-title":["Lecture Notes in Computer Science","Languages and Compilers for Parallel Computing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-13374-9_7.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,23]],"date-time":"2020-11-23T22:05:31Z","timestamp":1606169131000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-13374-9_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010]]},"ISBN":["9783642133732","9783642133749"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-13374-9_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2010]]}}}