{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T06:08:31Z","timestamp":1743142111218,"version":"3.40.3"},"publisher-location":"Cham","reference-count":18,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319174723"},{"type":"electronic","value":"9783319174730"}],"license":[{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-319-17473-0_25","type":"book-chapter","created":{"date-parts":[[2015,4,30]],"date-time":"2015-04-30T09:59:39Z","timestamp":1430387979000},"page":"382-396","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Efficient Exploitation of Hyper Loop Parallelism in Vectorization"],"prefix":"10.1007","author":[{"given":"Shixiong","family":"Xu","sequence":"first","affiliation":[]},{"given":"David","family":"Gregg","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,5,1]]},"reference":[{"key":"25_CR1","volume-title":"Optimizing Compilers for Modern Architectures: A Dependence-Based Approach","author":"K Kennedy","year":"2002","unstructured":"Kennedy, K., Allen, J.R.: Optimizing Compilers for Modern Architectures: A Dependence-Based Approach. Morgan Kaufmann Publishers Inc., San Francisco (2002)"},{"key":"25_CR2","doi-asserted-by":"crossref","unstructured":"Larsen, S., Amarasinghe, S.: Exploiting superword level parallelism with multimedia instruction sets. In: The 2000 Conference on Programming Language Design and Implementation, PLDI 2000 (2000)","DOI":"10.1145\/349299.349320"},{"key":"25_CR3","doi-asserted-by":"crossref","unstructured":"Liu, J., Zhang, Y., Jang, O., Ding, W., Kandemir, M.: A compiler framework for extracting superword level parallelism. In: Proceedings of the 33rd ACM SIGPLAN Conference on Programming Language Design and Implementation, PLDI 2012, pp. 347\u2013358. ACM, New York (2012)","DOI":"10.1145\/2254064.2254106"},{"key":"25_CR4","doi-asserted-by":"crossref","unstructured":"Ramachandran, A., Vienne, J., Van Der Wijngaart, R., Koesterke, L., Sharapov, I.: Performance evaluation of NAS parallel benchmarks on Intel Xeon Phi. In: 2013 42nd International Conference on Parallel Processing (ICPP), pp. 736\u2013743, October 2013","DOI":"10.1109\/ICPP.2013.87"},{"key":"25_CR5","doi-asserted-by":"crossref","unstructured":"Bocchino, Jr., R.L., Adve, V.S.: Vector LLVA: a virtual vector instruction set for media processing. In: The 2006 International Conference on Virtual Execution Environments (2006)","DOI":"10.1145\/1134760.1134769"},{"issue":"6","key":"25_CR6","doi-asserted-by":"publisher","first-page":"753","DOI":"10.1007\/s10766-012-0211-z","volume":"41","author":"H Bae","year":"2013","unstructured":"Bae, H., et al.: The cetus source-to-source compiler infrastructure: overview and evaluation. Int. J. Parallel Program. 41(6), 753\u2013767 (2013)","journal-title":"Int. J. Parallel Program."},{"key":"25_CR7","doi-asserted-by":"crossref","unstructured":"Nuzman, D., et al.: Auto-vectorization of Interleaved Data for SIMD. In: The 2006 Conference on Programming Language Design and Implementation, PLDI 2006 (2006)","DOI":"10.1145\/1133981.1133997"},{"key":"25_CR8","doi-asserted-by":"crossref","unstructured":"Ren, G., et al.: Optimizing data permutations for SIMD devices. In: The 2006 Conference on Programming Language Design and Implementation (2006)","DOI":"10.1145\/1133981.1133996"},{"key":"25_CR9","unstructured":"Melax, S.: 3D Vector Normalization Using 256-Bit Intel$$\\textregistered $$ Advanced Vector Extensions. Intel Developer Zone (2012)"},{"key":"25_CR10","doi-asserted-by":"crossref","unstructured":"Pennycook, S.J., et al.: Exploring SIMD for molecular dynamics, using Intel Xeon processors and Inte Xeon Phi coprocessors. In: The 27th International Symposium on Parallel and Distributed Processing, IPDPS 2013 (2013)","DOI":"10.1109\/IPDPS.2013.44"},{"key":"25_CR11","doi-asserted-by":"crossref","unstructured":"Nuzman, D., Zaks, A.: Outer-loop vectorization: revisited for short SIMD architectures. In: The 2008 Conference on Parallel Architectures and Compilation Techniques (2008)","DOI":"10.1145\/1454115.1454119"},{"key":"25_CR12","doi-asserted-by":"crossref","unstructured":"Nuzman, D., et al.: Vapor SIMD: auto-vectorize once, run everywhere. In: The 2011 International Symposium on Code Generation and Optimization (2011)","DOI":"10.1109\/CGO.2011.5764683"},{"key":"25_CR13","doi-asserted-by":"crossref","unstructured":"Kim, S., Han, H.: Efficient SIMD code generation for irregular kernels. In: The 2012 Symposium on Principles and Practice of Parallel Programming, PPoPP 2012 (2012)","DOI":"10.1145\/2145816.2145824"},{"key":"25_CR14","doi-asserted-by":"crossref","unstructured":"Karrenberg, R., Hack, S.: Whole-function vectorization. In: The 9th International Symposium on Code Generation and Optimization (2011)","DOI":"10.1109\/CGO.2011.5764682"},{"key":"25_CR15","unstructured":"Das, D., Chakraborty, S.S., Lai, M.: Experience with partial SIMDization in Open64 compiler using dynamic programming. In: Open64 Workshop (2012)"},{"key":"25_CR16","doi-asserted-by":"crossref","unstructured":"Trifunovic, K., et al.: Polyhedral-model guided loop-nest auto-vectorization. In: The 2009 International Conference on Parallel Architectures and Compilation Techniques (2009)","DOI":"10.1109\/PACT.2009.18"},{"key":"25_CR17","doi-asserted-by":"crossref","unstructured":"Park, Y., et al.: SIMD defragmenter: efficient ILP realization on data-parallel architectures. In: Proceedings of the Seventeenth International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS XVII (2012)","DOI":"10.1145\/2150976.2151014"},{"key":"25_CR18","doi-asserted-by":"crossref","unstructured":"Wu, P., et al.: An integrated simdization framework using virtual vectors. In: The 2005 Annual International Conference on Supercomputing, SC 2005 (2005)","DOI":"10.1145\/1088149.1088172"}],"container-title":["Lecture Notes in Computer Science","Languages and Compilers for Parallel Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-17473-0_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,10]],"date-time":"2023-02-10T08:12:12Z","timestamp":1676016732000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-17473-0_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783319174723","9783319174730"],"references-count":18,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-17473-0_25","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2015]]},"assertion":[{"value":"1 May 2015","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}