{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,9,29]],"date-time":"2023-09-29T20:23:05Z","timestamp":1696018985795},"reference-count":34,"publisher":"Elsevier BV","issue":"5-6","license":[{"start":{"date-parts":[[2014,5,1]],"date-time":"2014-05-01T00:00:00Z","timestamp":1398902400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2016,6,29]],"date-time":"2016-06-29T00:00:00Z","timestamp":1467158400000},"content-version":"am","delay-in-days":790,"URL":"http:\/\/www.elsevier.com\/open-access\/userlicense\/1.0\/"},{"start":{"date-parts":[[2014,5,1]],"date-time":"2014-05-01T00:00:00Z","timestamp":1398902400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2014,5,1]],"date-time":"2014-05-01T00:00:00Z","timestamp":1398902400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2014,5,1]],"date-time":"2014-05-01T00:00:00Z","timestamp":1398902400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2014,5,1]],"date-time":"2014-05-01T00:00:00Z","timestamp":1398902400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2014,5,1]],"date-time":"2014-05-01T00:00:00Z","timestamp":1398902400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001691","name":"Japan Society for the Promotion of Science","doi-asserted-by":"publisher","award":["23700057","23300007"]},{"DOI":"10.13039\/501100002241","name":"Japan Science and Technology Agency","doi-asserted-by":"publisher"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Parallel Computing"],"published-print":{"date-parts":[[2014,5]]},"DOI":"10.1016\/j.parco.2014.03.013","type":"journal-article","created":{"date-parts":[[2014,4,1]],"date-time":"2014-04-01T15:03:34Z","timestamp":1396364614000},"page":"59-69","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":16,"title":["Improving cache locality for GPU-based volume rendering"],"prefix":"10.1016","volume":"40","author":[{"given":"Yuki","family":"Sugimoto","sequence":"first","affiliation":[]},{"given":"Fumihiko","family":"Ino","sequence":"additional","affiliation":[]},{"given":"Kenichi","family":"Hagihara","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"3","key":"10.1016\/j.parco.2014.03.013_b0005","first-page":"65","article-title":"Volume rendering","volume":"22","author":"Drebin","year":"1988","journal-title":"Comput. Graphics (Proc. SIGGRAPH\u201988)"},{"issue":"2","key":"10.1016\/j.parco.2014.03.013_b0010","doi-asserted-by":"crossref","first-page":"24","DOI":"10.1109\/38.50670","article-title":"Volumetric rendering of computed tomography data: principles and techniques","volume":"10","author":"Ney","year":"1990","journal-title":"IEEE Comput. Graphics Appl."},{"issue":"11\/12","key":"10.1016\/j.parco.2014.03.013_b0015","doi-asserted-by":"crossref","first-page":"1745","DOI":"10.1016\/j.parco.2003.05.015","article-title":"An improved binary-swap compositing for sort-last parallel rendering on distributed memory multiprocessors","volume":"29","author":"Takeuchi","year":"2003","journal-title":"Parallel Comput."},{"key":"10.1016\/j.parco.2014.03.013_b0020","unstructured":"S.P. Uselton, Volume Rendering for Computational Fluid Dynamics: Initial Results, Tech. Rep. RNR-91-026, Nasa Ames Research Center, 1991."},{"key":"10.1016\/j.parco.2014.03.013_b0025","unstructured":"D.S. Ebert, R. Yagel, J. Scott, Y. Kurzion, Volume rendering methods for computational fluid dynamics visualization, in: Proc. 5th IEEE Visualization Conf. (VIS\u201994), 1994, pp. 355\u2013361."},{"issue":"3","key":"10.1016\/j.parco.2014.03.013_b0030","doi-asserted-by":"crossref","first-page":"350","DOI":"10.1016\/j.cag.2008.04.007","article-title":"A decompression pipeline for accelerating out-of-core volume rendering of time-varying data","volume":"32","author":"Nagayasu","year":"2008","journal-title":"Comput. Graphics"},{"key":"10.1016\/j.parco.2014.03.013_b0035","unstructured":"J. Kr\u00fcger, R. Westermann, Acceleration techniques for GPU-based volume rendering, in: Proc. 14th IEEE Visualization Conf. (VIS\u201903), 2003, pp. 232\u2013239."},{"issue":"1\/3","key":"10.1016\/j.parco.2014.03.013_b0040","first-page":"9","article-title":"Optimizing GPU volume rendering","volume":"14","author":"Rijters","year":"2006","journal-title":"J. WSCG"},{"issue":"2","key":"10.1016\/j.parco.2014.03.013_b0045","doi-asserted-by":"crossref","first-page":"96","DOI":"10.1109\/MC.2007.59","article-title":"How GPUs work","volume":"40","author":"Luebke","year":"2007","journal-title":"Computer"},{"key":"10.1016\/j.parco.2014.03.013_b0050","doi-asserted-by":"crossref","unstructured":"W. Hibbard, D. Santek, Interactivity is the key, in: Proc. Chapel Hill Workshop Volume Visualization (VVS \u201989), 1989, pp. 39\u201343.","DOI":"10.1145\/329129.329356"},{"key":"10.1016\/j.parco.2014.03.013_b0055","unstructured":"NVIDIA Corporation, CUDA Programming Guide Version 4.2. , 2012."},{"key":"10.1016\/j.parco.2014.03.013_b0060","unstructured":"Khronos OpenCL Working Group, The OpenCL specification version 1.1. , 2011."},{"key":"10.1016\/j.parco.2014.03.013_b0065","unstructured":"Y. Sugimoto, F. Ino, K. Hagihara, Improving cache locality for ray casting with CUDA, in: Proc. 25th Int\u2019l Conf. Architecture of Computing Systems Workshops (ARCS Workshops \u201912), 2012, pp. 339\u2013350."},{"key":"10.1016\/j.parco.2014.03.013_b0070","doi-asserted-by":"crossref","unstructured":"A. Nukada, S. Matsuoka, Auto-tuning 3-d FFT library for CUDA GPUs, in: Proc. Int\u2019l Conf. High Performance Computing, Networking, Storage and Analysis (SC\u201909), 2009, p. 10 (CD-ROM).","DOI":"10.1145\/1654059.1654090"},{"key":"10.1016\/j.parco.2014.03.013_b0075","doi-asserted-by":"crossref","unstructured":"P. Guo, L. Wang, Auto-tuning CUDA parameters for sparse matrix-vector multiplication on GPUs, in: Proc. Int\u2019l Conf. Computational and Information Sciences (ICCIS\u201910), 2010, pp. 1154\u20131157.","DOI":"10.1109\/ICCIS.2010.285"},{"key":"10.1016\/j.parco.2014.03.013_b0080","doi-asserted-by":"crossref","unstructured":"S. Kamil, C. Chan, L. Oliker, J. Shalf, S. Williams, An auto-tuning framework for parallel multicore stencil computations, in: Proc. 24th IEEE Int\u2019l Parallel and Distributed Processing Symp. (IPDPS\u201910), 2010, p. 12 (CD-ROM).","DOI":"10.1109\/IPDPS.2010.5470421"},{"issue":"10","key":"10.1016\/j.parco.2014.03.013_b0085","doi-asserted-by":"crossref","first-page":"1389","DOI":"10.1016\/j.jpdc.2008.05.011","article-title":"Program optimization carving for GPU computing","volume":"68","author":"Ryoo","year":"2008","journal-title":"J. Parallel Distrib. Comput."},{"key":"10.1016\/j.parco.2014.03.013_b0090","doi-asserted-by":"crossref","unstructured":"Y. Torres, A. Gonzalez-Escribano, D.R. Llanos, Using Fermi architecture knowledge to speed up CUDA and OpenCL programs, in: Proc. 10th Int\u2019l Symp. Parallel and Distributed Processing and Applications (ISPA\u201912), 2012, pp. 617\u2013624.","DOI":"10.1109\/ISPA.2012.92"},{"issue":"3","key":"10.1016\/j.parco.2014.03.013_b0095","doi-asserted-by":"crossref","first-page":"1150","DOI":"10.1007\/s11227-013-0921-z","article-title":"uBench: exposing the impact of CUDA block geometry in terms of performance","volume":"65","author":"Torres","year":"2013","journal-title":"J. Supercomput."},{"key":"10.1016\/j.parco.2014.03.013_b0100","unstructured":"Y. Liu, E.Z. Zhang, X. Shen, A cross-input adaptive framework for GPU program optimizations, in: Proc. 23th IEEE Int\u2019l Parallel and Distributed Processing Symp. (IPDPS\u201909), 2009, p. 10 (CD-ROM)."},{"issue":"8","key":"10.1016\/j.parco.2014.03.013_b0105","doi-asserted-by":"crossref","first-page":"391","DOI":"10.1016\/j.parco.2011.10.002","article-title":"From CUDA to OpenCL: towards a performance-portable solution for multi-platform GPU programming","volume":"38","author":"Du","year":"2012","journal-title":"Parallel Comput."},{"issue":"1","key":"10.1016\/j.parco.2014.03.013_b0110","doi-asserted-by":"crossref","first-page":"4","DOI":"10.1016\/j.jpdc.2012.04.003","article-title":"Graphics processing unit (GPU) programming strategies and trends in GPU computing","volume":"73","author":"Brodtkorb","year":"2013","journal-title":"J. Parallel Distrib. Comput."},{"key":"10.1016\/j.parco.2014.03.013_b0115","unstructured":"Z. Zheng, K. Mueller, Cache-aware GPU memory scheduling scheme for CT back-projection, in: Proc. Nuclear Science Symp. and Medical Imaging Conf. (NSS\/MIC\u201910), 2010, pp. 2248\u20132251."},{"issue":"2\/3","key":"10.1016\/j.parco.2014.03.013_b0120","doi-asserted-by":"crossref","first-page":"129","DOI":"10.1016\/j.parco.2010.01.004","article-title":"High-performance cone beam reconstruction using CUDA compatible GPUs","volume":"36","author":"Okitsu","year":"2010","journal-title":"Parallel Comput."},{"issue":"10\/11","key":"10.1016\/j.parco.2014.03.013_b0125","doi-asserted-by":"crossref","first-page":"663","DOI":"10.1016\/j.parco.2007.09.006","article-title":"Cache-efficient numerical algorithms using graphics hardware","volume":"33","author":"Govindaraju","year":"2007","journal-title":"Parallel Comput."},{"issue":"3","key":"10.1016\/j.parco.2014.03.013_b0130","doi-asserted-by":"crossref","first-page":"245","DOI":"10.1145\/78964.78965","article-title":"Efficient ray tracing of volume data","volume":"9","author":"Levoy","year":"1990","journal-title":"ACM Trans. Graphics"},{"issue":"3","key":"10.1016\/j.parco.2014.03.013_b0135","doi-asserted-by":"crossref","first-page":"29","DOI":"10.1109\/38.511","article-title":"Display of surfaces from volume data","volume":"8","author":"Levoy","year":"1988","journal-title":"IEEE Comput. Graphics Appl."},{"issue":"2","key":"10.1016\/j.parco.2014.03.013_b0140","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1109\/MM.2005.37","article-title":"The GeForce 6800","volume":"25","author":"Montrym","year":"2005","journal-title":"IEEE Micro"},{"key":"10.1016\/j.parco.2014.03.013_b0145","series-title":"GPU Gems 2: Programming Techniques for High-Performance Graphics and General-Purpose Computation","year":"2005"},{"key":"10.1016\/j.parco.2014.03.013_b0150","unstructured":"G.M. Morton, A Computer Oriented Geodetic Data Base and A New Technique in File Sequencing, Tech. rep., IBM Ltd, Ottawa, Ontario, 1966."},{"key":"10.1016\/j.parco.2014.03.013_b0155","doi-asserted-by":"crossref","unstructured":"M. Matsui, F. Ino, K. Hagihara, Parallel volume rendering with early ray termination for visualizing large-scale datasets, in: Proc. 2nd Int\u2019l Symp. Parallel and Distributed Processing and Applications (ISPA\u201904), 2004, pp. 245\u2013256.","DOI":"10.1007\/978-3-540-30566-8_30"},{"key":"10.1016\/j.parco.2014.03.013_b0160","unstructured":"S. Roettger, S. Guthe, D. Weiskopf, T. Ertl, W. Strasser, Smart hardware-accelerated volume rendering, in: Proc. 5th Eurographics-IEEE TCVG Symp. Visualization (VisSym\u201903), 2003, pp. 231\u2013238."},{"key":"10.1016\/j.parco.2014.03.013_b0165","unstructured":"NVIDIA Corporation, GPU Computing SDK. , 2012."},{"key":"10.1016\/j.parco.2014.03.013_b0170","unstructured":"NVIDIA Corporation, OpenCL SDK. , 2012."}],"container-title":["Parallel Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S016781911400043X?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S016781911400043X?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2023,7,19]],"date-time":"2023-07-19T06:19:22Z","timestamp":1689747562000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S016781911400043X"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,5]]},"references-count":34,"journal-issue":{"issue":"5-6","published-print":{"date-parts":[[2014,5]]}},"alternative-id":["S016781911400043X"],"URL":"http:\/\/dx.doi.org\/10.1016\/j.parco.2014.03.013","relation":{},"ISSN":["0167-8191"],"issn-type":[{"value":"0167-8191","type":"print"}],"subject":["Artificial Intelligence","Computer Graphics and Computer-Aided Design","Computer Networks and Communications","Hardware and Architecture","Theoretical Computer Science","Software"],"published":{"date-parts":[[2014,5]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Improving cache locality for GPU-based volume rendering","name":"articletitle","label":"Article Title"},{"value":"Parallel Computing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.parco.2014.03.013","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"Copyright \u00a9 2014 Elsevier B.V. All rights reserved.","name":"copyright","label":"Copyright"}]}}