{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,6,7]],"date-time":"2024-06-07T16:38:37Z","timestamp":1717778317480},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2015,5,29]],"date-time":"2015-05-29T00:00:00Z","timestamp":1432857600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2015,9]]},"DOI":"10.1007\/s11227-015-1443-7","type":"journal-article","created":{"date-parts":[[2015,5,28]],"date-time":"2015-05-28T09:32:36Z","timestamp":1432805556000},"page":"3440-3455","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Fast filter bank convolution for three-dimensional wavelet transform by shared memory on mobile GPU computing"],"prefix":"10.1007","volume":"71","author":[{"given":"Di","family":"Zhao","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,5,29]]},"reference":[{"key":"1443_CR1","doi-asserted-by":"crossref","unstructured":"Bordawekar R, Bondhugula U, Rao R (2010) Believe it or not: mult-core CPUs can match GPU performance for a FLOP-intensive application! In: Proceedings of the 19th international conference on Parallel architectures and compilation techniques, 2010. ACM, Vienna, Austria, pp. 537\u2013538","DOI":"10.1145\/1854273.1854340"},{"key":"1443_CR2","doi-asserted-by":"crossref","unstructured":"Huang Q et al (2008) GPU as a general purpose computing resource. In: Ninth international conference on parallel and distributed computing, applications and technologies, 2008. PDCAT 2008","DOI":"10.1109\/PDCAT.2008.38"},{"key":"1443_CR3","doi-asserted-by":"crossref","unstructured":"Suda R et al (2009) Aspects of GPU for general purpose high performance computing. In: Proceedings of the 2009 Asia and South Pacific Design Automation Conference. 2009. IEEE Press, Yokohama, Japan, pp 216\u2013223","DOI":"10.1109\/ASPDAC.2009.4796483"},{"key":"1443_CR4","doi-asserted-by":"crossref","unstructured":"Collange S, Defour D, Tisserand A (2009) Power consumption of GPUs from a software perspective. In: Allen G et al (eds) Computational science\u2014ICCS 2009. Springer, Berlin, pp 914\u2013923","DOI":"10.1007\/978-3-642-01970-8_92"},{"key":"1443_CR5","volume-title":"CUDA by example: an introduction to general-purpose GPU programming","author":"J Sanders","year":"2010","unstructured":"Sanders J, Kandrot E (2010) CUDA by example: an introduction to general-purpose GPU programming. Pearson education, Boston"},{"issue":"3","key":"1443_CR6","doi-asserted-by":"crossref","first-page":"400","DOI":"10.1007\/s10766-012-0201-1","volume":"41","author":"C Gou","year":"2013","unstructured":"Gou C, Gaydadjiev GN (2013) Addressing GPU on-chip shared memory bank conflicts using elastic pipeline. Int J Parallel Program 41(3):400\u2013429","journal-title":"Int J Parallel Program"},{"key":"1443_CR7","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-642-16405-7","volume-title":"GPU solutions to multi-scale problems in science and engineering","author":"DA Yuen","year":"2013","unstructured":"Yuen DA et al (2013) GPU solutions to multi-scale problems in science and engineering. Springer, Berlin"},{"key":"1443_CR8","unstructured":"Lobeiras J, Amor M, Doallo R (2011) Performance evaluation of GPU memory hierarchy using the FFT. In: The 11th international conference on computational and mathematical methods in science and engineering, CMMSE 2011"},{"issue":"3","key":"1443_CR9","doi-asserted-by":"crossref","first-page":"152","DOI":"10.1145\/1555815.1555775","volume":"37","author":"S Hong","year":"2009","unstructured":"Hong S, Kim H (2009) An analytical model for a GPU architecture with memory-level and thread-level parallelism awareness. SIGARCH Comput Arch News 37(3):152\u2013163","journal-title":"SIGARCH Comput Arch News"},{"key":"1443_CR10","doi-asserted-by":"crossref","unstructured":"Ryoo S et al (2008) Optimization principles and application performance evaluation of a multithreaded GPU using CUDA. In: Proceedings of the 13th ACM SIGPLAN symposium on principles and practice of parallel programming. ACM, Salt Lake City, UT, USA, pp 73\u201382","DOI":"10.1145\/1345206.1345220"},{"key":"1443_CR11","doi-asserted-by":"crossref","unstructured":"Luebke D (2008) CUDA: scalable parallel programming for high-performance scientific computing. In: 5th IEEE international symposium on biomedical imaging: from nano to macro, 2008. ISBI 2008","DOI":"10.1109\/ISBI.2008.4541126"},{"key":"1443_CR12","doi-asserted-by":"crossref","unstructured":"Ryoo S et al (2008) Program optimization space pruning for a multithreaded gpu. In: Proceedings of the 6th annual IEEE\/ACM international symposium on code generation and optimization, 2008. ACM, Boston, MA, USA, pp 195\u2013204","DOI":"10.1145\/1356058.1356084"},{"issue":"5","key":"1443_CR13","doi-asserted-by":"crossref","first-page":"105","DOI":"10.1145\/1837853.1693470","volume":"45","author":"SS Baghsorkhi","year":"2010","unstructured":"Baghsorkhi SS et al (2010) An adaptive performance modeling tool for GPU architectures. SIGPLAN Not 45(5):105\u2013114","journal-title":"SIGPLAN Not"},{"key":"1443_CR14","doi-asserted-by":"crossref","unstructured":"Zhao D, Yu J (2015) Efficiently solving tri-diagonal system by chunked cyclic reduction and single-GPU shared memory. J Supercomput 71(2):369\u2013390","DOI":"10.1007\/s11227-014-1299-2"},{"issue":"6","key":"1443_CR15","doi-asserted-by":"crossref","first-page":"804","DOI":"10.1109\/TC.2011.112","volume":"61","author":"L Shi","year":"2012","unstructured":"Shi L et al (2012) vCUDA: GPU-accelerated high-performance computing in virtual machines. IEEE Trans Comput 61(6):804\u2013816","journal-title":"IEEE Trans Comput"},{"key":"1443_CR16","doi-asserted-by":"crossref","unstructured":"Gou C, Gaydadjiev GN (2011) Elastic pipeline: addressing GPU on-chip shared memory bank conflicts. In: Proceedings of the 8th ACM international conference on computing frontiers, 2011. ACM, Ischia, Italy, pp 1\u201311","DOI":"10.1145\/2016604.2016608"},{"issue":"6","key":"1443_CR17","doi-asserted-by":"crossref","first-page":"86","DOI":"10.1145\/1809028.1806606","volume":"45","author":"Y Yang","year":"2010","unstructured":"Yang Y et al (2010) A GPGPU compiler for memory optimization and parallelism management. SIGPLAN Not 45(6):86\u201397","journal-title":"SIGPLAN Not"},{"issue":"10","key":"1443_CR18","doi-asserted-by":"crossref","first-page":"1370","DOI":"10.1016\/j.jpdc.2008.05.014","volume":"68","author":"S Che","year":"2008","unstructured":"Che S et al (2008) A performance study of general-purpose applications on graphics processors using CUDA. J Parallel Distrib Comput 68(10):1370\u20131380","journal-title":"J Parallel Distrib Comput"},{"key":"1443_CR19","doi-asserted-by":"crossref","unstructured":"Han TD, Abdelrahman TS (2009) hiCUDA: a high-level directive-based language for GPU programming. In: Proceedings of 2nd workshop on general purpose processing on graphics processing units, 2009. ACM, Washington, DC, pp 52\u201361","DOI":"10.1145\/1513895.1513902"},{"key":"1443_CR20","unstructured":"Mei C, Jiang H, Jenness J (2010) CUDA-based AES parallelization with fine-tuned GPU memory utilization. In: IEEE international symposium on parallel and distributed processing, workshops and Phd forum (IPDPSW), 2010"},{"key":"1443_CR21","doi-asserted-by":"crossref","unstructured":"Govindaraju NK et al (2006) A memory model for scientific algorithms on graphics processors. In: SC 2006 Conference, Proceedings of the ACM\/IEEE","DOI":"10.1109\/SC.2006.2"},{"key":"1443_CR22","doi-asserted-by":"crossref","unstructured":"Gupta V et al (2009) GViM: GPU-accelerated virtual machines. In: Proceedings of the 3rd ACM workshop on system-level virtualization for high performance computing, 2009. ACM, Nuremburg, Germany, pp 17\u201324","DOI":"10.1145\/1519138.1519141"},{"issue":"3","key":"1443_CR23","doi-asserted-by":"crossref","first-page":"663","DOI":"10.1007\/s11432-011-4497-z","volume":"55","author":"D Chen","year":"2012","unstructured":"Chen D, Chen W, Zheng W (2012) CUDA-Zero: a framework for porting shared memory GPU applications to multi-GPUs. Sci China Inf Sci 55(3):663\u2013676","journal-title":"Sci China Inf Sci"},{"key":"1443_CR24","doi-asserted-by":"crossref","first-page":"18","DOI":"10.1016\/j.compfluid.2014.01.005","volume":"93","author":"KI Karantasis","year":"2014","unstructured":"Karantasis KI, Polychronopoulos ED, Ekaterinaris JA (2014) High order accurate simulation of compressible flows on GPU clusters over software distributed shared memory. Comput Fluids 93:18\u201329","journal-title":"Comput Fluids"},{"key":"1443_CR25","doi-asserted-by":"crossref","unstructured":"Ji F, Ma X (2011) Using shared memory to accelerate MapReduce on graphics processing units. In: 2011 IEEE international parallel and distributed processing symposium (IPDPS), IEEE","DOI":"10.1109\/IPDPS.2011.80"},{"key":"1443_CR26","doi-asserted-by":"crossref","unstructured":"Che S, Sheaffer JW, Skadron K (2011) Dymaxion: optimizing memory access patterns for heterogeneous systems. In: Proceedings of 2011 international conference for high performance computing, networking, storage and analysis, 2011. ACM, Seattle, Washington, pp 1\u201311","DOI":"10.1145\/2063384.2063401"},{"key":"1443_CR27","doi-asserted-by":"crossref","unstructured":"Lee W-J et al (2012) SGRT: a scalable mobile GPU architecture based on ray tracing. In: ACM SIGGRAPH 2012 posters, 2012. ACM, Los Angeles, California","DOI":"10.1145\/2343045.2343048"},{"key":"1443_CR28","doi-asserted-by":"crossref","unstructured":"Lee W-J et al (2013) SGRT: a mobile GPU architecture for real-time ray tracing. In: Proceedings of the 5th high-performance graphics conference, 2013. ACM, Anaheim, California, pp 109\u2013119","DOI":"10.1145\/2492045.2492057"},{"key":"1443_CR29","doi-asserted-by":"crossref","unstructured":"Nah J-H et al (2010) MobiRT: an implementation of OpenGL ES-based CPU\u2013GPU hybrid ray tracer for mobile devices. In: ACM SIGGRAPH ASIA 2010 sketches, 2010. ACM, Seoul, Republic of Korea, pp 1\u20132","DOI":"10.1145\/1899950.1900000"},{"key":"1443_CR30","doi-asserted-by":"crossref","unstructured":"Singhal N et al (2011) Design and optimization of image processing algorithms on mobile GPU. In: ACM SIGGRAPH 2011 posters, 2011. ACM, Vancouver, British Columbia, Canada, pp 1\u20131","DOI":"10.1145\/2037715.2037741"},{"issue":"9","key":"1443_CR31","doi-asserted-by":"crossref","first-page":"1292","DOI":"10.1109\/TCSVT.2012.2199389","volume":"22","author":"A Abramov","year":"2012","unstructured":"Abramov A et al (2012) Real-time segmentation of stereo videos on a portable system with a mobile GPU. IEEE Trans Circuits Syst Video Technol 22(9):1292\u20131305","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"1443_CR32","doi-asserted-by":"crossref","unstructured":"Singhal N, Yoo JW, Choi HY, Park IK (2010) Implementation and optimization of image processing algorithms on handheld GPU. In: 2010 17th IEEE international conference on image processing (ICIP)","DOI":"10.1109\/ICIP.2010.5651740"},{"key":"1443_CR33","doi-asserted-by":"crossref","unstructured":"Bachoo A (2010) Using the CPU and GPU for real-time video enhancement on a mobile computer. In: 2010 IEEE 10th international conference on signal processing (ICSP)","DOI":"10.1109\/ICOSP.2010.5657164"},{"issue":"1","key":"1443_CR34","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1007\/s11042-012-1252-4","volume":"69","author":"MB L\u00f3pez","year":"2014","unstructured":"L\u00f3pez MB et al (2014) Interactive multi-frame reconstruction for mobile devices. Multimed Tools Appl 69(1):31\u201351","journal-title":"Multimed Tools Appl"},{"key":"1443_CR35","doi-asserted-by":"crossref","unstructured":"Rister B, Wang G, Wu M, Cavallaro JR (2013) A fast and efficient sift detector using the mobile GPU. In: 2013 IEEE international conference on acoustics, speech and signal processing (ICASSP)","DOI":"10.1109\/ICASSP.2013.6638141"},{"key":"1443_CR36","unstructured":"Cheng K-T, Wang Y-C (2011) Using mobile GPU for general-purpose computing\u2014a case study of face recognition on smartphones. In: 2011 international symposium on VLSI design, automation and test (VLSI-DAT)"},{"key":"1443_CR37","doi-asserted-by":"crossref","unstructured":"Wang G et al (2013) Accelerating computer vision algorithms using OpenCL framework on the mobile GPU\u2014a case study. In: 2013 IEEE international conference on acoustics, speech and signal processing (ICASSP)","DOI":"10.1109\/ICASSP.2013.6638132"},{"key":"1443_CR38","doi-asserted-by":"crossref","unstructured":"Wang Y-C, Donyanavard B, Cheng K-T (2012) Energy-aware real-time face recognition system on mobile CPU-GPU platform. In: Kutulakos KN (ed) Trends and topics in computer vision. Springer, Berlin, pp 411\u2013422","DOI":"10.1007\/978-3-642-35740-4_32"},{"key":"1443_CR39","doi-asserted-by":"crossref","unstructured":"Wang Y-C, Cheng K-T (2011) Energy-optimized mapping of application to smartphone platform\u2014a case study of mobile face recognition. In: 2011 IEEE computer society conference on computer vision and pattern recognition workshops (CVPRW)","DOI":"10.1109\/CVPRW.2011.5981820"},{"key":"1443_CR40","doi-asserted-by":"crossref","unstructured":"Wang Y-C, Pang S, Cheng K-T (2010) A GPU-accelerated face annotation system for smartphones. In: Proceedings of the international conference on Multimedia, 2010. ACM, Firenze, Italy, pp 1667\u20131668","DOI":"10.1145\/1873951.1874317"},{"key":"1443_CR41","doi-asserted-by":"crossref","unstructured":"Hartl A et al (2011) Rapid reconstruction of small objects on mobile phones. In: 2011 IEEE computer society conference on computer vision and pattern recognition workshops (CVPRW)","DOI":"10.1109\/CVPRW.2011.5981789"},{"key":"1443_CR42","unstructured":"Nvidia (2014) NVIDIA Tegra K1 A new era in mobile computing. NVIDIA Corporation, San Jose, California"},{"key":"1443_CR43","unstructured":"Zhao D et al (2014) Acceleration of l1-regularization MRI reconstruction by lookup table and GPU shared memory based DWT. In: GPU technology conference, 2014, San Jose California"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-015-1443-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-015-1443-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-015-1443-7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,1]],"date-time":"2019-06-01T06:40:40Z","timestamp":1559371240000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-015-1443-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,5,29]]},"references-count":43,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2015,9]]}},"alternative-id":["1443"],"URL":"https:\/\/doi.org\/10.1007\/s11227-015-1443-7","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,5,29]]}}}