{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:38:52Z","timestamp":1740123532115,"version":"3.37.3"},"reference-count":22,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2015,11,30]],"date-time":"2015-11-30T00:00:00Z","timestamp":1448841600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100002790","name":"Canadian Network for Research and Innovation in Machining Technology, Natural Sciences and Engineering Research Council of Canada","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002790","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2016,2]]},"DOI":"10.1007\/s11227-015-1575-9","type":"journal-article","created":{"date-parts":[[2015,11,30]],"date-time":"2015-11-30T10:56:08Z","timestamp":1448880968000},"page":"468-502","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Tuning framework for stencil computation in heterogeneous parallel platforms"],"prefix":"10.1007","volume":"72","author":[{"given":"Taieb Lamine Ben","family":"Cheikh","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexandra","family":"Aguiar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sofiene","family":"Tahar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gabriela","family":"Nicolescu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,11,30]]},"reference":[{"key":"1575_CR1","doi-asserted-by":"crossref","unstructured":"Arabnia H (1995) A distributed stereocorrelation algorithm. In: Fourth International conference on computer communications and networks, pp 479\u2013482. doi: 10.1109\/ICCCN.1995.540163","DOI":"10.1109\/ICCCN.1995.540163"},{"issue":"3","key":"1575_CR2","doi-asserted-by":"crossref","first-page":"243","DOI":"10.1007\/BF00130109","volume":"10","author":"H Arabnia","year":"1996","unstructured":"Arabnia H, Bhandarkar S (1996) Parallel stereocorrelation on a reconfigurable multi-ring network. J Supercomput 10(3):243\u2013269. doi: 10.1007\/BF00130109","journal-title":"J Supercomput"},{"key":"1575_CR3","doi-asserted-by":"crossref","unstructured":"Arabnia H, Oliver M (1987) Arbitrary rotation of raster images with SIMD machine architectures. Comput Graph Forum. doi: 10.1111\/j.1467-8659.1987.tb00340.x","DOI":"10.1111\/j.1467-8659.1987.tb00340.x"},{"key":"1575_CR4","first-page":"201","volume":"9","author":"S Bhandarkar","year":"1995","unstructured":"Bhandarkar S, Arabnia H, Smith J (1995) A reconfigurable architecture for image processing and computer vision. PRAI 9:201\u2013229","journal-title":"PRAI"},{"key":"1575_CR5","doi-asserted-by":"crossref","unstructured":"Calandra H, Dolbeau R, Fortin P, Lamotte JL, Said I (2013) Evaluation of successive CPUs\/APUs\/GPUs based on an OpenCL finite difference stencil. In: 21st euromicro international conference on parallel, distributed and network-based processing (PDP). IEEE, pp 405\u2013409","DOI":"10.1109\/PDP.2013.65"},{"key":"1575_CR6","unstructured":"Cook S (2013) CUDA programming: a developer\u2019s guide to parallel computing with GPUs, 1st edn. Morgan Kaufmann Publishers Inc., San Francisco, CA. ISBN 9780124159334, 9780124159884"},{"key":"1575_CR7","unstructured":"Cuda N (2014) NVIDIA CUDA C programming guide v7.0. Tech. rep. http:\/\/www.bibsonomy.org\/bibtex\/2e90a6474d85eac083c921cf5be29f6ef\/toevanen"},{"key":"1575_CR8","doi-asserted-by":"crossref","unstructured":"Datta K, Murphy M, Volkov V, Williams S, Carter J, Oliker L, Patterson D, Shalf J, Yelick K (2008) Stencil computation optimization and auto-tuning on state-of-the-art multicore architectures. In: Proceedings of the 2008 ACM\/IEEE conference on supercomputing. IEEE Press, p 4","DOI":"10.1109\/SC.2008.5222004"},{"key":"1575_CR9","doi-asserted-by":"crossref","unstructured":"Djabelkhir A, Seznec A (2003) Characterization of embedded applications for decoupled processor architecture. In: IEEE international workshop on workload characterization (WWC-6). IEEE, pp 119\u2013127","DOI":"10.1109\/WWC.2003.1249063"},{"key":"1575_CR10","unstructured":"Eberhart P, Said I, Fortin P, Calandra H (2014) Hybrid strategy for stencil computations on the apu. In: Proceedings of the 1st international workshop on high-performance stencil computations, Vienna, pp 43\u201349"},{"key":"1575_CR11","doi-asserted-by":"crossref","unstructured":"Grosser T, Cohen A, Kelly PH, Ramanujam J, Sadayappan P, Verdoolaege S (2013) Split tiling for gpus: automatic parallelization using trapezoidal tiles. In: Proceedings of the 6th workshop on general purpose processor using graphics processing units. ACM, pp 24\u201331","DOI":"10.1145\/2458523.2458526"},{"key":"1575_CR12","doi-asserted-by":"crossref","unstructured":"Krishnamoorthy S, Baskaran M, Bondhugula U, Ramanujam J, Rountev A, Sadayappan P (2007) Effective automatic parallelization of stencil computations. In: ACM sigplan notices, vol 42. ACM, pp 235\u2013244","DOI":"10.1145\/1250734.1250761"},{"key":"1575_CR13","first-page":"251","volume-title":"IKE","author":"D Luper","year":"2007","unstructured":"Luper D, Cameron D, Miller J, Arabnia HR (2007) Spatial and temporal target association through semantic analysis and gps data mining. In: Arabnia HR, Hashemi RR (eds) IKE. CSREA Press, USA, pp 251\u2013257"},{"issue":"4","key":"1575_CR14","first-page":"59","volume":"9","author":"T Lutz","year":"2013","unstructured":"Lutz T, Fensch C, Cole M (2013) Partans: an autotuning framework for stencil computation on multi-gpu systems. ACM Trans Archit Code Optim (TACO) 9(4):59","journal-title":"ACM Trans Archit Code Optim (TACO)"},{"key":"1575_CR15","doi-asserted-by":"crossref","unstructured":"Meng J, Skadron K (2009) Performance modeling and automatic ghost zone optimization for iterative stencil loops on GPUs. In: Proceedings of the 23rd international conference on supercomputing. ACM, pp 256\u2013265","DOI":"10.1145\/1542275.1542313"},{"key":"1575_CR16","doi-asserted-by":"crossref","unstructured":"Pienaar JA, Raghunathan A, Chakradhar S (2011) Mdr: performance model driven runtime for heterogeneous parallel platforms. In: Proceedings of the international conference on supercomputing. ACM, pp 225\u2013234","DOI":"10.1145\/1995896.1995933"},{"key":"1575_CR17","doi-asserted-by":"crossref","unstructured":"Rahbarinia B, Pedram M, Arabnia H, Alavi Z (2010) A multi-objective scheme to hide sequential patterns. In: The 2nd international conference on computer and automation engineering (ICCAE), vol 1, pp 153\u2013158. doi: 10.1109\/ICCAE.2010.5451977","DOI":"10.1109\/ICCAE.2010.5451977"},{"key":"1575_CR18","first-page":"81","volume":"2014","author":"S Tabik","year":"2014","unstructured":"Tabik S, Murarasu A, Romero LF (2014) Evaluating the fissionfusion transformation of an iterative multiple 3D-stencil on GPUs. HiStencils 2014:81","journal-title":"HiStencils"},{"key":"1575_CR19","doi-asserted-by":"crossref","unstructured":"Tang WT, Tan WJ, Krishnamoorthy R, Wong YW, Kuo Sh, Goh RSM, Turner SJ, Wong WF (2013) Optimizing and auto-tuning iterative stencil loops for GPUs with the in-plane method. In: IEEE 27th international symposium on parallel and distributed processing (IPDPS). IEEE, pp 452\u2013462","DOI":"10.1109\/IPDPS.2013.79"},{"key":"1575_CR20","doi-asserted-by":"crossref","unstructured":"Tang Y, Chowdhury RA, Kuszmaul BC, Luk CK, Leiserson CE (2011) The pochoir stencil compiler. In: Proceedings of the twenty-third annual ACM symposium on parallelism in algorithms and architectures. ACM, pp 117\u2013128","DOI":"10.1145\/1989493.1989508"},{"key":"1575_CR21","doi-asserted-by":"crossref","unstructured":"Wu H, Diamos G, Wang J, Cadambi S, Yalamanchili S, Chakradhar S (2012) Optimizing data warehousing applications for GPUs using kernel fusion\/fission. In: IEEE 26th international on parallel and distributed processing symposium workshops and PhD forum (IPDPSW). IEEE, pp 2433\u20132442","DOI":"10.1109\/IPDPSW.2012.300"},{"key":"1575_CR22","doi-asserted-by":"crossref","unstructured":"Xu C, Kirk SR, Jenkins S (2009) Tiling for performance tuning on different models of gpus. In: Proceedings of the 2009 second international symposium on information science and engineering (ISISE\u201909). IEEE Computer Society, Washington, DC, pp 500\u2013504. doi: 10.1109\/ISISE.2009.60","DOI":"10.1109\/ISISE.2009.60"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-015-1575-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-015-1575-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-015-1575-9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,1]],"date-time":"2019-06-01T06:40:44Z","timestamp":1559371244000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-015-1575-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,11,30]]},"references-count":22,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2016,2]]}},"alternative-id":["1575"],"URL":"https:\/\/doi.org\/10.1007\/s11227-015-1575-9","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"type":"print","value":"0920-8542"},{"type":"electronic","value":"1573-0484"}],"subject":[],"published":{"date-parts":[[2015,11,30]]}}}