{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,12,31]],"date-time":"2022-12-31T11:12:26Z","timestamp":1672485146603},"reference-count":25,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2015,9,1]],"date-time":"2015-09-01T00:00:00Z","timestamp":1441065600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2015,11]]},"DOI":"10.1007\/s11227-015-1514-9","type":"journal-article","created":{"date-parts":[[2015,8,31]],"date-time":"2015-08-31T01:50:53Z","timestamp":1440985853000},"page":"4192-4214","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Interval-based performance modeling for the all-pairs-shortest-path problem on GPUs"],"prefix":"10.1007","volume":"71","author":[{"given":"J\u00f6rg","family":"D\u00fcmmler","sequence":"first","affiliation":[]},{"given":"Sebastian","family":"Egerland","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,9,1]]},"reference":[{"issue":"5","key":"1514_CR1","doi-asserted-by":"crossref","first-page":"105","DOI":"10.1145\/1837853.1693470","volume":"45","author":"S Baghsorkhi","year":"2010","unstructured":"Baghsorkhi S, Delahaye M, Patel S, Gropp W, Hwu WmW (2010) An adaptive performance modeling tool for GPU architectures. SIGPLAN Not 45(5):105\u2013114","journal-title":"SIGPLAN Not"},{"issue":"1","key":"1514_CR2","doi-asserted-by":"crossref","first-page":"4","DOI":"10.1016\/j.jpdc.2012.04.003","volume":"73","author":"A Brodtkorb","year":"2013","unstructured":"Brodtkorb A, Hagen T, S\u00e6tra M (2013) Graphics processing unit (GPU) programming strategies and trends in GPU computing. J Parallel Distrib Comput 73(1):4\u201313","journal-title":"J Parallel Distrib Comput"},{"issue":"5\u20136","key":"1514_CR3","doi-asserted-by":"crossref","first-page":"241","DOI":"10.1016\/j.parco.2009.12.002","volume":"36","author":"A Bulu\u00e7","year":"2010","unstructured":"Bulu\u00e7 A, Gilbert J, Budak C (2010) Solving path problems on the GPU. Parallel Comput 36(5\u20136):241\u2013253","journal-title":"Parallel Comput"},{"issue":"2","key":"1514_CR4","doi-asserted-by":"crossref","first-page":"238","DOI":"10.1177\/1094342013507960","volume":"28","author":"S Che","year":"2014","unstructured":"Che S, Skadron K (2014) BenchFriend: correlating the performance of GPU benchmarks. Int J High Perform Comput Appl 28(2):238\u2013250","journal-title":"Int J High Perform Comput Appl"},{"key":"1514_CR5","doi-asserted-by":"crossref","unstructured":"Guo P, Wang L (2014) Accurate cross architecture performance modeling for sparse matrix\u2014vector multiplication (SpMV) on GPUs. Concurr Comput Pract Exp 27(13):3281\u20133294. doi: 10.1002\/cpe.3217","DOI":"10.1002\/cpe.3217"},{"key":"1514_CR6","doi-asserted-by":"crossref","unstructured":"Harish P, Narayanan P (2007) Accelerating large graph algorithms on the GPU using CUDA. In: Proceedings of the 14th international conference on high performance computing (HiPC \u201907). Springer, Berlin pp 197\u2013208","DOI":"10.1007\/978-3-540-77220-0_21"},{"key":"1514_CR7","doi-asserted-by":"crossref","unstructured":"Hasan K, Chatterjee A, Radhakrishnan S, Antonio J (2014) Performance prediction model and analysis for compute-intensive tasks on GPUs. In: Hsu CH, Shi X, Salapura V (eds) Proceedings of the 11th IFIP international conference on network and parallel computing (NPC\u201914). Lecture notes in computer science, vol 8707. Springer, Berlin, pp 612\u2013617","DOI":"10.1007\/978-3-662-44917-2_65"},{"issue":"3","key":"1514_CR8","doi-asserted-by":"crossref","first-page":"152","DOI":"10.1145\/1555815.1555775","volume":"37","author":"S Hong","year":"2009","unstructured":"Hong S, Kim H (2009) An analytical model for a GPU architecture with memory-level and thread-level parallelism awareness. SIGARCH Comput Archit News 37(3):152\u2013163","journal-title":"SIGARCH Comput Archit News"},{"key":"1514_CR9","doi-asserted-by":"crossref","unstructured":"Hu Z, Liu G, Dong W (2014) A throughput-aware analytical performance model for GPU applications. In: Proceedings of the 10th annual conference on advanced computer architecture (ACA \u201914). Springer, Berlin, pp 98\u2013112","DOI":"10.1007\/978-3-662-44491-7_8"},{"key":"1514_CR10","doi-asserted-by":"crossref","unstructured":"Karami A, Mirsoleimani S, Khunjush F (2013) A statistical performance prediction model for OpenCL kernels on NVIDIA GPUs. In: Proceedings of the 17th CSI international symposium on computer architecture and digital systems (CADS \u201913). IEEE, pp 15\u201322","DOI":"10.1109\/CADS.2013.6714232"},{"key":"1514_CR11","doi-asserted-by":"crossref","unstructured":"Kerr A, Diamos G, Yalamanchili S (2010) Modeling GPU\u2013CPU workloads and systems. In: Proceedings of the 3rd workshop on general-purpose computation on graphics processing units (GPGPU \u201910). ACM, New York, pp 31\u201342","DOI":"10.1145\/1735688.1735696"},{"key":"1514_CR12","unstructured":"Khronos OpenCL Working Group (2013) The OpenCL specification, Version 2.0. http:\/\/www.khronos.org\/opencl"},{"key":"1514_CR13","doi-asserted-by":"crossref","unstructured":"Kothapalli K, Mukherjee R, Rehman M, Patidar S, Narayanan P, Srinathan K (2009) A performance prediction model for the CUDA GPGPU platform. In: Proceedings of the 2009 international conference on high performance computing (HiPC \u201909). IEEE, pp 463\u2013472","DOI":"10.1109\/HIPC.2009.5433179"},{"key":"1514_CR14","unstructured":"Lai J, Seznec A (2013) Performance upper bound analysis and optimization of SGEMM on Fermi and Kepler GPUs. In: Proceedings of the 2013 IEEE\/ACM international symposium on code generation and optimization (CGO \u201913). IEEE Computer Society, Washington, pp 1\u201310"},{"key":"1514_CR15","unstructured":"Lawler E (1976) Combinatorial optimization: networks and matroids. Holt, Rinehart and Winston, New York"},{"issue":"99","key":"1514_CR16","first-page":"1","volume":"PP","author":"U Lopez-Novoa","year":"2014","unstructured":"Lopez-Novoa U, Mendiburu A, Miguel-Alonso J (2014) A survey of performance modeling and simulation techniques for accelerator-based computing. IEEE Trans Parallel Distrib Syst PP(99):1\u20131","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"1514_CR17","doi-asserted-by":"crossref","unstructured":"Ma L, Chamberlain R, Agrawal K (2014) Performance modeling for highly-threaded many-core GPUs. In: Proceedings of the 25th international conference on application-specific systems, architectures and processors (ASAP \u201914). IEEE, pp. 84\u201391","DOI":"10.1109\/ASAP.2014.6868641"},{"key":"1514_CR18","doi-asserted-by":"crossref","unstructured":"Meng J, Morozov V, Kumaran K, Vishwanath V, Uram T (2011) GROPHECY: GPU performance projection from CPU code skeletons. In: Proceedings of the 2011 international conference for high performance computing, networking, storage and analysis (SC \u201911). ACM, New York, pp 14:1\u201314:11","DOI":"10.1145\/2063384.2063402"},{"key":"1514_CR19","unstructured":"Nvidia: CUDA occupancy calculator. http:\/\/developer.download.nvidia.com\/compute\/cuda\/CUDA_Occupancy_calculator.xls"},{"key":"1514_CR20","unstructured":"Nvidia (2015) CUDA toolkit documentation, Version 7.0. http:\/\/docs.nvidia.com\/cuda"},{"key":"1514_CR21","doi-asserted-by":"crossref","unstructured":"Sato K, Komatsu K, Takizawa H, Kobayashi H (2011) A history-based performance prediction model with profile data classification for automatic task allocation in heterogeneous computing systems. In: Proceedings of the 9th international symposium on parallel and distributed processing with applications (ISPA \u201911). IEEE Computer Society, Washington, pp 135\u2013142","DOI":"10.1109\/ISPA.2011.36"},{"issue":"8","key":"1514_CR22","doi-asserted-by":"crossref","first-page":"11","DOI":"10.1145\/2370036.2145819","volume":"47","author":"J Sim","year":"2012","unstructured":"Sim J, Dasgupta A, Kim H, Vuduc R (2012) A performance analysis framework for identifying potential benefits in GPGPU applications. SIGPLAN Not 47(8):11\u201322","journal-title":"SIGPLAN Not"},{"issue":"3","key":"1514_CR23","doi-asserted-by":"crossref","first-page":"1150","DOI":"10.1007\/s11227-013-0921-z","volume":"65","author":"Y Torres","year":"2013","unstructured":"Torres Y, Gonzalez-Escribano A, Llanos D (2013) uBench: exposing the impact of CUDA block geometry in terms of performance. J Supercomput 65(3):1150\u20131163","journal-title":"J Supercomput"},{"key":"1514_CR24","doi-asserted-by":"crossref","unstructured":"Tran QN (2010) Designing efficient many-core parallel algorithms for all-pairs shortest-paths using CUDA. In: Proceedings of the 7th international conference on information technology: new generations (ITNG \u201910). IEEE Computer Society, Washington, pp 7\u201312","DOI":"10.1109\/ITNG.2010.230"},{"key":"1514_CR25","doi-asserted-by":"crossref","unstructured":"Zhang Y, Owens J (2011) A quantitative performance analysis model for GPU architectures. In: Proceedings of the 17th IEEE international symposium on high performance computer architecture (HPCA \u201911). IEEE Computer Society, Washington, pp 382\u2013393","DOI":"10.1109\/HPCA.2011.5749745"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-015-1514-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-015-1514-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-015-1514-9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,1]],"date-time":"2019-06-01T10:40:42Z","timestamp":1559385642000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-015-1514-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,9,1]]},"references-count":25,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2015,11]]}},"alternative-id":["1514"],"URL":"https:\/\/doi.org\/10.1007\/s11227-015-1514-9","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,9,1]]}}}