{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T10:12:14Z","timestamp":1767262334903},"publisher-location":"Cham","reference-count":14,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319143125"},{"type":"electronic","value":"9783319143132"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-319-14313-2_13","type":"book-chapter","created":{"date-parts":[[2014,12,11]],"date-time":"2014-12-11T01:56:46Z","timestamp":1418263006000},"page":"146-157","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["A Study of the Potential of Locality-Aware Thread Scheduling for GPUs"],"prefix":"10.1007","author":[{"given":"Cedric","family":"Nugteren","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gert-Jan","family":"van den Braak","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Henk","family":"Corporaal","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"13_CR1","doi-asserted-by":"crossref","unstructured":"Bakhoda, A., Yuan, G., Fung, W., Wong, H., Aamodt, T.: Analyzing CUDA Workloads using a Detailed GPU Simulator. In: ISPASS: International Symposium on Performance Analysis of Systems and Software. IEEE (2009)","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"13_CR2","doi-asserted-by":"crossref","unstructured":"Ding, C., Zhong, Y.: Predicting Whole-Program Locality through Reuse Distance Analysis. In: PLDI-24: Conference on Programming Language Design and Implementation. ACM (2003)","DOI":"10.1145\/781131.781159"},{"key":"13_CR3","doi-asserted-by":"crossref","unstructured":"Fuller, S., Millett, L.: Computing Performance: Game Over or Next Level? IEEE Computer 44 (2011)","DOI":"10.1109\/MC.2011.15"},{"key":"13_CR4","doi-asserted-by":"crossref","unstructured":"Gebhart, M., Johnson, D., Tarjan, D., Keckler, S., Dally, W., Lindholm, E., Skadron, K.: A Hierarchical Thread Scheduler and Register File for Energy-Efficient Throughput Processors. ACM Trans. on Computer Systems 30, 8:1\u20138:38 (2012)","DOI":"10.1145\/2166879.2166882"},{"key":"13_CR5","unstructured":"Kayiran, O., Jog, A., Kandemir, M., Das, C.: Neither More Nor Less: Optimizing Thread-level Parallelism for GPGPUs. In: PACT-22: International Conference on Parallel Architectures and Compilation Techniques. IEEE (2013)"},{"key":"13_CR6","doi-asserted-by":"crossref","unstructured":"Lashgar, A., Baniasadi, A., Khonsari, A.: Dynamic Warp Resizing: Analysis and Benefits in High-Performance SIMT. In: ICCD-30: International Conference on Computer Design. IEEE (2012)","DOI":"10.1109\/ICCD.2012.6378694"},{"key":"13_CR7","doi-asserted-by":"crossref","unstructured":"Meng, J., Tarjan, D., Skadron, K.: Dynamic Warp Subdivision for Integrated Branch and Memory Divergence Tolerance. In: ISCA-37: International Symposium on Computer Architecture. ACM (2010)","DOI":"10.1145\/1815961.1815992"},{"key":"13_CR8","doi-asserted-by":"crossref","unstructured":"Narasiman, V., Shebanow, M., Lee, C., Miftakhutdinov, R., Mutlu, O., Patt, Y.: Improving GPU Performance via Large Warps and Two-level Warp Scheduling. In: MICRO-44: International Symposium on Microarchitecture. ACM (2011)","DOI":"10.1145\/2155620.2155656"},{"key":"13_CR9","doi-asserted-by":"crossref","unstructured":"Nugteren, C., van den Braak, G.-J., Corporaal, H., Bal, H.: A Detailed GPU Cache Model Based on Reuse Distance Theory. In: HPCA-20: International Symposium on High Performance Computer Architecture. IEEE (2014)","DOI":"10.1109\/HPCA.2014.6835955"},{"key":"13_CR10","doi-asserted-by":"crossref","unstructured":"NVIDIA. CUDA C Programming Guide 5.5 (2013)","DOI":"10.1016\/S1353-4858(13)70015-1"},{"key":"13_CR11","doi-asserted-by":"crossref","unstructured":"Philbin, J., Edler, J., Anshus, O., Douglas, C., Li, K.: Thread Scheduling for Cache Locality. In: ASPLOS-7: International Conference on Architectural Support for Programming Languages and Operating Systems. ACM (1996)","DOI":"10.1145\/237090.237151"},{"key":"13_CR12","doi-asserted-by":"crossref","unstructured":"Rogers, T., O\u2019Connor, M., Aamodt, T.: Cache-Conscious Wavefront Scheduling. In: MICRO-45: International Symposium on Microarchitecture. IEEE (2012)","DOI":"10.1109\/MICRO.2012.16"},{"key":"13_CR13","doi-asserted-by":"crossref","unstructured":"Stratton, J., Anssari, N., Rodrigues, C., Sung, I.-J., Obeid, N., Chang, L., Liu, G., Hwu, W.: Optimization and Architecture Effects on GPU Computing Workload Performance. In: INPAR: Workshop on Innovative Parallel Computing. IEEE (2012)","DOI":"10.1109\/InPar.2012.6339605"},{"key":"13_CR14","doi-asserted-by":"crossref","unstructured":"Tam, D., Azimi, R., Stumm, M.: Thread Clustering: Sharing-Aware Scheduling on SMP-CMP-SMT Multiprocessors. In: EuroSys-2: European Conference on Computer Systems. ACM (2007)","DOI":"10.1145\/1272996.1273004"}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2014: Parallel Processing Workshops"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-14313-2_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,4,20]],"date-time":"2020-04-20T00:24:48Z","timestamp":1587342288000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-14313-2_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783319143125","9783319143132"],"references-count":14,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-14313-2_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2014]]},"assertion":[{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}