{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T04:16:12Z","timestamp":1748751372745,"version":"3.41.0"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319284293"},{"type":"electronic","value":"9783319284309"}],"license":[{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-319-28430-9_12","type":"book-chapter","created":{"date-parts":[[2016,1,9]],"date-time":"2016-01-09T09:32:28Z","timestamp":1452331948000},"page":"153-164","source":"Crossref","is-referenced-by-count":0,"title":["Making GPU Warp Scheduler and Memory Scheduler Synchronization-Aware"],"prefix":"10.1007","author":[{"given":"Jianliang","family":"Ma","sequence":"first","affiliation":[]},{"given":"Tianzhou","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Minghui","family":"Wu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,1,10]]},"reference":[{"issue":"3","key":"12_CR1","doi-asserted-by":"publisher","first-page":"416","DOI":"10.1145\/2366231.2337207","volume":"40","author":"R Ausavarungnirun","year":"2012","unstructured":"Ausavarungnirun, R., Chang, K.K.W., Subramanian, L., Loh, G.H., Mutlu, O.: Staged memory scheduling: Achieving high performance and scalability in heterogeneous systems. SIGARCH Comput. Archit. News 40(3), 416\u2013427 (2012)","journal-title":"SIGARCH Comput. Archit. News"},{"issue":"2","key":"12_CR2","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1109\/L-CA.2011.32","volume":"11","author":"BN Lakshminarayana","year":"2012","unstructured":"Lakshminarayana, B.N., Lee, J., Kim, H., Shin, J.: Dram scheduling policy for GPGPU architectures based on a potential function. IEEE Comput. Archit. Lett. 11(2), 33\u201336 (2012)","journal-title":"IEEE Comput. Archit. Lett."},{"key":"12_CR3","doi-asserted-by":"crossref","unstructured":"Bakhoda, A., Yuan, G., Fung, W., Wong, H., Aamodt, T.: Analyzing CUDA workloads using a detailed GPU simulator. In: 2009 IEEE International Symposium on Performance Analysis of Systems and Software, ISPASS 2009, pp. 163\u2013174, April 2009","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"12_CR4","doi-asserted-by":"crossref","unstructured":"Chatterjee, N., O\u2019Connor, M., Loh, G.H., Jayasena, N., Balasubramonian, R.: Managing dram latency divergence in irregular GPGPU applications. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC 2014, pp. 128\u2013139. IEEE Press, Piscataway (2014)","DOI":"10.1109\/SC.2014.16"},{"key":"12_CR5","doi-asserted-by":"crossref","unstructured":"Che, S., Boyer, M., Meng, J., Tarjan, D., Sheaffer, J., Lee, S.H., Skadron, K.: Rodinia: a benchmark suite for heterogeneous computing. In: 2009 IEEE International Symposium on Workload Characterization, IISWC 2009, pp. 44\u201354, October 2009","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"12_CR6","doi-asserted-by":"crossref","unstructured":"Chen, J., Tao, X., Yang, Z., Peir, J.K., Li, X., Lu, S.L.: Guided region-based GPU scheduling: utilizing multi-thread parallelism to hide memory latency. In: 2013 IEEE 27th International Symposium on Parallel Distributed Processing (IPDPS), pp. 441\u2013451, May 2013","DOI":"10.1109\/IPDPS.2013.95"},{"issue":"3","key":"12_CR7","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1145\/2024723.2000093","volume":"39","author":"M Gebhart","year":"2011","unstructured":"Gebhart, M., Johnson, D.R., Tarjan, D., Keckler, S.W., Dally, W.J., Lindholm, E., Skadron, K.: Energy-efficient mechanisms for managing thread context in throughput processors. SIGARCH Comput. Archit. News 39(3), 235\u2013246 (2011)","journal-title":"SIGARCH Comput. Archit. News"},{"key":"12_CR8","doi-asserted-by":"crossref","unstructured":"He, B., Fang, W., Luo, Q., Govindaraju, N.K., Wang, T.: Mars: a mapreduce framework on graphics processors. In: Proceedings of the 17th International Conference on Parallel Architectures and Compilation Techniques, PACT 2008, pp. 260\u2013269. ACM, New York (2008)","DOI":"10.1145\/1454115.1454152"},{"key":"12_CR9","unstructured":"hynix: \u201chynix gddr5 sgram part h5gq1h24afr\u201d (2009). www.hynix.com\/datasheet\/pdf\/graphics\/H5GQ1H24AFR(Rev1.0).pdf"},{"key":"12_CR10","doi-asserted-by":"crossref","unstructured":"Jablin, J.A., Jablin, T.B., Mutlu, O., Herlihy, M.: Warp-aware trace scheduling for GPUs. In: Proceedings of the 23rd International Conference on Parallel Architectures and Compilation, PACT 2014, pp. 163\u2013174. ACM, New York (2014)","DOI":"10.1145\/2628071.2628101"},{"key":"12_CR11","doi-asserted-by":"crossref","unstructured":"Jeong, M.K., Erez, M., Sudanthi, C., Paver, N.: A QoS-aware memory controller for dynamically balancing GPU and CPU bandwidth use in an MPSoC. In: Proceedings of the 49th Annual Design Automation Conference, DAC 2012, pp. 850\u2013855. ACM, New York (2012)","DOI":"10.1145\/2228360.2228513"},{"issue":"4","key":"12_CR12","doi-asserted-by":"publisher","first-page":"395","DOI":"10.1145\/2499368.2451158","volume":"48","author":"A Jog","year":"2013","unstructured":"Jog, A., Kayiran, O., Chidambaram Nachiappan, N., Mishra, A.K., Kandemir, M.T., Mutlu, O., Iyer, R., Das, C.R.: Owl: cooperative thread array aware scheduling techniques for improving GPGPU performance. SIGPLAN Not. 48(4), 395\u2013406 (2013)","journal-title":"SIGPLAN Not."},{"key":"12_CR13","doi-asserted-by":"crossref","unstructured":"Jog, A., Kayiran, O., Mishra, A.K., Kandemir, M.T., Mutlu, O., Iyer, R., Das, C.R.: Orchestrated scheduling and prefetching for GPGPUS. In: Proceedings of the 40th Annual International Symposium on Computer Architecture, ISCA 2013, pp. 332\u2013343. ACM, New York (2013)","DOI":"10.1145\/2485922.2485951"},{"key":"12_CR14","doi-asserted-by":"crossref","unstructured":"Kayiran, O., Jog, A., Kandemir, M., Das, C.: Neither more nor less: optimizing thread-level parallelism for GPGPUS. In: 2013 22nd International Conference on Parallel Architectures and Compilation Techniques (PACT), pp. 157\u2013166, September 2013","DOI":"10.1109\/PACT.2013.6618806"},{"issue":"5","key":"12_CR15","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1109\/MM.2011.89","volume":"31","author":"S Keckler","year":"2011","unstructured":"Keckler, S., Dally, W., Khailany, B., Garland, M., Glasco, D.: Gpus and the future of parallel computing. IEEE Micro 31(5), 7\u201317 (2011)","journal-title":"IEEE Micro"},{"key":"12_CR16","unstructured":"Kuo, H.K., Yen, T.K., Lai, B.C., Jou, J.Y.: Cache capacity aware thread scheduling for irregular memory access on many-core GPGPUs. In: 2013 18th Asia and South Pacific Design Automation Conference (ASP-DAC), pp. 338\u2013343, January 2013"},{"key":"12_CR17","doi-asserted-by":"crossref","unstructured":"Lee, M., Song, S., Moon, J., Kim, J., Seo, W., Cho, Y., Ryu, S.: Improving GPGPU resource utilization through alternative thread block scheduling. In: 2014 IEEE 20th International Symposium on High Performance Computer Architecture (HPCA), pp. 260\u2013271, February 2014","DOI":"10.1109\/HPCA.2014.6835937"},{"key":"12_CR18","doi-asserted-by":"crossref","unstructured":"Lee, S.Y., Wu, C.J.: Caws: criticality-aware warp scheduling for GPGPU workloads. In: Proceedings of the 23rd International Conference on Parallel Architectures and Compilation, PACT 2014, pp. 175\u2013186. ACM, New York (2014)","DOI":"10.1145\/2628071.2628107"},{"key":"12_CR19","unstructured":"Lakshminarayana, N.B., Kim, H.: Workshop on Language, Compiler, and Architecture Support for GPGPU (2010)"},{"key":"12_CR20","doi-asserted-by":"crossref","unstructured":"Narasiman, V., Shebanow, M., Lee, C.J., Miftakhutdinov, R., Mutlu, O., Patt, Y.N.: Improving GPU performance via large warps and two-level warp scheduling. In: Proceedings of the 44th Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO-44, pp. 308\u2013317. ACM, New York (2011)","DOI":"10.1145\/2155620.2155656"},{"key":"12_CR21","unstructured":"NVIDIA: \u201cnvidia cuda c programming guide v4.2\u201d (2012). docs.nvidia.com\/cuda\/"},{"issue":"2","key":"12_CR22","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1145\/342001.339668","volume":"28","author":"S Rixner","year":"2000","unstructured":"Rixner, S., Dally, W.J., Kapasi, U.J., Mattson, P., Owens, J.D.: Memory access scheduling. SIGARCH Comput. Archit. News 28(2), 128\u2013138 (2000)","journal-title":"SIGARCH Comput. Archit. News"},{"key":"12_CR23","doi-asserted-by":"crossref","unstructured":"Rogers, T.G., O\u2019Connor, M., Aamodt, T.M.: Cache-conscious wavefront scheduling. In: Proceedings of the 2012 45th Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO-45, pp. 72\u201383. IEEE Computer Society, Washington (2012)","DOI":"10.1109\/MICRO.2012.16"},{"issue":"3","key":"12_CR24","first-page":"66","volume":"12","author":"JE Stone","year":"2010","unstructured":"Stone, J.E., Gohara, D., Shi, G.: OpenCL: a parallel programming standard for heterogeneous computing systems. IEEE Des. Test 12(3), 66\u201373 (2010)","journal-title":"IEEE Des. Test"},{"key":"12_CR25","unstructured":"Robinson, T., Zuravleff, W.: Controller for a synchronous dram that maximizes throughput by allowing memory requests and commands to be issued out of order (1997). Google Patents"}],"container-title":["Lecture Notes in Computer Science","Cloud Computing and Big Data"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-28430-9_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T02:52:14Z","timestamp":1748746334000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-28430-9_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783319284293","9783319284309"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-28430-9_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2015]]}}}