{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:44:30Z","timestamp":1740123870164,"version":"3.37.3"},"reference-count":20,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2017,10,9]],"date-time":"2017-10-09T00:00:00Z","timestamp":1507507200000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61572508","61672526"],"award-info":[{"award-number":["61572508","61672526"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61472435"],"award-info":[{"award-number":["61472435"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Parallel Prog"],"published-print":{"date-parts":[[2018,8]]},"DOI":"10.1007\/s10766-017-0521-2","type":"journal-article","created":{"date-parts":[[2017,10,8]],"date-time":"2017-10-08T22:39:59Z","timestamp":1507502399000},"page":"722-735","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["The Design of NoC-Side Memory Access Scheduling for Energy-Efficient GPGPUs"],"prefix":"10.1007","volume":"46","author":[{"given":"Wenjie","family":"Liu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sheng","family":"Ma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Libo","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiying","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,10,9]]},"reference":[{"key":"521_CR1","doi-asserted-by":"crossref","unstructured":"Bakhoda, A., Kim, J., Aamodt, T.M.: Throughput-effective on-chip networks for manycore accelerators. In: Proceedings of the 2010 43rd Annual IEEE\/ACM International Symposium on Microarchitecture, pp. 421\u2013432. IEEE Computer Society (2010)","DOI":"10.1109\/MICRO.2010.50"},{"key":"521_CR2","doi-asserted-by":"crossref","unstructured":"Bakhoda, A., Yuan, G.L., Fung, W.W.L., Wong, H., Aamodt, T.M.: Analyzing CUDA workloads using a detailed GPU simulator. In: 2009 IEEE International Symposium on Performance Analysis of Systems and Software, pp. 163\u2013174. IEEE (2009)","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"521_CR3","doi-asserted-by":"crossref","unstructured":"Bourduas, S., Zilic, Z.: A hybrid ring\/mesh interconnect for network-on-chip using hierarchical rings for global routing. In: Proceedings of the First International Symposium on Networks-on-Chip, pp. 195\u2013204. IEEE Computer Society (2007)","DOI":"10.1109\/NOCS.2007.3"},{"key":"521_CR4","first-page":"169","volume-title":"Designing Coalescing Network-on-Chip for Efficient Memory Accesses of GPGPUs","author":"CT Chen","year":"2014","unstructured":"Chen, C.T., Huang, Y.S.C., Chang, Y.Y., Tu, C.Y., King, C.T., Wang, T.Y., Sang, J., Li, M.H.: Designing Coalescing Network-on-Chip for Efficient Memory Accesses of GPGPUs, pp. 169\u2013180. Springer, Berlin (2014)"},{"key":"521_CR5","volume-title":"Principles and Practices of Interconnection Networks","author":"W Dally","year":"2003","unstructured":"Dally, W., Towles, B.: Principles and Practices of Interconnection Networks. Morgan Kaufmann Publishers Inc., Burlington (2003)"},{"key":"521_CR6","doi-asserted-by":"crossref","unstructured":"Dally, W.J., Towles, B.: Route packets, not wires: on-chip interconnection networks. In: Proceedings of the 38th Design Automation Conference, pp. 684\u2013689. ACM (2001)","DOI":"10.1109\/DAC.2001.935594"},{"key":"521_CR7","doi-asserted-by":"crossref","unstructured":"Jang, H., Kim, J., Gratz, P., Yum, K.H., Kim, E.J.: Bandwidth-efficient on-chip interconnect designs for GPGPUs. In: Proceedings of the 52nd Annual Design Automation Conference, pp. 9:1\u20139:6. ACM (2015)","DOI":"10.1145\/2744769.2744803"},{"key":"521_CR8","doi-asserted-by":"publisher","unstructured":"Jerger, N.E., Peh, L.S.: On-chip networks. Synthesis Lectures on Computer Architecture, p. 141. Morgan & Claypool Publishers (2009). doi: 10.2200\/S00209ED1V01Y200907CAC008 .","DOI":"10.2200\/S00209ED1V01Y200907CAC008"},{"key":"521_CR9","doi-asserted-by":"crossref","unstructured":"Kim, H., Kim, J., Seo, W., Cho, Y., Ryu, S.: Providing cost-effective on-chip network bandwidth in GPGPUs. In: 2012 IEEE 30th International Conference on Computer Design (ICCD), pp. 407\u2013412. IEEE Computer Society (2012)","DOI":"10.1109\/ICCD.2012.6378671"},{"key":"521_CR10","doi-asserted-by":"crossref","unstructured":"Kim, Y., Lee, H., Kim, J.: An alternative memory access scheduling in manycore accelerators. In: 2011 International Conference on Parallel Architectures and Compilation Techniques, pp. 195\u2013196. IEEE Computer Society (2011)","DOI":"10.1109\/PACT.2011.37"},{"issue":"4","key":"521_CR11","first-page":"48:1","volume":"18","author":"J Lee","year":"2013","unstructured":"Lee, J., Li, S., Kim, H., Yalamanchili, S.: Adaptive virtual channel partitioning for network-on-chip in heterogeneous architectures. ACM Trans. Des. Autom. Electron. Syst. 18(4), 48:1\u201348:28 (2013)","journal-title":"ACM Trans. Des. Autom. Electron. Syst."},{"key":"521_CR12","doi-asserted-by":"crossref","unstructured":"Leng, J., Hetherington, T., ElTantawy, A., Gilani, S., Kim, N.S., Aamodt, T.M., Reddi, V.J.: GPUwattch: enabling energy optimizations in GPGPUs. In: Proceedings of the 40th Annual International Symposium on Computer Architecture, ISCA \u201913, pp. 487\u2013498. ACM (2013)","DOI":"10.1145\/2485922.2485964"},{"key":"521_CR13","doi-asserted-by":"crossref","unstructured":"Ma, S., Enright\u00a0Jerger, N., Wang, Z.: DBAR: an efficient routing algorithm to support multiple concurrent applications in networks-on-chip. In: Proceedings of the 38th Annual International Symposium on Computer Architecture, pp. 413\u2013424. ACM (2011)","DOI":"10.1145\/2000064.2000113"},{"key":"521_CR14","doi-asserted-by":"crossref","unstructured":"Mutlu, O., Moscibroda, T.: Stall-time fair memory access scheduling for chip multiprocessors. In: Proceedings of the 40th Annual IEEE\/ACM International Symposium on Microarchitecture, pp. 146\u2013160. IEEE Computer Society (2007)","DOI":"10.1109\/MICRO.2007.21"},{"key":"521_CR15","doi-asserted-by":"crossref","unstructured":"Mutlu, O., Moscibroda, T.: Parallelism-aware batch scheduling: enhancing both performance and fairness of shared dram systems. In: Proceedings of the 35th Annual International Symposium on Computer Architecture, pp. 63\u201374. IEEE Computer Society (2008)","DOI":"10.1145\/1394608.1382128"},{"key":"521_CR16","doi-asserted-by":"crossref","unstructured":"Nesbit, K.J., Aggarwal, N., Laudon, J., Smith, J.E.: Fair queuing memory systems. In: Proceedings of the 39th Annual IEEE\/ACM International Symposium on Microarchitecture, pp. 208\u2013222. IEEE Computer Society (2006)","DOI":"10.1109\/MICRO.2006.24"},{"key":"521_CR17","doi-asserted-by":"crossref","unstructured":"Rafique, N., Lim, W.T., Thottethodi, M.: Effective management of dram bandwidth in multicore processors. In: 16th International Conference on Parallel Architecture and Compilation Techniques (PACT 2007), pp. 245\u2013258. IEEE Computer Society (2007)","DOI":"10.1109\/PACT.2007.4336216"},{"key":"521_CR18","doi-asserted-by":"crossref","unstructured":"Rixner, S., Dally, W.J., Kapasi, U.J., Mattson, P., Owens, J.D.: Memory access scheduling. In: Proceedings of the 27th Annual International Symposium on Computer Architecture, pp. 128\u2013138. ACM (2000)","DOI":"10.1145\/339647.339668"},{"key":"521_CR19","unstructured":"Stratton, J.A., Rodrigues, C., Sung, I.J., Obeid, N., Chang, L.W., Anssari, N., Geng, D., Liu, W.M., Hwu, W.: Parboil: a revised benchmark suite for scientific and commercial throughput computing. IMPACT Technical Report (2012)"},{"key":"521_CR20","doi-asserted-by":"crossref","unstructured":"Yuan, G.L., Bakhoda, A., Aamodt, T.M.: Complexity effective memory access scheduling for many-core accelerator architectures. In: 2009 42nd Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO), pp. 34\u201344. ACM (2009)","DOI":"10.1145\/1669112.1669119"}],"container-title":["International Journal of Parallel Programming"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10766-017-0521-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-017-0521-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-017-0521-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,10,4]],"date-time":"2019-10-04T06:39:50Z","timestamp":1570171190000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10766-017-0521-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,10,9]]},"references-count":20,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2018,8]]}},"alternative-id":["521"],"URL":"https:\/\/doi.org\/10.1007\/s10766-017-0521-2","relation":{},"ISSN":["0885-7458","1573-7640"],"issn-type":[{"type":"print","value":"0885-7458"},{"type":"electronic","value":"1573-7640"}],"subject":[],"published":{"date-parts":[[2017,10,9]]}}}