{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,21]],"date-time":"2025-06-21T22:40:01Z","timestamp":1750545601906,"version":"3.41.0"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030794774"},{"type":"electronic","value":"9783030794781"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-79478-1_13","type":"book-chapter","created":{"date-parts":[[2021,6,22]],"date-time":"2021-06-22T16:04:37Z","timestamp":1624377877000},"page":"147-158","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Dynamic GMMU Bypass for Address Translation in Multi-GPU Systems"],"prefix":"10.1007","author":[{"given":"Jinhui","family":"Wei","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianzhuang","family":"Lu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qi","family":"Yu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chen","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yunping","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,6,23]]},"reference":[{"issue":"2","key":"13_CR1","doi-asserted-by":"publisher","first-page":"320","DOI":"10.1145\/3140659.3080231","volume":"45","author":"A Arunkumar","year":"2017","unstructured":"Arunkumar, A., et al.: MCM-GPU: Multi-chip-module GPUs for continued performance scalability. ACM SIGARCH Comput. Archit. News 45(2), 320\u2013332 (2017)","journal-title":"ACM SIGARCH Comput. Archit. News"},{"key":"13_CR2","doi-asserted-by":"crossref","unstructured":"Ausavarungnirun, R., et al.: Mosaic: a GPU memory manager with application-transparent support for multiple page sizes. In: Proceedings of the 50th Annual IEEE\/ACM International Symposium on Microarchitecture, pp. 136\u2013150 (2017)","DOI":"10.1145\/3123939.3123975"},{"issue":"2","key":"13_CR3","doi-asserted-by":"publisher","first-page":"503","DOI":"10.1145\/3296957.3173169","volume":"53","author":"R Ausavarungnirun","year":"2018","unstructured":"Ausavarungnirun, R., et al.: MASK: redesigning the GPU memory hierarchy to support multi-application concurrency. ACM SIGPLAN Not. 53(2), 503\u2013518 (2018)","journal-title":"ACM SIGPLAN Not."},{"key":"13_CR4","doi-asserted-by":"publisher","unstructured":"Baruah, T., et al.: Griffin: hardware-software support for efficient page migration in multi-GPU systems. In: 2020 IEEE International Symposium on High Performance Computer Architecture (HPCA), pp. 596\u2013609, February 2020. https:\/\/doi.org\/10.1109\/HPCA47549.2020.00055","DOI":"10.1109\/HPCA47549.2020.00055"},{"key":"13_CR5","doi-asserted-by":"crossref","unstructured":"Ganguly, D., Zhang, Z., Yang, J., Melhem, R.: Interplay between hardware prefetcher and page eviction policy in CPU-GPU unified virtual memory. In: Proceedings of the 46th International Symposium on Computer Architecture, pp. 224\u2013235 (2019)","DOI":"10.1145\/3307650.3322224"},{"key":"13_CR6","doi-asserted-by":"publisher","first-page":"320","DOI":"10.1016\/j.jmmm.2015.10.054","volume":"401","author":"C Jermain","year":"2016","unstructured":"Jermain, C., Rowlands, G., Buhrman, R., Ralph, D.: GPU-accelerated micromagnetic simulations using cloud computing. J. Magn. Magn. Mater. 401, 320\u2013322 (2016)","journal-title":"J. Magn. Magn. Mater."},{"key":"13_CR7","doi-asserted-by":"crossref","unstructured":"Kim, G., Lee, M., Jeong, J., Kim, J.: Multi-GPU system design with memory networks. In: 2014 47th Annual IEEE\/ACM International Symposium on Microarchitecture, pp. 484\u2013495. IEEE (2014)","DOI":"10.1109\/MICRO.2014.55"},{"key":"13_CR8","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. In: Advances in Neural Information Processing Systems, pp. 1097\u20131105 (2012)"},{"key":"13_CR9","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1109\/LCA.2019.2955119","volume":"18","author":"C Li","year":"2019","unstructured":"Li, C., et al.: Priority-based PCIe scheduling for multi-tenant multi-GPU system. IEEE Comput. Archit. Lett. 18, 157\u2013160 (2019)","journal-title":"IEEE Comput. Archit. Lett."},{"key":"13_CR10","unstructured":"NVIDIA, T.: V100 GPU architecture. Whitepaper (2017). nvidia.com. Accessed September 2019"},{"issue":"1","key":"13_CR11","doi-asserted-by":"publisher","first-page":"743","DOI":"10.1145\/2654822.2541942","volume":"42","author":"B Pichai","year":"2014","unstructured":"Pichai, B., Hsu, L., Bhattacharjee, A.: Architectural support for address translation on GPUs: designing memory management units for CPU\/GPUs with unified address spaces. ACM SIGARCH Comput. Archit. News 42(1), 743\u2013758 (2014)","journal-title":"ACM SIGARCH Comput. Archit. News"},{"key":"13_CR12","doi-asserted-by":"crossref","unstructured":"Power, J., Hill, M.D., Wood, D.A.: Supporting x86\u201364 address translation for 100s of GPU lanes. In: 2014 IEEE 20th International Symposium on High Performance Computer Architecture (HPCA), pp. 568\u2013578. IEEE (2014)","DOI":"10.1109\/HPCA.2014.6835965"},{"key":"13_CR13","doi-asserted-by":"crossref","unstructured":"Raina, R., Madhavan, A., Ng, A.Y.: Large-scale deep unsupervised learning using graphics processors. In: Proceedings of the 26th Annual International Conference on Machine Learning, pp. 873\u2013880 (2009)","DOI":"10.1145\/1553374.1553486"},{"key":"13_CR14","doi-asserted-by":"crossref","unstructured":"Sanaullah, A., Mojumder, S.A., Lewis, K.M., Herbordt, M.C.: GPU-accelerated charge mapping. In: 2016 IEEE High Performance Extreme Computing Conference (HPEC), pp. 1\u20137. IEEE (2016)","DOI":"10.1109\/HPEC.2016.7761599"},{"key":"13_CR15","doi-asserted-by":"crossref","unstructured":"Sun, Y., et al.: MGPUSim: enabling multi-GPU performance modeling and optimization. In: Proceedings of the 46th International Symposium on Computer Architecture, pp. 197\u2013209 (2019)","DOI":"10.1145\/3307650.3322230"},{"key":"13_CR16","doi-asserted-by":"crossref","unstructured":"Wang, Y., Davidson, A., Pan, Y., Wu, Y., Riffel, A., Owens, J.D.: Gunrock: a high-performance graph processing library on the GPU. In: Proceedings of the 21st ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, pp. 1\u201312 (2016)","DOI":"10.1145\/2851141.2851145"},{"key":"13_CR17","doi-asserted-by":"crossref","unstructured":"Wu, Y., Wang, Y., Pan, Y., Yang, C., Owens, J.D.: Performance characterization of high-level programming models for GPU graph analytics. In: 2015 IEEE International Symposium on Workload Characterization, pp. 66\u201375. IEEE (2015)","DOI":"10.1109\/IISWC.2015.13"},{"key":"13_CR18","doi-asserted-by":"crossref","unstructured":"Young, V., Jaleel, A., Bolotin, E., Ebrahimi, E., Nellans, D., Villa, O.: Combining HW\/SW mechanisms to improve NUMA performance of multi-GPU systems. In: 2018 51st Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO), pp. 339\u2013351. IEEE (2018)","DOI":"10.1109\/MICRO.2018.00035"},{"key":"13_CR19","doi-asserted-by":"crossref","unstructured":"Zheng, T., Nellans, D., Zulfiqar, A., Stephenson, M., Keckler, S.W.: Towards high performance paged memory for GPUs. In: 2016 IEEE International Symposium on High Performance Computer Architecture (HPCA), pp. 345\u2013357. IEEE (2016)","DOI":"10.1109\/HPCA.2016.7446077"},{"issue":"4","key":"13_CR20","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2996190","volume":"13","author":"AK Ziabari","year":"2016","unstructured":"Ziabari, A.K., et al.: UMH: a hardware-based unified memory hierarchy for systems with multiple discrete GPUs. ACM Trans. Archit. Code Optim. (TACO) 13(4), 1\u201325 (2016)","journal-title":"ACM Trans. Archit. Code Optim. (TACO)"}],"container-title":["Lecture Notes in Computer Science","Network and Parallel Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-79478-1_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,21]],"date-time":"2025-06-21T22:02:52Z","timestamp":1750543372000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-79478-1_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030794774","9783030794781"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-79478-1_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"23 June 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NPC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"IFIP International Conference on Network and Parallel Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Zhengzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 September 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 September 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"npc2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ncic.ac.cn\/npc2020\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}