{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T06:04:41Z","timestamp":1743055481119,"version":"3.40.3"},"publisher-location":"Cham","reference-count":27,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030953874"},{"type":"electronic","value":"9783030953881"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-030-95388-1_3","type":"book-chapter","created":{"date-parts":[[2022,2,22]],"date-time":"2022-02-22T08:20:55Z","timestamp":1645518055000},"page":"33-52","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Multi-level PWB and\u00a0PWC for\u00a0Reducing TLB Miss Overheads on\u00a0GPUs"],"prefix":"10.1007","author":[{"given":"Yang","family":"Lin","sequence":"first","affiliation":[]},{"given":"Dunbo","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Chaoyang","family":"Jia","sequence":"additional","affiliation":[]},{"given":"Qiong","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Li","family":"Shen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,2,23]]},"reference":[{"key":"3_CR1","doi-asserted-by":"crossref","unstructured":"Rogers, T.G., O\u2019Connor, M., Aamodt, T.M.: Cache-conscious wavefront scheduling. IEEE (2013)","DOI":"10.1109\/MICRO.2012.16"},{"key":"3_CR2","doi-asserted-by":"crossref","unstructured":"Rossbach, C.J., Yu, Y., Currey, J., Martin, J.P., Fetterly, D.: Dandelion: a compiler and runtime for heterogeneous systems. In: Proceedings of the Twenty-Fourth ACM Symposium on Operating Systems Principles (2013)","DOI":"10.1145\/2517349.2522715"},{"key":"3_CR3","doi-asserted-by":"crossref","unstructured":"Pichai, B., Hsu, L., Bhattacharjee, A.: Architectural support for address translation on gpus designing memory management units for CPU\/GPUS with unified address spaces. In: Proceedings of the 19th International Conference on Architectural Support for Programming Languages and Operating Systems (2014)","DOI":"10.1145\/2541940.2541942"},{"key":"3_CR4","doi-asserted-by":"crossref","unstructured":"Wang, B., Yu, W., Sun, X.H., Wang, X.: Dacache: memory divergence-aware GPU cache management. ACM (2015)","DOI":"10.1145\/2751205.2751239"},{"key":"3_CR5","doi-asserted-by":"crossref","unstructured":"Ausavarungnirun, R., et al.: Mosaic: a GPU memory manager with application-transparent support for multiple page sizes. In: the 50th Annual IEEE\/ACM International Symposium (2017)","DOI":"10.1145\/3123939.3123975"},{"key":"3_CR6","doi-asserted-by":"crossref","unstructured":"Haria, S., Hill, M.D., Swift, M.M: Devirtualizing memory in heterogeneous systems. ACM SIGPLAN Notices (2018)","DOI":"10.1145\/3173162.3173194"},{"key":"3_CR7","doi-asserted-by":"crossref","unstructured":"Shin, S., LeBeane, M., Solihin, Y., Basu, A.: Neighborhood-aware address translation for irregular GPU applications. In: 2018 51st Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO) (2018)","DOI":"10.1109\/MICRO.2018.00036"},{"key":"3_CR8","doi-asserted-by":"crossref","unstructured":"Bakhoda, A., Yuan, G.L., Fung, W., Wong, H., Aamodt, T.M.: Analyzing cuda workloads using a detailed GPU simulator. In: 2009 IEEE International Symposium on Performance Analysis of Systems and Software (2009)","DOI":"10.1109\/ISPASS.2009.4919648"},{"issue":"3","key":"3_CR9","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1145\/1816038.1815970","volume":"38","author":"TW Barr","year":"2010","unstructured":"Barr, T.W., Cox, A.L., Rixner, S.: Translation caching: skip, don\u2019t walk (the page table). Comput. Arch. News 38(3), 48\u201359 (2010)","journal-title":"Comput. Arch. News"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Barr, T.W., Cox, A.L., Rixner, S.: Spectlb: a mechanism for speculative address translation. In: International Symposium on Computer Architecture (2011)","DOI":"10.1145\/2000064.2000101"},{"key":"3_CR11","doi-asserted-by":"crossref","unstructured":"Burtscher, M., Nasre, R., Pingali, K.: A quantitative study of irregular programs on GPUs (2012)","DOI":"10.1109\/IISWC.2012.6402918"},{"key":"3_CR12","doi-asserted-by":"crossref","unstructured":"Chatterjee, N., O\u2019Connor, M., Loh, G.H., Jayasena, N., Balasubramonian, R.: Managing dram latency divergence in irregular gpgpu applications. In: International Conference for High Performance Computing, Networking, Storage & Analysis (2014)","DOI":"10.1109\/SC.2014.16"},{"key":"3_CR13","doi-asserted-by":"publisher","unstructured":"Che, S., et al.: Rodinia: a benchmark suite for heterogeneous computing. In: 2009 IEEE International Symposium on Workload Characterization (IISWC), pp. 44\u201354 (2009). https:\/\/doi.org\/10.1109\/IISWC.2009.5306797","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"3_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"433","DOI":"10.1007\/978-3-642-54420-0_43","volume-title":"Euro-Par 2013: Parallel Processing Workshops","author":"A Esteve","year":"2014","unstructured":"Esteve, A., G\u00f3mez, M.E., Robles, A.: Exploiting parallelization on address translation: shared page walk cache. In: an Mey, D., et al. (eds.) Euro-Par 2013. LNCS, vol. 8374, pp. 433\u2013443. Springer, Heidelberg (2014). https:\/\/doi.org\/10.1007\/978-3-642-54420-0_43"},{"key":"3_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1007\/978-3-030-11404-6_3","volume-title":"Performance Evaluation and Benchmarking for the Era of Artificial Intelligence","author":"J Karimov","year":"2019","unstructured":"Karimov, J., Rabl, T., Markl, V.: PolyBench: the first benchmark for polystores. In: Nambiar, R., Poess, M. (eds.) TPCTC 2018. LNCS, vol. 11135, pp. 24\u201341. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-11404-6_3"},{"key":"3_CR16","doi-asserted-by":"crossref","unstructured":"Li, S., Chen, K., Ahn, J.H., Brockman, J.B., Jouppi, N.P.: Cacti-p: architecture-level modeling for sram-based structures with advanced leakage reduction techniques. IEEE (2011)","DOI":"10.1109\/ICCAD.2011.6105405"},{"issue":"3","key":"3_CR17","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1145\/1816038.1815992","volume":"38","author":"J Meng","year":"2010","unstructured":"Meng, J., Tarjan, D., Skadron, K.: Dynamic warp subdivision for integrated branch and memory divergence tolerance. ACM Sigarch Comput. Arch. News 38(3), 235\u2013246 (2010)","journal-title":"ACM Sigarch Comput. Arch. News"},{"key":"3_CR18","unstructured":"Nvidia, C.: nvidia\u2019s next generation cuda compute architecture: Fermi (2009)"},{"key":"3_CR19","doi-asserted-by":"crossref","unstructured":"Pham, B., Bhattacharjee, A., Eckert, Y., Loh, G.H.: Increasing TLB reach by exploiting clustering in page translations. In: IEEE International Symposium on High Performance Computer Architecture (2014)","DOI":"10.1109\/HPCA.2014.6835964"},{"key":"3_CR20","doi-asserted-by":"crossref","unstructured":"Power, J., Hill, M., Wood, D.A.: Supporting x86\u201364 address translation for 100s of GPU lanes. In: IEEE International Symposium on High Performance Computer Architecture (2014)","DOI":"10.1109\/HPCA.2014.6835965"},{"key":"3_CR21","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1109\/TMM.2012.2232647","volume":"15","author":"J Sartori","year":"2013","unstructured":"Sartori, J., Kumar, R.: Branch and data herding: reducing control and memory divergence for error-tolerant GPU applications. IEEE Trans. Multimedia 15, 279\u2013290 (2013)","journal-title":"IEEE Trans. Multimedia"},{"key":"3_CR22","doi-asserted-by":"crossref","unstructured":"Shin, S., et al.: Scheduling page table walks for irregular GPU applications. IEEE Computer Society (2018)","DOI":"10.1109\/ISCA.2018.00025"},{"key":"3_CR23","doi-asserted-by":"crossref","unstructured":"Vavouliotis, G., et al.: Exploiting page table locality for agile TLB prefetching, pp. 85\u201398. IEEE (2021)","DOI":"10.1109\/ISCA52012.2021.00016"},{"key":"3_CR24","doi-asserted-by":"crossref","unstructured":"Vesely, J., Basu, A., Oskin, M., Loh, G.H., Bhattacharjee, A.: Observations and opportunities in architecting shared virtual memory for heterogeneous systems. In: 2016 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS) (2016)","DOI":"10.1109\/ISPASS.2016.7482091"},{"key":"3_CR25","unstructured":"Wang, B.: Mitigating GPU memory divergence for data-intensive applications (2015)"},{"issue":"4","key":"3_CR26","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3046683","volume":"14","author":"S Zhang","year":"2017","unstructured":"Zhang, S., Qin, Z., Yang, Y., Shen, L., Wang, Z.: Improving the efficiency of GPGPU work-queue through data awareness. ACM Trans. Archit. Code Optim. 14(4), 1\u201322 (2017)","journal-title":"ACM Trans. Archit. Code Optim."},{"issue":"3","key":"3_CR27","first-page":"1","volume":"14","author":"S Zhang","year":"2020","unstructured":"Zhang, S., Qin, Z., Yang, Y., Shen, L., Wang, Z.: Transparent partial page migration between CPU and GPU. Front. Comput. Sci. 14(3), 1\u201313 (2020)","journal-title":"Front. Comput. Sci."}],"container-title":["Lecture Notes in Computer Science","Algorithms and Architectures for Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-95388-1_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,2,22]],"date-time":"2022-02-22T08:28:15Z","timestamp":1645518495000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-95388-1_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783030953874","9783030953881"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-95388-1_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"23 February 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICA3PP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Algorithms and Architectures for Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 December 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 December 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ica3pp2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/nsclab.org\/ica3pp2021\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"403","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"145","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"36% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.12","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.27","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}