{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T18:03:03Z","timestamp":1777485783677,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":83,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,4,4]],"date-time":"2019-04-04T00:00:00Z","timestamp":1554336000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1253700, 1337147, 1319755"],"award-info":[{"award-number":["1253700, 1337147, 1319755"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,4,4]]},"DOI":"10.1145\/3297858.3304024","type":"proceedings-article","created":{"date-parts":[[2019,4,4]],"date-time":"2019-04-04T18:38:43Z","timestamp":1554403123000},"page":"331-345","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":127,"title":["Nimble Page Management for Tiered Memory Systems"],"prefix":"10.1145","author":[{"given":"Zi","family":"Yan","sequence":"first","affiliation":[{"name":"Rutgers University &amp; NVIDIA, Santa Clara, CA, USA"}]},{"given":"Daniel","family":"Lustig","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, CA, USA"}]},{"given":"David","family":"Nellans","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, CA, USA"}]},{"given":"Abhishek","family":"Bhattacharjee","sequence":"additional","affiliation":[{"name":"Yale University, New Haven, CT, USA"}]}],"member":"320","published-online":{"date-parts":[[2019,4,4]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/2694344.2694381"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037706"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.5555\/3154690.3154694"},{"key":"e_1_3_2_1_4_1","volume-title":"d.}. RFC: Transparent Hugepage support. https: \/\/lwn.net\/Articles\/358904\/. {Online","author":"Arcangeli Andrea","year":"2018","unstructured":"Andrea Arcangeli. {n. d.}. RFC: Transparent Hugepage support. https: \/\/lwn.net\/Articles\/358904\/. {Online; accessed 31-Jul-2018}."},{"key":"e_1_3_2_1_5_1","volume-title":"Avoiding TLB Shootdowns Through Self- Invalidating TLB Entries. In 2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT). 273--287","author":"Awad Amro","unstructured":"Amro Awad, Arkaprava Basu, Sergey Blagodurov, Yan Solihin, and Gabriel H. Loh. 2017. Avoiding TLB Shootdowns Through Self- Invalidating TLB Entries. In 2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT). 273--287."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485943"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2597917.2597924"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/195473.195485"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3132402.3132404"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.63"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/782814.782833"},{"key":"e_1_3_2_1_12_1","unstructured":"Jonathan Corbet. 2012. AutoNUMA: the other approach to NUMA scheduling. http:\/\/lwn.net\/Articles\/488709\/. {Online; accessed 31-Jul- 2018}."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037704"},{"key":"e_1_3_2_1_14_1","volume-title":"What's new in Hyper-V on Windows Server 2016 Technical Preview. https:\/\/technet. microsoft.com\/en-us\/windows-server-docs\/compute\/hyper-v\/ what-s-new-in-hyper-v-on-windows. {Online","author":"Davies Kathy","year":"2018","unstructured":"Kathy Davies. 2016. What's new in Hyper-V on Windows Server 2016 Technical Preview. https:\/\/technet. microsoft.com\/en-us\/windows-server-docs\/compute\/hyper-v\/ what-s-new-in-hyper-v-on-windows. {Online; accessed: 31-Jul-2018}."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/800001.811670"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2010.50"},{"key":"e_1_3_2_1_17_1","volume-title":"2015 IEEE 21st International Symposium on High Performance Computer Architecture (HPCA). 223--234","author":"Du Y.","unstructured":"Y. Du, M. Zhou, B. R. Childers, D. Moss\u00e9, and R. Melhem. 2015. Supporting superpages in non-contiguous physical memory. In 2015 IEEE 21st International Symposium on High Performance Computer Architecture (HPCA). 223--234."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2669594.2669599"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2016.10"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.5555\/2643634.2643659"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.5555\/983550"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.5555\/3154630.3154683"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.36"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2731186.2731191"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173162.3173194"},{"key":"e_1_3_2_1_26_1","volume-title":"d.}. Intel Memory Latency Checker. https:\/\/software.intel. com\/en-us\/articles\/intelr-memory-latency-checker. {Online","year":"2018","unstructured":"Intel. {n. d.}. Intel Memory Latency Checker. https:\/\/software.intel. com\/en-us\/articles\/intelr-memory-latency-checker. {Online; accessed 31-Jul-2018}."},{"key":"e_1_3_2_1_27_1","volume-title":"2nd Generation Intel Xeon Phi Processor","author":"Knights Landing","unstructured":"Intel. 2016. Knights Landing (KNL): 2nd Generation Intel Xeon Phi Processor. http:\/\/www.hotchips.org\/wp-content\/uploads\/hc_archives\/ hc27\/HC27.25-Tuesday-Epub\/HC27.25.70-Processors-Epub\/HC27. 25.710-Knights-Landing-Sodani-Intel.pdf. {Online; accessed 31-Jul-2018}."},{"key":"e_1_3_2_1_28_1","volume-title":"JESD79--4A: DDR4 SDRAM Standard. https:\/\/www. jedec.org\/sites\/default\/files\/docs\/JESD79--4A.pdf. {Online","author":"JEDEC.","year":"2018","unstructured":"JEDEC. 2014. JESD79--4A: DDR4 SDRAM Standard. https:\/\/www. jedec.org\/sites\/default\/files\/docs\/JESD79--4A.pdf. {Online; accessed 31-Jul-2018}."},{"key":"e_1_3_2_1_29_1","unstructured":"JEDEC. 2015. High Bandwidth Memory(HBM) DRAM - JESD235A. http:\/\/www.jedec.org\/standards-documents\/docs\/jesd235a. {Online; accessed 31-Jul-2018}."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.51"},{"key":"e_1_3_2_1_31_1","volume-title":"SPEC ACCEL: A Standard Application Suite for Measuring Hardware Accelerator Performance","author":"Juckeland Guido","year":"2015","unstructured":"Guido Juckeland, William Brantley, Sunita Chandrasekaran, Barbara Chapman, Shuai Che, Mathew Colgrove, Huiyu Feng, Alexander Grund, Robert Henschel, Wen-Mei W. Hwu, Huian Li, Matthias S. M\u00fcller, Wolfgang E. Nagel, Maxim Perminov, Pavel Shelepugin, Kevin Skadron, John Stratton, Alexey Titov, KeWang, Matthijs vanWaveren, Brian Whitney, Sandra Wienke, Rengan Xu, and Kalyan Kumaran. 2015. SPEC ACCEL: A Standard Application Suite for Measuring Hardware Accelerator Performance. Springer International Publishing, Cham, 46--67."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080245"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2749471"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173162.3173198"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.5555\/3026877.3026931"},{"key":"e_1_3_2_1_36_1","volume-title":"d.}. Swap migration V3: Overview. https: \/\/lwn.net\/Articles\/156603\/. {Online","author":"Lameter Christoph","year":"2018","unstructured":"Christoph Lameter. {n. d.}. Swap migration V3: Overview. https: \/\/lwn.net\/Articles\/156603\/. {Online; accessed 31-Jul-2018}."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2508834.2513149"},{"key":"e_1_3_2_1_38_1","volume-title":"https: \/\/asc.llnl.gov\/coral-info. {Online","author":"Lawerence Livermore National Laboratory. 2016. CORAL\/Sierra.","year":"2018","unstructured":"Lawerence Livermore National Laboratory. 2016. CORAL\/Sierra. https: \/\/asc.llnl.gov\/coral-info. {Online; accessed 31-Jul-2018}."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/1555754.1555758"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.5555\/2813767.2813788"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/1555754.1555789"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2012.6168955"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/2872362.2872401"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155673"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/2819001.2819005"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/977091.977115"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.5555\/1014910"},{"key":"e_1_3_2_1_48_1","volume-title":"Mike Ignatowski, and Gabriel H. Loh.","author":"Meswani Mitesh R.","year":"2015","unstructured":"Mitesh R. Meswani, Sergey Blagodurov, David Roberts, J ohn Slice, Mike Ignatowski, and Gabriel H. Loh. 2015. Heterogeneous Memory Architectures: A HW\/SW Approach For Mixing Die-stacked And Offpackage Memories. In 2015 IEEE 21st International Symposium on High Performance Computer Architecture (HPCA). 126--136."},{"key":"e_1_3_2_1_49_1","volume-title":"Hybrid Memory Cube Specification 2.1. https:\/\/www. nuvation.com\/sites\/default\/files\/Nuvation-Engineering-Images\/ Articles\/FPGAs-and-HMC\/HMC-30G-VSR_HMCC_Specification. pdf. {Online","author":"Micron","year":"2018","unstructured":"Micron 2015. Hybrid Memory Cube Specification 2.1. https:\/\/www. nuvation.com\/sites\/default\/files\/Nuvation-Engineering-Images\/ Articles\/FPGAs-and-HMC\/HMC-30G-VSR_HMCC_Specification. pdf. {Online; accessed 31-Jul-2018}."},{"key":"e_1_3_2_1_50_1","volume-title":"3D XPoint Technology. https:\/\/www.micron.com\/ products\/advanced-solutions\/3d-xpoint-technology. {Online","year":"2018","unstructured":"Micron. 2016. 3D XPoint Technology. https:\/\/www.micron.com\/ products\/advanced-solutions\/3d-xpoint-technology. {Online; accessed 31-Jul-2018}."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.5555\/1855568.1855582"},{"key":"e_1_3_2_1_52_1","volume-title":"Ang","author":"Murphy Richard C.","year":"2010","unstructured":"Richard C. Murphy, Kyle B. Wheeler, Brian W. Barrett, and James A. Ang. 2010. Introducing the Graph 500. In Cray User's Group."},{"key":"e_1_3_2_1_53_1","unstructured":"Linux Newbies. 2017. Linux 4.14 Release Note. https:\/\/kernelnewbies. org\/Linux_4.14#Memory_management"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/335231.335243"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.5555\/850941.852887"},{"key":"e_1_3_2_1_56_1","volume-title":"Unified Memory in CUDA 6. http: \/\/devblogs.nvidia.com\/parallelforall\/unified-memory-in-cuda-6\/. {Online","author":"NVIDIA Corporation","year":"2018","unstructured":"NVIDIA Corporation. 2013. Unified Memory in CUDA 6. http: \/\/devblogs.nvidia.com\/parallelforall\/unified-memory-in-cuda-6\/. {Online; accessed 31-Jul-2018}."},{"key":"e_1_3_2_1_57_1","unstructured":"NVIDIA Corporation. 2014. NVLink Pascal and Stacked Memory: Feeding the Appetite for Big Data. http:\/\/devblogs.nvidia.com\/parallelforall\/ nvlink-pascal-stacked-memory-feeding-appetite-big-data\/. {Online; accessed 14-Aug-2016}."},{"key":"e_1_3_2_1_58_1","volume-title":"https:\/\/www.olcf.ornl. gov\/summit\/. {Online","author":"Oak Ridge National Laboratory. 2018. Summit.","year":"2018","unstructured":"Oak Ridge National Laboratory. 2018. Summit. https:\/\/www.olcf.ornl. gov\/summit\/. {Online; accessed 31-Jul-2018}."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2015.30"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173162.3173203"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2015.7056034"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00026"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/HOTCHIPS.2011.7477494"},{"key":"e_1_3_2_1_64_1","volume-title":"2014 IEEE 20th International Symposium on High Performance Computer Architecture (HPCA). 558--567","author":"Pham Binh","unstructured":"Binh Pham, Abhishek Bhattacharjee, Yasuko Eckert, and Gabriel H. Loh. 2014. Increasing TLB reach by exploiting clustering in page translations. In 2014 IEEE 20th International Symposium on High Performance Computer Architecture (HPCA). 558--567."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830773"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541942"},{"key":"e_1_3_2_1_67_1","volume-title":"2014 IEEE 20th International Symposium on High Performance Computer Architecture (HPCA). 568--578","author":"Power Jason","unstructured":"Jason Power, Mark D. Hill, and David A. Wood. 2014. Supporting x86--64 address translation for 100s of GPU lanes. In 2014 IEEE 20th International Symposium on High Performance Computer Architecture (HPCA). 568--578."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.30"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1145\/1555754.1555760"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/1995896.1995911"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"crossref","unstructured":"Bogdan F. Romanescu Alvin R. Lebeck Daniel J. Sorin and Anne Bracy. 2010. UNified Instruction\/Translation\/Data (UNITD) coherence: One protocol to rule them all. In HPCA - 16 2010 The Sixteenth International Symposium on High-Performance Computer Architecture. 1--12.","DOI":"10.1109\/HPCA.2010.5416643"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1145\/3205289.3208064"},{"key":"e_1_3_2_1_73_1","volume-title":"2016 IEEE International Symposium on High Performance Computer Architecture (HPCA). IEEE, 481--493","author":"Seshadri Vivek","year":"2016","unstructured":"Vivek Seshadri, Yoongu Kim, Chris Fallin, Donghyuk Lee, , Rachata Ausavarungnirun, Gennady Pekhimenko, Yixin Luo, Onur Mutlu, Phillip B Gibbons, and Michael A Kozuch. 2016. RowClone: fast and energy-efficient in-DRAM bulk data copy and initialization. In 2016 IEEE International Symposium on High Performance Computer Architecture (HPCA). IEEE, 481--493."},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2004.21"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.56"},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.31"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2008.05.006"},{"key":"e_1_3_2_1_78_1","volume-title":"Performance profiling on core kernel code. https: \/\/plus.google.com\/+LinusTorvalds\/posts\/YDKRFDwHwr6. {Online","author":"Torvalds Linus","year":"2018","unstructured":"Linus Torvalds. 2014. Performance profiling on core kernel code. https: \/\/plus.google.com\/+LinusTorvalds\/posts\/YDKRFDwHwr6. {Online; accessed 31-Jul-2018}."},{"key":"e_1_3_2_1_79_1","unstructured":"UEFI.org. 2017. Advanced Configuration and Power Interface Specification Version 6.2. http:\/\/www.uefi.org\/sites\/default\/files\/resources\/ ACPI_6_2.pdf. {Online; accessed 31-Jul-2018}."},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2011.65"},{"key":"e_1_3_2_1_81_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2016.7446088"},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080211"},{"key":"e_1_3_2_1_83_1","volume-title":"Surface Heterogeneous Memory Performance Information. https:\/\/lwn.net\/Articles\/727348\/. {Online","author":"Zwisler Ross","year":"2018","unstructured":"Ross Zwisler. 2017. Surface Heterogeneous Memory Performance Information. https:\/\/lwn.net\/Articles\/727348\/. {Online; accessed 31- Jul-2018}."}],"event":{"name":"ASPLOS '19: Architectural Support for Programming Languages and Operating Systems","location":"Providence RI USA","acronym":"ASPLOS '19","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGOPS ACM Special Interest Group on Operating Systems","SIGARCH ACM Special Interest Group on Computer Architecture","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the Twenty-Fourth International Conference on Architectural Support for Programming Languages and Operating Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3297858.3304024","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3297858.3304024","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3297858.3304024","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:53:14Z","timestamp":1750204394000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3297858.3304024"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,4,4]]},"references-count":83,"alternative-id":["10.1145\/3297858.3304024","10.1145\/3297858"],"URL":"https:\/\/doi.org\/10.1145\/3297858.3304024","relation":{},"subject":[],"published":{"date-parts":[[2019,4,4]]},"assertion":[{"value":"2019-04-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}