{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T08:47:14Z","timestamp":1771922834404,"version":"3.50.1"},"reference-count":66,"publisher":"IEEE","license":[{"start":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T00:00:00Z","timestamp":1769817600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T00:00:00Z","timestamp":1769817600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026,1,31]]},"DOI":"10.1109\/cgo68049.2026.11395210","type":"proceedings-article","created":{"date-parts":[[2026,2,23]],"date-time":"2026-02-23T20:46:32Z","timestamp":1771879592000},"page":"188-201","source":"Crossref","is-referenced-by-count":0,"title":["FRUGAL: Pushing GPU Applications beyond Memory Limits"],"prefix":"10.1109","author":[{"given":"Lingqi","family":"Zhang","sequence":"first","affiliation":[{"name":"RIKEN Center for Computational Science,Japan"}]},{"given":"Tengfei","family":"Wang","sequence":"additional","affiliation":[{"name":"Google Cloud Japan,Japan"}]},{"given":"Jiajun","family":"Huang","sequence":"additional","affiliation":[{"name":"University of South Florida,USA"}]},{"given":"Chen","family":"Zhuang","sequence":"additional","affiliation":[{"name":"RIKEN Center for Computational Science,Japan"}]},{"given":"Ivan R.","family":"Ivanov","sequence":"additional","affiliation":[{"name":"RIKEN Center for Computational Science,Japan"}]},{"given":"Peng","family":"Chen","sequence":"additional","affiliation":[{"name":"RIKEN Center for Computational Science,Japan"}]},{"given":"Toshio","family":"Endo","sequence":"additional","affiliation":[{"name":"Institute of Science 
Tokyo,Japan"}]},{"given":"Mohamed","family":"Wahib","sequence":"additional","affiliation":[{"name":"RIKEN Center for Computational Science,Japan"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-019-47174-9"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783721"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00023"},{"key":"ref4","article-title":"He who can pay top dollar for hbm memory controls ai training","author":"Morgan","year":"2024"},{"key":"ref5","article-title":"Nvidia a100 enterprise pcie 40gb\/80gb \u2014 vipera - tomorrow\u2019s technology today"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2024.3431910"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3524059.3532394"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/sc41405.2020.00024"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2017.97"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3145617.3145619"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-023-05103-8"},{"key":"ref12","article-title":"Pears: A performance-aware static and dynamic framework for heterogeneous memory","author":"Patil","year":"2021"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3342195.3387537"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/2807591.2807655"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3441581"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3447818.3460355"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3480855"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3650200.3656608"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00035"},{"key":"ref20","first-page":"71","article-title":"Optimizing file systems on heterogeneous memory by integrating DRAM cache with virtual memory 
management","volume-title":"22nd USENIX Conference on File and Storage Technologies (FAST 24)","author":"Liu"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3350755.3400233"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582063"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613167"},{"key":"ref24","first-page":"817","article-title":"FlexMem: Adaptive page profiling and migration for tiered memory","volume-title":"2024 USENIX Annual Technical Conference (USENIX ATC 24)","author":"Xu"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507735"},{"key":"ref26","first-page":"8343","article-title":"Nimble: Lightweight and parallel gpu task scheduling for deep learning","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Kwon","year":"2020"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3492321.3519563"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"ref29","first-page":"701","article-title":"Welder: Scheduling deep learning memory access via tile-graph","volume-title":"17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 
23)","author":"Shi"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1038\/s41534-019-0239-7"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1103\/RevModPhys.86.153"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607085"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/SC41404.2022.00078"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2019.02.007"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3469030"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3039728"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-58667-0_19"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/2749246.2749255"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3577193.3593705"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/3577193.3593716"},{"key":"ref41","first-page":"16","article-title":"Better performance at lower occupancy","volume-title":"Proceedings of the GPU technology conference, GTC","volume":"10","author":"Volkov"},{"key":"ref42","volume-title":"Understanding latency hiding on GPUs.","author":"Volkov","year":"2016"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1145\/113446.113466"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/344588.344618"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1145\/24039.24041"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2004.1281665"},{"key":"ref47","article-title":"CUDA C++ Programming Guide","year":"2025"},{"key":"ref48","article-title":"Pytorch: An imperative style, high-performance deep learning library","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Paszke","year":"2019"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"ref50","article-title":"A Guide to CUDA Graphs in GROMACS 
2023","author":"Witsoe","year":"2025"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1002\/jcc.20291"},{"key":"ref52","volume-title":"Nvidia Grace Hopper Superchip architecture white paper","year":"2024"},{"key":"ref53","article-title":"Gurobi Optimizer Reference Manual","year":"2024"},{"key":"ref54","article-title":"tiny-cuda-nn","author":"M\u00fcller","year":"2021"},{"key":"ref55","article-title":"An approximate fourier transform useful in quantum factoring","author":"Coppersmith","year":"2002"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1145\/3620666.3651353"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575748"},{"key":"ref58","article-title":"Nvidia nvpl"},{"key":"ref59","volume-title":"Address Translation Services.","year":"2025"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1145\/3572848.3577497"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS57955.2024.00055"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1145\/3314221.3314650"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS57955.2024.00019"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1145\/3676641.3711999"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1145\/3092255.3092273"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1145\/3617232.3624856"}],"event":{"name":"2026 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO)","location":"Sydney, Australia","start":{"date-parts":[[2026,1,31]]},"end":{"date-parts":[[2026,2,4]]}},"container-title":["2026 IEEE\/ACM International Symposium on Code Generation and Optimization 
(CGO)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11395173\/11394837\/11395210.pdf?arnumber=11395210","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T07:42:07Z","timestamp":1771918927000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11395210\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,31]]},"references-count":66,"URL":"https:\/\/doi.org\/10.1109\/cgo68049.2026.11395210","relation":{},"subject":[],"published":{"date-parts":[[2026,1,31]]}}}