{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T21:15:38Z","timestamp":1773436538484,"version":"3.50.1"},"reference-count":34,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Science and Technology Council","award":["111-2221-E-A49-198-MY3"],"award-info":[{"award-number":["111-2221-E-A49-198-MY3"]}]},{"name":"National Science and Technology Council","award":["113-2221-E-A49-189-MY3"],"award-info":[{"award-number":["113-2221-E-A49-189-MY3"]}]},{"name":"National Science and Technology Council","award":["114-2221-E-A49-099-MY3"],"award-info":[{"award-number":["114-2221-E-A49-099-MY3"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Emerg. Topics Comput."],"published-print":{"date-parts":[[2026,1]]},"DOI":"10.1109\/tetc.2026.3661468","type":"journal-article","created":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T21:05:49Z","timestamp":1770757549000},"page":"286-302","source":"Crossref","is-referenced-by-count":0,"title":["MemAscend: System Memory Optimization for SSD-Offloaded LLM Fine-Tuning"],"prefix":"10.1109","volume":"14","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-9783-4640","authenticated-orcid":false,"given":"Yong-Cheng","family":"Liaw","sequence":"first","affiliation":[{"name":"Institute of Computer Science and Engineering, National Yang Ming Chiao Tung University, Hsinchu, Taiwan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1619-4335","authenticated-orcid":false,"given":"Shuo-Han","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Computer Science and the Institute of Artificial Intelligence Innovation, National Yang Ming Chiao Tung University, Hsinchu, Taiwan"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Large language models: A survey","author":"Minaee","year":"2025"},{"key":"ref2","article-title":"OpenAI GPT-4.5 system card","year":"2025"},{"key":"ref3","article-title":"The Llama 3 herd of models","author":"Grattafiori","year":"2024"},{"key":"ref4","first-page":"607","article-title":"Automatic summarization of long documents","volume-title":"Proc. 21st Int. Conf. Natural Lang. Process.","author":"Chhibbar"},{"key":"ref5","article-title":"Scaling up summarization: Leveraging large language models for long text extractive summarization","author":"Hemamou","year":"2024"},{"key":"ref6","first-page":"5420","article-title":"Multilingual machine translation with open large language models at practical scale: An empirical study","volume-title":"Proc. Conf. North Amer. Chapter Assoc. Comput. Linguistics: Hum. Lang. Technol.","author":"Cui"},{"key":"ref7","article-title":"Instruction tuning for large language models: A survey","author":"Zhang","year":"2024"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00024"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.14778\/3551793.3551828"},{"key":"ref10","article-title":"Training deep nets with sublinear memory cost","author":"Chen","year":"2016"},{"key":"ref11","article-title":"Cuda toolkit documentation","year":"2007"},{"key":"ref12","article-title":"Liger kernel: Efficient triton kernels for LLM training","author":"Hsu","year":"2025"},{"key":"ref13","first-page":"12360","article-title":"Root mean square layer normalization","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Zhang"},{"key":"ref14","article-title":"GLU variants improve transformer","author":"Shazeer","year":"2020"},{"key":"ref15","first-page":"8792","article-title":"Generalized cross entropy loss for training deep neural networks with noisy labels","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Zhang"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.52202\/068431-1189"},{"key":"ref18","article-title":"Mixed precision training","author":"Micikevicius","year":"2018"},{"key":"ref19","first-page":"1","article-title":"Adam: A method for stochastic optimization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kingma"},{"key":"ref20","article-title":"8-bit optimizers via block-wise quantization","author":"Dettmers","year":"2022"},{"key":"ref21","article-title":"Memory efficient optimizers with 4-bit states","volume-title":"Proc. 37th Conf. Neural Inf. Process. Syst.","author":"Li"},{"key":"ref22","article-title":"Parameter-efficient fine-tuning for large models: A comprehensive survey","volume-title":"Trans. Mach. Learn. Res.","author":"Han","year":"2024"},{"key":"ref23","first-page":"17783","article-title":"LoRA: Low-rank adaptation of large language models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hu"},{"key":"ref24","article-title":"LoRA learns less and forgets less","volume-title":"Trans. Mach. Learn. Res.","author":"Biderman","year":"2024"},{"key":"ref25","article-title":"LoRA vs full fine-tuning: An illusion of equivalence","author":"Shuttleworth","year":"2024"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476205"},{"key":"ref27","article-title":"How to optimize data transfers in CUDA C\/C++","author":"Harris","year":"2012"},{"key":"ref28","article-title":"LoHan: Low-cost high-performance framework to fine-tune 100B model on a consumer GPU","author":"Liao","year":"2024"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00034"},{"key":"ref30","first-page":"1","article-title":"SSDTrain: Faster large language model training using SSD-based activation offloading","volume-title":"Proc. Des. Automat. Conf.","author":"Wu"},{"key":"ref31","article-title":"DeepSpeed","year":"2020"},{"key":"ref32","first-page":"551","article-title":"ZeRO-Offload: Democratizing billion-scale model training","volume-title":"Proc. 2021 USENIX Annu. Tech. Conf.","author":"Ren"},{"key":"ref33","article-title":"Unsloth gradient checkpointing - 4x longer context windows","year":"2025"},{"key":"ref34","article-title":"Qwen2.5 technical report","year":"2025"},{"key":"ref35","article-title":"OpenWebText corpus","author":"Gokaslan","year":"2019"}],"container-title":["IEEE Transactions on Emerging Topics in Computing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6245516\/11433431\/11391518.pdf?arnumber=11391518","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T19:54:57Z","timestamp":1773431697000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11391518\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1]]},"references-count":34,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tetc.2026.3661468","relation":{},"ISSN":["2168-6750","2376-4562"],"issn-type":[{"value":"2168-6750","type":"electronic"},{"value":"2376-4562","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1]]}}}