{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T05:55:30Z","timestamp":1763704530049,"version":"3.45.0"},"reference-count":37,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,26]],"date-time":"2025-10-26T00:00:00Z","timestamp":1761436800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,26]],"date-time":"2025-10-26T00:00:00Z","timestamp":1761436800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,26]]},"DOI":"10.1109\/iccad66269.2025.11240826","type":"proceedings-article","created":{"date-parts":[[2025,11,20]],"date-time":"2025-11-20T18:39:34Z","timestamp":1763663974000},"page":"1-9","source":"Crossref","is-referenced-by-count":0,"title":["LLM-on-the-Palm: Mobile LLM Inference with PIM-Enhanced NAND Flash Memory"],"prefix":"10.1109","author":[{"given":"Hyunjin","family":"Kim","sequence":"first","affiliation":[{"name":"Seoul National University,Seoul,Republic of Korea"}]},{"given":"Sanghyeok","family":"Han","sequence":"additional","affiliation":[{"name":"Seoul National University,Seoul,Republic of Korea"}]},{"given":"Jae-Joon","family":"Kim","sequence":"additional","affiliation":[{"name":"Seoul National University,Seoul,Republic of Korea"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3712001"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2025.3527641"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.678"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.jml.2019.104047"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10071024"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO61859.2024.00108"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00056"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/MSST.2019.000-5"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC49657.2024.10454343"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2018.8310322"},{"article-title":"Characterising bias in compressed models","year":"2020","author":"Hooker","key":"ref11"},{"key":"ref12","first-page":"62414","article-title":"Pruning vs quantization: Which is better?","volume":"36","author":"Kuzmin","year":"2023","journal-title":"Advances in neural information processing systems"},{"article-title":"Deep learning using rectified linear units (relu)","year":"2018","author":"Agarap","key":"ref13"},{"article-title":"Gaussian error linear units (gelus)","year":"2016","author":"Hendrycks","key":"ref14"},{"key":"ref15","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC49657.2024.10454343"},{"volume-title":"Open nand flash interface specification revision 5.1","year":"2022","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"ref19","first-page":"606","article-title":"Efficiently scaling transformer inference","volume-title":"Proceedings of Machine Learning and Systems","volume":"5","author":"Pope"},{"key":"ref20","first-page":"521","article-title":"Orca: A distributed serving system for {Transformer-Based} generative models","volume-title":"16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Yu"},{"year":"2023","key":"ref21","article-title":"Ufs 4.0 : Flagship storage"},{"key":"ref22","first-page":"49","article-title":"{MQSim}: A framework for enabling realistic studies of modern {Multi-Queue}{SSD} devices","volume-title":"16th USENIX Conference on File and Storage Technologies (FAST 18)","author":"Tavakkol"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2020.2973991"},{"year":"2018","key":"ref24","article-title":"Design compiler"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00069"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001154"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00035"},{"key":"ref28","first-page":"371","article-title":"Behemoth: a flash-centric training accelerator for extreme-scale {DNNs}","volume-title":"19th USENIX Conference on File and Storage Technologies (FAST 21)","author":"Kim"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-024-00864-x"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3626246.3654691"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.14778\/3685800.3685805"},{"key":"ref32","first-page":"9459","article-title":"Retrieval-augmented generation for knowledge-intensive nlp tasks","volume":"33","author":"Lewis","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2024.3392335"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/775152.775250"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1142\/4610"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4939-7393-4_3"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2008.2011087"}],"event":{"name":"2025 IEEE\/ACM International Conference On Computer Aided Design (ICCAD)","start":{"date-parts":[[2025,10,26]]},"location":"Munich, Germany","end":{"date-parts":[[2025,10,30]]}},"container-title":["2025 IEEE\/ACM International Conference On Computer Aided Design (ICCAD)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11240608\/11240621\/11240826.pdf?arnumber=11240826","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T05:44:54Z","timestamp":1763703894000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11240826\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,26]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/iccad66269.2025.11240826","relation":{},"subject":[],"published":{"date-parts":[[2025,10,26]]}}}