{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T01:07:53Z","timestamp":1773277673528,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":51,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,27]],"date-time":"2024-04-27T00:00:00Z","timestamp":1714176000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,27]]},"DOI":"10.1145\/3620666.3651324","type":"proceedings-article","created":{"date-parts":[[2024,4,24]],"date-time":"2024-04-24T12:08:21Z","timestamp":1713960501000},"page":"545-560","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":42,"title":["IANUS: Integrated Accelerator based on NPU-PIM Unified Memory System"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3648-5575","authenticated-orcid":false,"given":"Minseok","family":"Seo","sequence":"first","affiliation":[{"name":"Electrical and Computer Engineering, Seoul National University, Inter-university Semiconductor Research Center, Seoul, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7527-6971","authenticated-orcid":false,"given":"Xuan Truong","family":"Nguyen","sequence":"additional","affiliation":[{"name":"Electrical and Computer Engineering, Seoul National University, Inter-university Semiconductor Research Center, Seoul, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2668-729X","authenticated-orcid":false,"given":"Seok Joong","family":"Hwang","sequence":"additional","affiliation":[{"name":"SAPEON Inc., Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9511-4734","authenticated-orcid":false,"given":"Yongkee","family":"Kwon","sequence":"additional","affiliation":[{"name":"SK hynix, Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7076-2818","authenticated-orcid":false,"given":"Guhyun","family":"Kim","sequence":"additional","affiliation":[{"name":"SK hynix, Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-5764-5246","authenticated-orcid":false,"given":"Chanwook","family":"Park","sequence":"additional","affiliation":[{"name":"SK hynix, Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0881-9292","authenticated-orcid":false,"given":"Ilkon","family":"Kim","sequence":"additional","affiliation":[{"name":"SK hynix, Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9006-4904","authenticated-orcid":false,"given":"Jaehan","family":"Park","sequence":"additional","affiliation":[{"name":"SK hynix, Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5950-0056","authenticated-orcid":false,"given":"Jeongbin","family":"Kim","sequence":"additional","affiliation":[{"name":"SK hynix, Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9454-9966","authenticated-orcid":false,"given":"Woojae","family":"Shin","sequence":"additional","affiliation":[{"name":"SK hynix, Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-4632-0072","authenticated-orcid":false,"given":"Jongsoon","family":"Won","sequence":"additional","affiliation":[{"name":"SK hynix, Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8933-6226","authenticated-orcid":false,"given":"Haerang","family":"Choi","sequence":"additional","affiliation":[{"name":"SK hynix, Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-9585-3835","authenticated-orcid":false,"given":"Kyuyoung","family":"Kim","sequence":"additional","affiliation":[{"name":"SK hynix, Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2033-8928","authenticated-orcid":false,"given":"Daehan","family":"Kwon","sequence":"additional","affiliation":[{"name":"SK hynix, Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-1666-7878","authenticated-orcid":false,"given":"Chunseok","family":"Jeong","sequence":"additional","affiliation":[{"name":"SK hynix, Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-9398-1507","authenticated-orcid":false,"given":"Sangheon","family":"Lee","sequence":"additional","affiliation":[{"name":"SAPEON Inc., Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6512-0291","authenticated-orcid":false,"given":"Yongseok","family":"Choi","sequence":"additional","affiliation":[{"name":"SAPEON Inc., Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2720-3102","authenticated-orcid":false,"given":"Wooseok","family":"Byun","sequence":"additional","affiliation":[{"name":"SAPEON Inc., Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-1624-4586","authenticated-orcid":false,"given":"Seungcheol","family":"Baek","sequence":"additional","affiliation":[{"name":"SAPEON Inc., Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8895-9117","authenticated-orcid":false,"given":"Hyuk-Jae","family":"Lee","sequence":"additional","affiliation":[{"name":"Electrical and Computer Engineering, Seoul National University, Inter-university Semiconductor Research Center, Seoul, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3958-3891","authenticated-orcid":false,"given":"John","family":"Kim","sequence":"additional","affiliation":[{"name":"School of Computing, KAIST, Daejeon, Republic of Korea"}]}],"member":"320","published-online":{"date-parts":[[2024,4,27]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"1495","volume-title":"Automation & Test in Europe Conference & Exhibition (DATE)","author":"Ahn Minwook","year":"2019","unstructured":"Minwook Ahn, Seok Joong Hwang, Wonsub Kim, Seungrok Jung, Yeonbok Lee, Mookyoung Chung, Woohyung Lim, and Youngjoon Kim. Aix: A high performance and energy efficient inference accelerator on fpga for a dnn-based commercial speech recognition. In 2019 Design, Automation & Test in Europe Conference & Exhibition (DATE), pages 1495--1500. IEEE, 2019."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001138"},{"key":"e_1_3_2_1_3_1","volume-title":"Jamie Ryan Kiros, and Geoffrey E Hinton. Layer normalization. arXiv preprint arXiv:1607.06450","author":"Ba Jimmy Lei","year":"2016","unstructured":"Jimmy Lei Ba, Jamie Ryan Kiros, and Geoffrey E Hinton. Layer normalization. arXiv preprint arXiv:1607.06450, 2016."},{"key":"e_1_3_2_1_4_1","volume-title":"Training stochastic model recognition algorithms as networks can lead to maximum mutual information estimation of parameters. Advances in neural information processing systems, 2","author":"Bridle John","year":"1989","unstructured":"John Bridle. Training stochastic model recognition algorithms as networks can lead to maximum mutual information estimation of parameters. Advances in neural information processing systems, 2, 1989."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2654822.2541967"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001177"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00072"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/HOTCHIPS.2019.8875680"},{"key":"e_1_3_2_1_9_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805, 2018."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3441578"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/800046.801649"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00012"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00035"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00060"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00040"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00062"},{"key":"e_1_3_2_1_18_1","volume-title":"Gaussian error linear units (gelus). arXiv preprint arXiv:1606.08415","author":"Hendrycks Dan","year":"2016","unstructured":"Dan Hendrycks and Kevin Gimpel. Gaussian error linear units (gelus). arXiv preprint arXiv:1606.08415, 2016."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00051"},{"key":"e_1_3_2_1_20_1","volume-title":"2019 IEEE Hot Chips 31 Symposium (HCS)","author":"Hwang Seok Joong","year":"2019","unstructured":"Seok Joong Hwang, Jeongho Han, Minwook Ahn, Seungrok Jung, Wonsub Kim, Yongshik Moon, Sangjun Yang, Moo-Kyoung Chung, Jaehyeok Jang, Youngjae Jin, Yongsang Park, Namseob Lee, Daewoo Kim, Euiseok Kim, Choong Hwan Choi, and Heeyul Lee. Aix v2: Flexible high performance ai inference accelerator for datacenters. In 2019 IEEE Hot Chips 31 Symposium (HCS), 2019."},{"key":"e_1_3_2_1_21_1","volume-title":"Morgan Kaufmann","author":"Jacob Bruce","year":"2010","unstructured":"Bruce Jacob, David Wang, and Spencer Ng. Memory systems: cache, DRAM, disk. Morgan Kaufmann, 2010."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589350"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00010"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2022.3164651"},{"key":"e_1_3_2_1_26_1","first-page":"256","volume-title":"Sparse Matrix Proceedings 1978","volume":"1","author":"Kung Hsiang Tsung","year":"1979","unstructured":"Hsiang Tsung Kung and Charles E Leiserson. Systolic arrays (for vlsi). In Sparse Matrix Proceedings 1978, volume 1, pages 256--282. Society for industrial and applied mathematics Philadelphia, PA, USA, 1979."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2022.3200718"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3296957.3173176"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/HCS55958.2022.9895629"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42613.2021.9365862"},{"key":"e_1_3_2_1_31_1","first-page":"1","volume-title":"2022 IEEE International Solid-State Circuits Conference (ISSCC)","volume":"65","author":"Lee Seongju","year":"2022","unstructured":"Seongju Lee, Kyuyoung Kim, Sanghoon Oh, Joonhong Park, Gimoon Hong, Dongyoon Ka, Kyudong Hwang, Jeongje Park, Kyeongpil Kang, Jungyeon Kim, Junyeol Jeon, Nahsung Kim, Yongkee Kwon, Kornijcuk Vladimir, Woojae Shin, Jongsoon Won, Minkyu Lee, Hyunha Joo, Haerang Choi, Jaewook Lee, Donguc Ko, Younggun Jun, Keewon Cho, Ilwoong Kim, Choungki Song, Chunseok Jeong, Daehan Kwon, Jieun Jang, Il Park, Junhyun Chun, and Joohwan Cho. A 1ynm 1.25 v 8gb, 16gb\/s\/pin gddr6-based accelerator-in-memory supporting 1tflops mac operation and various activation functions for deep-learning applications. In 2022 IEEE International Solid-State Circuits Conference (ISSCC), volume 65, pages 1--3. IEEE, 2022."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00013"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3123977"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/2786763.2694358"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480125"},{"key":"e_1_3_2_1_36_1","unstructured":"Micron. Gddr6 datasheet. [Online]. Available: https:\/\/media-www.micron.com\/-\/media\/client\/global\/documents\/products\/data-sheet\/dram\/gddr\/gddr6\/gddr6_sgram_8gb_brief.pdf."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2021.3058217"},{"key":"e_1_3_2_1_38_1","unstructured":"NVIDIA. Nvidia a100 tensor core gpu. [Online]. Available: https:\/\/www.nvidia.com\/en-us\/data-center\/a100\/."},{"key":"e_1_3_2_1_39_1","unstructured":"OpenAI. Input:output token ratio. [Online]. Available: https:\/\/beta.openai.com\/docs\/usage-guidelines\/use-case-guidelines."},{"issue":"8","key":"e_1_3_2_1_40_1","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford Alec","year":"2019","unstructured":"Alec Radford, Jeffrey Wu, Rewon Child, David Luan, Dario Amodei, and Ilya Sutskever. Language models are unsupervised multitask learners. OpenAI blog, 1(8):9, 2019.","journal-title":"OpenAI blog"},{"key":"e_1_3_2_1_41_1","unstructured":"SAPEON. Product of SAPEON - X330. [Online]. Available: https:\/\/www.sapeon.com\/products\/sapeon-x330."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2018.2857044"},{"key":"e_1_3_2_1_43_1","volume-title":"Megatron-lm: Training multi-billion parameter language models using model parallelism. arXiv preprint arXiv:1909.08053","author":"Shoeybi Mohammad","year":"2019","unstructured":"Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper, and Bryan Catanzaro. Megatron-lm: Training multi-billion parameter language models using model parallelism. arXiv preprint arXiv:1909.08053, 2019."},{"key":"e_1_3_2_1_44_1","volume-title":"Attention is all you need. Advances in neural information processing systems, 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. Attention is all you need. Advances in neural information processing systems, 30, 2017."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00018"},{"key":"e_1_3_2_1_46_1","first-page":"4","article-title":"The secret to high performance on cloud tpus","author":"Wang Shibo","year":"2019","unstructured":"Shibo Wang and Pankaj Kanwar. Bfloat16: The secret to high performance on cloud tpus. Google Cloud Blog, 4, 2019.","journal-title":"Google Cloud Blog"},{"key":"e_1_3_2_1_47_1","volume-title":"Sylvain Gugger, Mariama Drame, Quentin Lhoest, and Alexander M. Rush. Huggingface's transformers: State-of-the-art natural language processing. arXiv preprint arXiv:1910.03771","author":"Wolf Thomas","year":"2019","unstructured":"Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Clement Delangue, Anthony Moi, Pierric Cistac, Tim Rault, R\u00e9mi Louf, Morgan Funtowicz, Joe Davison, Sam Shleifer, Patrick von Platen, Clara Ma, Yacine Jernite, Julien Plu, Canwen Xu, Teven Le Scao, Sylvain Gugger, Mariama Drame, Quentin Lhoest, and Alexander M. Rush. Huggingface's transformers: State-of-the-art natural language processing. arXiv preprint arXiv:1910.03771, 2019."},{"key":"e_1_3_2_1_48_1","unstructured":"Xilinx. Xilinx VCU118 Evaluation Kit. [Online]. Available: https:\/\/www.xilinx.com\/products\/boards-and-kits\/vcu118.html."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00059"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530505"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA53966.2022.00082"}],"event":{"name":"ASPLOS '24: 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3","location":"La Jolla CA USA","acronym":"ASPLOS '24","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","SIGOPS ACM Special Interest Group on Operating Systems","SIGPLAN ACM Special Interest Group on Programming Languages","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620666.3651324","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:42Z","timestamp":1750291422000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620666.3651324"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,27]]},"references-count":51,"alternative-id":["10.1145\/3620666.3651324","10.1145\/3620666"],"URL":"https:\/\/doi.org\/10.1145\/3620666.3651324","relation":{},"subject":[],"published":{"date-parts":[[2024,4,27]]},"assertion":[{"value":"2024-04-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}