{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T14:31:30Z","timestamp":1760711490656,"version":"3.40.3"},"reference-count":22,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,9,23]],"date-time":"2024-09-23T00:00:00Z","timestamp":1727049600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,9,23]],"date-time":"2024-09-23T00:00:00Z","timestamp":1727049600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,9,23]]},"DOI":"10.1109\/hpec62836.2024.10938418","type":"proceedings-article","created":{"date-parts":[[2025,4,3]],"date-time":"2025-04-03T19:07:19Z","timestamp":1743707239000},"page":"1-8","source":"Crossref","is-referenced-by-count":3,"title":["Breakthrough Low-Latency, High-Energy-Efficiency LLM Inference Performance Using NorthPole"],"prefix":"10.1109","author":[{"given":"Rathinakumar","family":"Appuswamy","sequence":"first","affiliation":[{"name":"IBM Research"}]},{"given":"Michael V.","family":"Debole","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Brian","family":"Taba","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Steven K.","family":"Esser","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Andrew S.","family":"Cassidy","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Arnon","family":"Amir","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Alexander","family":"Andreopoulos","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Deepika","family":"Bablani","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Pallab","family":"Datta","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Jeffrey A.","family":"Kusnitz","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Nathaniel J.","family":"McClatchey","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Neil","family":"McGlohon","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Jeffrey L.","family":"McKinstry","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Tapan K.","family":"Nayak","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Daniel F.","family":"Smith","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Rafael","family":"Sousa","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Ignacio","family":"Terrizzano","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Filipp","family":"Akopyan","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Peter J.","family":"Carlson","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Rajamohan","family":"Gandhasri","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Guillaume J.","family":"Garreau","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Nelson M.","family":"Gonzalez","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Megumi","family":"Ito","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Jennifer L.","family":"Klamo","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Yutaka","family":"Nakamura","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Carlos Ortega","family":"Otero","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"William P.","family":"Risk","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Jun","family":"Sawada","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Kai","family":"Schleupen","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Jay","family":"Sivagnaname","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Matthew","family":"Stallone","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Takanori","family":"Ueda","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Myron D.","family":"Flickner","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"John V.","family":"Arthur","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Rameswar","family":"Panda","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"David D.","family":"Cox","sequence":"additional","affiliation":[{"name":"IBM Research"}]},{"given":"Dharmendra S.","family":"Modha","sequence":"additional","affiliation":[{"name":"IBM Research"}]}],"member":"263","reference":[{"volume-title":"Language models are few-shot learners","year":"2020","author":"Brown","key":"ref1"},{"volume-title":"Harnessing the power of LLMs in practice: A survey on ChatGPT and beyond","year":"2023","author":"Yang","key":"ref2"},{"volume-title":"Evaluating large language models trained on code","year":"2021","author":"Chen","key":"ref3"},{"journal-title":"Granite code models: A family of open foundation models for code intelligence","year":"2024","author":"Mishra","key":"ref4"},{"volume-title":"GPT-4 technical report","year":"2023","author":"O.","key":"ref5"},{"volume-title":"The rise and rise of A.I. large language models (LLMs) & their associated bots like ChatGPT","year":"2024","author":"McCandless","key":"ref6"},{"journal-title":"The rising costs of training frontier AI models","year":"2024","author":"Cottier","key":"ref7"},{"volume-title":"From words to watts: Benchmarking the energy costs of large language model inference","year":"2023","author":"Samsi","key":"ref8"},{"volume-title":"How to manage AI\u2019s energy demand \u2014 today, tomorrow and in the future","year":"2024","author":"Ammanath","key":"ref9"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/1022594.1022596"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1126\/science.adh1174"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC49657.2024.10454451"},{"article-title":"Llama 3 model card","volume-title":"AI@Meta","year":"2024","key":"ref13"},{"volume-title":"Mistral 7B","year":"2023","author":"Jiang","key":"ref14"},{"volume-title":"GPipe: Efficient training of giant neural networks using pipeline parallelism","year":"2019","author":"Huang","key":"ref15"},{"volume-title":"Mesh-TensorFlow: Deep learning for supercomputers","year":"2018","author":"Shazeer","key":"ref16"},{"volume-title":"Megatron-LM: Training multi-billion parameter language models using model parallelism","year":"2020","author":"Shoeybi","key":"ref17"},{"key":"ref18","article-title":"Learned step size quantization","volume-title":"International Conference on Learning Representations","author":"Esser","year":"2020"},{"journal-title":"Octopack: Instruction tuning code large language models","year":"2023","author":"Muennighoff","key":"ref19"},{"article-title":"NVIDIA ADA GPU Architecture (V2.01)","volume-title":"NVIDIA Corporation","year":"2023","key":"ref20"},{"article-title":"NVIDIA Ampere GA102 GPU Architecture (V2.1)","volume-title":"NVIDIA Corporation","year":"2021","key":"ref21"},{"article-title":"NVIDIA H100 Tensor Core GPU Architecture (V1.04)","volume-title":"NVIDIA Corporation","year":"2023","key":"ref22"}],"event":{"name":"2024 IEEE High Performance Extreme Computing Conference (HPEC)","start":{"date-parts":[[2024,9,23]]},"location":"Wakefield, MA, USA","end":{"date-parts":[[2024,9,27]]}},"container-title":["2024 IEEE High Performance Extreme Computing Conference (HPEC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10938401\/10938415\/10938418.pdf?arnumber=10938418","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T11:10:19Z","timestamp":1743765019000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10938418\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,23]]},"references-count":22,"URL":"https:\/\/doi.org\/10.1109\/hpec62836.2024.10938418","relation":{},"subject":[],"published":{"date-parts":[[2024,9,23]]}}}