{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T16:03:23Z","timestamp":1776096203932,"version":"3.50.1"},"reference-count":29,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,9,25]],"date-time":"2023-09-25T00:00:00Z","timestamp":1695600000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,9,25]],"date-time":"2023-09-25T00:00:00Z","timestamp":1695600000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,9,25]]},"DOI":"10.1109\/hpec58863.2023.10363447","type":"proceedings-article","created":{"date-parts":[[2023,12,25]],"date-time":"2023-12-25T14:39:57Z","timestamp":1703515197000},"page":"1-9","source":"Crossref","is-referenced-by-count":149,"title":["From Words to Watts: Benchmarking the Energy Costs of Large Language Model Inference"],"prefix":"10.1109","author":[{"given":"Siddharth","family":"Samsi","sequence":"first","affiliation":[{"name":"MIT"}]},{"given":"Dan","family":"Zhao","sequence":"additional","affiliation":[{"name":"NYU"}]},{"given":"Joseph","family":"McDonald","sequence":"additional","affiliation":[{"name":"MIT"}]},{"given":"Baolin","family":"Li","sequence":"additional","affiliation":[{"name":"NYU"}]},{"given":"Adam","family":"Michaleas","sequence":"additional","affiliation":[{"name":"MIT"}]},{"given":"Michael","family":"Jones","sequence":"additional","affiliation":[{"name":"MIT"}]},{"given":"William","family":"Bergeron","sequence":"additional","affiliation":[{"name":"MIT"}]},{"given":"Jeremy","family":"Kepner","sequence":"additional","affiliation":[{"name":"MIT"}]},{"given":"Devesh","family":"Tiwari","sequence":"additional","affiliation":[{"name":"MIT"}]},{"given":"Vijay","family":"Gadepally","sequence":"additional","affiliation":[{"name":"MIT"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"Stable Diffusion","year":"2023"},{"key":"ref2","year":"2022","journal-title":"OReilly Media, Inc"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1126\/science.adh4451"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1038\/s41746-023-00873-0"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1136\/jme-2023-108909"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1355"},{"key":"ref7","author":"Shoeybi","year":"2020","journal-title":"Megatron-lm: Training multi-billion parameter language models using model parallelism"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476209"},{"key":"ref9","volume-title":"Nvidia\/megatron-lm: Ongoing research training transformer models at scale","year":"2023"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN55064.2022.9891914"},{"key":"ref11","volume-title":"The ai brick wall - a practical limit for scaling dense transformer models, and how gpt 4 will break past it","author":"Patel","year":"2023"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.suscom.2023.100857"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW55747.2022.00126"},{"key":"ref14","author":"Touvron","year":"2023","journal-title":"Llama: Open and efficient foundation language models"},{"key":"ref15","volume-title":"Different development paths of 
llms"},{"key":"ref16","author":"Vaswani","year":"2017","journal-title":"Attention is all you need"},{"key":"ref17","author":"Gozalo-Brizuela","year":"2023","journal-title":"Chatgpt is not all you need. a state of the art review of large generative ai models"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC.2018.8547629"},{"key":"ref19","volume-title":"Facebook Research","year":"2023"},{"key":"ref20","volume-title":"Fairscale: A general purpose modular pytorch library for high performance and large scale training","year":"2021"},{"key":"ref21","volume-title":"Stanford alpaca: An instruction-following llama model","author":"Taori","year":"2023"},{"key":"ref22","article-title":"Training verifiers to solve math word problems","author":"Cobbe","year":"2021","journal-title":"arXiv preprint"},{"key":"ref23","volume-title":"NVIDIA. Nvidia-smi"},{"key":"ref24","volume-title":"Nvidia data center GPU manager (dcgm)"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-naacl.151"},{"key":"ref26","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)","author":"Devlin","year":"2019"},{"key":"ref27","volume-title":"Multi-Process Service","year":"2023"},{"key":"ref28","volume-title":"NVIDIA Multi Instance GPU User Guide","year":"2023"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3542929.3563510"}],"event":{"name":"2023 IEEE High Performance Extreme Computing Conference (HPEC)","location":"Boston, MA, USA","start":{"date-parts":[[2023,9,25]]},"end":{"date-parts":[[2023,9,29]]}},"container-title":["2023 IEEE High Performance Extreme Computing Conference (HPEC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10363430\/10363422\/10363447.pdf?arnumber=10363447","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T15:45:45Z","timestamp":1705074345000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10363447\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,25]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/hpec58863.2023.10363447","relation":{},"subject":[],"published":{"date-parts":[[2023,9,25]]}}}