{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T17:09:57Z","timestamp":1773248997914,"version":"3.50.1"},"reference-count":28,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T00:00:00Z","timestamp":1726444800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T00:00:00Z","timestamp":1726444800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100000781","name":"European Research Council","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000781","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004040","name":"KU Leuven","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004040","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,9,16]]},"DOI":"10.1109\/socc62300.2024.10737844","type":"proceedings-article","created":{"date-parts":[[2024,11,5]],"date-time":"2024-11-05T18:29:41Z","timestamp":1730831381000},"page":"1-6","source":"Crossref","is-referenced-by-count":9,"title":["Energy Cost Modelling for Optimizing Large Language Model Inference on Hardware Accelerators"],"prefix":"10.1109","author":[{"given":"Robin","family":"Geens","sequence":"first","affiliation":[{"name":"KU Leuven,MICAS,Belgium"}]},{"given":"Man","family":"Shi","sequence":"additional","affiliation":[{"name":"KU Leuven,MICAS,Belgium"}]},{"given":"Arne","family":"Symons","sequence":"additional","affiliation":[{"name":"KU Leuven,MICAS,Belgium"}]},{"given":"Chao","family":"Fang","sequence":"additional","affiliation":[{"name":"KU Leuven,MICAS,Belgium"}]},{"given":"Marian","family":"Verhelst","sequence":"additional","affiliation":[{"name":"KU Leuven,MICAS,Belgium"}]}],"member":"263","reference":[{"key":"ref1","first-page":"709","article-title":"Characterization of large language model development in the datacenter","volume-title":"21st USENIX Symposium on Networked Systems Design and Implementation (NSDI 24)","author":"Hu"},{"key":"ref2","article-title":"QServe: W4A8KV4 Quantization and System Co-design for Efficient LLM Serving","author":"Lin","year":"2024","journal-title":"arXiv preprint arXiv:2405.04532"},{"key":"ref3","article-title":"A survey on hardware accelerators for large language models","author":"Kachris","year":"2024","journal-title":"arXiv preprint arXiv:2401.09890"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2021.3059962"},{"key":"ref5","volume-title":"Llm inference unveiled: Survey and roofline model insights","author":"Yuan","year":"2024"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA59077.2024.00021"},{"key":"ref7","volume-title":"A hardware evaluation framework for large language model inference","author":"Zhang","year":"2023"},{"key":"ref8","article-title":"Megatron-lm: Training multi-billion parameter language models using model parallelism","author":"Shoeybi","year":"2019","journal-title":"arXiv preprint arXiv:1909.08053"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589057"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00062"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00050"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2022.3197282"},{"key":"ref13","article-title":"OmniQuant: Omnidirectionally Calibrated Quantization for Large Language Models","volume-title":"The Twelfth International Conference on Learning Representations (ICLR)","author":"Shao"},{"key":"ref14","article-title":"AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration","volume-title":"The Seventh Annual Conference on Machine Learning and Systems (MLSys)","author":"Lin"},{"key":"ref15","article-title":"Optq: Accurate quantization for generative pre-trained transformers","volume-title":"The Eleventh International Conference on Learning Representations (ICLR)","author":"Frantar"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3626202.3637562"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589038"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00064"},{"key":"ref19","volume-title":"Onnx: Open neural network exchange","year":"2024"},{"key":"ref20","volume-title":"PyTorch: An Imperative Style, High-Performance Deep Learning Library","author":"Paszke","year":"2019"},{"key":"ref21","volume-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2014.6757323"},{"key":"ref23","article-title":"Fusemax: Leveraging extended einsums to optimize attention accelerator design","author":"Nayak","year":"2024","journal-title":"Machine Learning for Computer Architecture and Systems 2024"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/2159430.2159433"},{"key":"ref25","volume-title":"Coral ai","year":"2020"},{"key":"ref26","volume-title":"Gpt-3: Language models are few-shot learners","year":"2020"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00060"},{"key":"ref28","volume-title":"The Era of 1-bit LLMs: All Large Language Models are in 1.58 Bits","author":"Ma","year":"2024"}],"event":{"name":"2024 IEEE 37th International System-on-Chip Conference (SOCC)","location":"Dresden, Germany","start":{"date-parts":[[2024,9,16]]},"end":{"date-parts":[[2024,9,19]]}},"container-title":["2024 IEEE 37th International System-on-Chip Conference (SOCC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10737725\/10737709\/10737844.pdf?arnumber=10737844","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T13:53:43Z","timestamp":1732715623000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10737844\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,16]]},"references-count":28,"URL":"https:\/\/doi.org\/10.1109\/socc62300.2024.10737844","relation":{},"subject":[],"published":{"date-parts":[[2024,9,16]]}}}