{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,24]],"date-time":"2025-08-24T01:16:01Z","timestamp":1755998161253,"version":"3.41.0"},"reference-count":33,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,3,31]]},"DOI":"10.23919\/date64628.2025.10992712","type":"proceedings-article","created":{"date-parts":[[2025,5,21]],"date-time":"2025-05-21T17:36:35Z","timestamp":1747848995000},"page":"1-7","source":"Crossref","is-referenced-by-count":1,"title":["Distributed Inference with Minimal Off-Chip Traffic for Transformers on Low-Power MCUs"],"prefix":"10.23919","author":[{"given":"Severin","family":"Bochem","sequence":"first","affiliation":[{"name":"D-ITET, ETH Zurich,Switzerland"}]},{"given":"Victor J.B.","family":"Jung","sequence":"additional","affiliation":[{"name":"ETH Zurich,Integrated Systems Laboratory,Switzerland"}]},{"given":"Arpan Suravi","family":"Prasad","sequence":"additional","affiliation":[{"name":"ETH Zurich,Integrated Systems Laboratory,Switzerland"}]},{"given":"Francesco","family":"Conti","sequence":"additional","affiliation":[{"name":"DEI, and Information Engineering, University of Bologna,Italy"}]},{"given":"Luca","family":"Benini","sequence":"additional","affiliation":[{"name":"ETH Zurich,Integrated Systems Laboratory,Switzerland"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2105.15203"},{"article-title":"BERT: Pretraining of deep bidirectional transformers for language understanding","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)","author":"Devlin","key":"ref3"},{"volume-title":"Gpt-4 technical report","year":"2024","author":"O","key":"ref4"},{"volume-title":"Llama: Open and efficient foundation language models","year":"2023","author":"Touvron","key":"ref5"},{"volume-title":"Hierarchical text-conditional image generation with clip latents","year":"2022","author":"Ramesh","key":"ref6"},{"volume-title":"Gazegpt: Augmenting human capabilities using gaze-contingent contextual ai for smart eyewear","year":"2024","author":"Konrad","key":"ref7"},{"volume-title":"Tinystories: How small can language models be and still speak coherent english?","year":"2023","author":"Eldan","key":"ref8"},{"volume-title":"Tinyllama: An open-source small language model","year":"2024","author":"Zhang","key":"ref9"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.195"},{"article-title":"Xrbench: An extended reality (xr) machine learning benchmark suite for the metaverse","volume-title":"Proceedings of Machine Learning and Systems","author":"Kwon","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/SC41404.2022.00051"},{"key":"ref13","article-title":"Efficiently scaling transformer inference","author":"Pope","year":"2022","journal-title":"CoRR"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2024.3385987"},{"volume-title":"llama2.c: Inference llama 2 in one file of pure c","year":"2023","author":"Karpathy","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527405"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD53106.2021.00071"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/COINS51742.2021.9524173"},{"volume-title":"Gaussian error linear units (gelus)","year":"2023","author":"Hendrycks","key":"ref19"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2018.2858384"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS60910.2024.00017"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD63220.2024.00076"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ASAP.2018.8445101"},{"volume-title":"Llama 2: Open foundation and fine-tuned chat models","year":"2023","author":"Touvron","key":"ref24"},{"volume-title":"Mixtral of experts","year":"2024","author":"Jiang","key":"ref25"},{"volume-title":"Textbooks are all you need ii: phi-1.5 technical report","year":"2023","author":"Li","key":"ref26"},{"volume-title":"Textbooks are all you need","year":"2023","author":"Gunasekar","key":"ref27"},{"key":"ref28","first-page":"32431","article-title":"MobileLLM: Optimizing sub-billion parameter language models for on-device use cases","volume-title":"ser. Proceedings of Machine Learning Research","volume":"235","author":"Liu"},{"key":"ref29","article-title":"Sheared LLaMA: Accelerating language model pretraining via structured pruning","volume-title":"The Twelfth International Conference on Learning Representations","author":"Xia","year":"2024"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00180"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/DSD57027.2022.00048"},{"volume-title":"Distributed on-sensor compute system for ar\/vr devices: A semi-analytical simulation framework for power estimation","year":"2022","author":"Gomez","key":"ref32"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2024.3443718"}],"event":{"name":"2025 Design, Automation &amp; Test in Europe Conference (DATE)","start":{"date-parts":[[2025,3,31]]},"location":"Lyon, France","end":{"date-parts":[[2025,4,2]]}},"container-title":["2025 Design, Automation &amp;amp; Test in Europe Conference (DATE)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10992638\/10992588\/10992712.pdf?arnumber=10992712","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,22]],"date-time":"2025-05-22T05:32:24Z","timestamp":1747891944000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10992712\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,31]]},"references-count":33,"URL":"https:\/\/doi.org\/10.23919\/date64628.2025.10992712","relation":{},"subject":[],"published":{"date-parts":[[2025,3,31]]}}}