{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:06:03Z","timestamp":1750309563475,"version":"3.41.0"},"reference-count":12,"publisher":"Association for Computing Machinery (ACM)","issue":"10","license":[{"start":{"date-parts":[[2024,9,26]],"date-time":"2024-09-26T00:00:00Z","timestamp":1727308800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":["Commun. ACM"],"published-print":{"date-parts":[[2024,10]]},"DOI":"10.1145\/3677387","type":"journal-article","created":{"date-parts":[[2024,8,26]],"date-time":"2024-08-26T19:39:45Z","timestamp":1724701185000},"page":"15-17","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["A Hybrid Future for AI"],"prefix":"10.1145","volume":"67","author":[{"given":"Chris","family":"Edwards","sequence":"first","affiliation":[{"name":"Surrey, Surrey, United Kingdom"}]}],"member":"320","published-online":{"date-parts":[[2024,9,26]]},"reference":[{"key":"e_1_3_1_2_1","unstructured":"Bai G. Chai Z. Ling C. Wang S. Lu J. Zhang N. Shi T. Yu Z. Zhu M. Zhang Y. Yang C. Cheng Y. and Zhao L."},{"key":"e_1_3_1_3_1","unstructured":"Beyond efficiency: A Systematic Survey of Resource-Efficient Large Language Models"},{"key":"e_1_3_1_4_1","unstructured":"arXiv:2401.00625 (2024). Accompanying list of LLM optimization papers: https:\/\/github.com\/tiingweii-shii\/Awesome-Resource-Efficient-LLM-Papers"},{"key":"e_1_3_1_5_1","unstructured":"Liu Z. Wang J. Dao T. Zhou T. Yuan B. Song Z. Shrivastava A. Zhang C. Tian Y. R\u00e9 C. and Chen B."},{"key":"e_1_3_1_6_1","unstructured":"D\u00e9j\u00e0 Vu: Contextual Sparsity for Efficient LLMs at Inference Time"},{"key":"e_1_3_1_7_1","unstructured":"Proceedings of the 40th International Conference on Machine Learning (2023) Article No.919 pp 22137-22176"},{"key":"e_1_3_1_8_1","unstructured":"Leviathan Y. Kalman M. and Matias Y."},{"key":"e_1_3_1_9_1","unstructured":"Fast Inference from Transformers via Speculative Decoding"},{"key":"e_1_3_1_10_1","unstructured":"Proceedings of the 40th International Conference on Machine Learning (2023) Article No.795 pp 19274-19286"},{"key":"e_1_3_1_11_1","unstructured":"Devvrit K. Kudugunta S. Kusupati A. Dettmers T. Chen K. Dhillon I.S. Tsvetkov Y. Hajishirzi H. Kakade S.M. Farhadi A. and Jain P."},{"key":"e_1_3_1_12_1","unstructured":"MatFormer: Nested Transformer for Elastic Inference"},{"key":"e_1_3_1_13_1","unstructured":"Workshop on Advancing Neural Network Training (WANT) at NeurIPS 2023. arXiv: 2310.07707"}],"container-title":["Communications of the ACM"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3677387","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3677387","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:19:03Z","timestamp":1750295943000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3677387"}},"subtitle":["The drive for efficiency brings large language models out of the cloud."],"short-title":[],"issued":{"date-parts":[[2024,9,26]]},"references-count":12,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2024,10]]}},"alternative-id":["10.1145\/3677387"],"URL":"https:\/\/doi.org\/10.1145\/3677387","relation":{},"ISSN":["0001-0782","1557-7317"],"issn-type":[{"type":"print","value":"0001-0782"},{"type":"electronic","value":"1557-7317"}],"subject":[],"published":{"date-parts":[[2024,9,26]]},"assertion":[{"value":"2024-09-26","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}