{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,18]],"date-time":"2025-10-18T00:10:13Z","timestamp":1760746213775,"version":"build-2065373602"},"reference-count":12,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T00:00:00Z","timestamp":1757894400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T00:00:00Z","timestamp":1757894400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,9,15]]},"DOI":"10.1109\/hpec67600.2025.11196629","type":"proceedings-article","created":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T17:35:37Z","timestamp":1760636137000},"page":"1-6","source":"Crossref","is-referenced-by-count":0,"title":["Predicting LLM Inference Server Request Capacity"],"prefix":"10.1109","author":[{"given":"Daniel J.","family":"Burrill","sequence":"first","affiliation":[{"name":"MIT Lincoln Laboratory,MA"}]},{"given":"LaToya","family":"Anderson","sequence":"additional","affiliation":[{"name":"MIT Lincoln Laboratory,MA"}]},{"given":"William","family":"Arcand","sequence":"additional","affiliation":[{"name":"MIT Lincoln Laboratory,MA"}]},{"given":"David","family":"Bestor","sequence":"additional","affiliation":[{"name":"MIT Lincoln Laboratory,MA"}]},{"given":"William","family":"Bergeron","sequence":"additional","affiliation":[{"name":"MIT Lincoln Laboratory,MA"}]},{"given":"Alex","family":"Bonn","sequence":"additional","affiliation":[{"name":"MIT Lincoln Laboratory,MA"}]},{"given":"Chansup","family":"Byun","sequence":"additional","affiliation":[{"name":"MIT Lincoln Laboratory,MA"}]},{"given":"Michael","family":"Houle","sequence":"additional","affiliation":[{"name":"MIT Lincoln Laboratory,MA"}]},{"given":"Matthew","family":"Hubbell","sequence":"additional","affiliation":[{"name":"MIT Lincoln Laboratory,MA"}]},{"given":"Michael","family":"Jones","sequence":"additional","affiliation":[{"name":"MIT Lincoln Laboratory,MA"}]},{"given":"Piotr","family":"Luszczek","sequence":"additional","affiliation":[{"name":"MIT Lincoln Laboratory,MA"}]},{"given":"Peter","family":"Michaleas","sequence":"additional","affiliation":[{"name":"MIT Lincoln Laboratory,MA"}]},{"given":"Guillermo","family":"Morales","sequence":"additional","affiliation":[{"name":"MIT Lincoln Laboratory,MA"}]},{"given":"Julie","family":"Mullen","sequence":"additional","affiliation":[{"name":"MIT Lincoln Laboratory,MA"}]},{"given":"Andrew","family":"Prout","sequence":"additional","affiliation":[{"name":"MIT Lincoln Laboratory,MA"}]},{"given":"Albert","family":"Reuther","sequence":"additional","affiliation":[{"name":"MIT Lincoln Laboratory,MA"}]},{"given":"Antonio","family":"Rosa","sequence":"additional","affiliation":[{"name":"MIT Lincoln Laboratory,MA"}]},{"given":"Charles","family":"Yee","sequence":"additional","affiliation":[{"name":"MIT Lincoln Laboratory,MA"}]},{"given":"Vijay","family":"Gadepally","sequence":"additional","affiliation":[{"name":"MIT Lincoln Laboratory,MA"}]}],"member":"263","reference":[{"article-title":"Phi-4-reasoning technical report","year":"2025","author":"Abdin","key":"ref1"},{"key":"ref2","first-page":"351","article-title":"Vidur: A large-scale simulation framework for LLM inference","volume-title":"Proceedings of Machine Learning and Systems","volume":"6","author":"Agrawal"},{"article-title":"Efficient LLM serving on hybrid real-time and best-effort requests","year":"2025","author":"Borui","key":"ref3"},{"key":"ref4","doi-asserted-by":"crossref","DOI":"10.1007\/978-0-387-73699-0","volume-title":"Building intuition: insights from basic operations management models and principles","volume":"115","author":"Chhajed","year":"2008"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/SCW63240.2024.00178"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-025-09422-z"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1287\/opre.9.3.383"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1287\/stsy.2025.0106"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC58863.2023.10363447"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA61900.2025.00102"},{"article-title":"Inference scaling laws: An empirical analysis of compute-optimal inference for LLM problem-solving","volume-title":"The Thirteenth International Conference on Learning Representations","author":"Wu","key":"ref12"}],"event":{"name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","start":{"date-parts":[[2025,9,15]]},"location":"Wakefield, MA, USA","end":{"date-parts":[[2025,9,19]]}},"container-title":["2025 IEEE High Performance Extreme Computing Conference (HPEC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11196085\/11196088\/11196629.pdf?arnumber=11196629","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T04:49:30Z","timestamp":1760676570000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11196629\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,15]]},"references-count":12,"URL":"https:\/\/doi.org\/10.1109\/hpec67600.2025.11196629","relation":{},"subject":[],"published":{"date-parts":[[2025,9,15]]}}}