{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,21]],"date-time":"2025-12-21T06:25:36Z","timestamp":1766298336030,"version":"3.44.0"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T00:00:00Z","timestamp":1750550400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T00:00:00Z","timestamp":1750550400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004608","name":"Natural Science Foundation of Jiangsu Province","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004608","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100008081","name":"Southeast University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100008081","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,22]]},"DOI":"10.1109\/dac63849.2025.11132945","type":"proceedings-article","created":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T17:35:41Z","timestamp":1757957741000},"page":"1-7","source":"Crossref","is-referenced-by-count":1,"title":["InfScaler: Enabling Efficient ML Inference Serving on Multi-Accelerator Edge Devices via Asymmetric Auto-Scaling"],"prefix":"10.1109","author":[{"given":"Borui","family":"Li","sequence":"first","affiliation":[{"name":"Southeast University,School of Computer Science and Engineering,Nanjing,China"}]},{"given":"Tiange","family":"Xia","sequence":"additional","affiliation":[{"name":"Southeast University,School of Computer Science and Engineering,Nanjing,China"}]},{"given":"Shuai","family":"Wang","sequence":"additional","affiliation":[{"name":"Southeast University,School of Computer Science and Engineering,Nanjing,China"}]},{"given":"Shuai","family":"Wang","sequence":"additional","affiliation":[{"name":"Southeast University,School of Computer Science and Engineering,Nanjing,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2019.2918951"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/DAC56929.2023.10247713"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3655691"},{"key":"ref4","article-title":"NVIDIA Jetson Xavier"},{"key":"ref5","article-title":"AMD Zynq UltraScale+ MPSoCs"},{"key":"ref6","article-title":"Google Edge TPU"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3447993.3483276"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3495243.3560539"},{"key":"ref9","first-page":"119","article-title":"Ekya: Continuous learning of video analytics models on edge compute servers","volume-title":"Proc. of USENIX NSDI","author":"Bhardwaj"},{"key":"ref10","first-page":"459","article-title":"Video Analytics with Zerostreaming Cameras","volume-title":"Proc. of USENIX ATC","author":"Xu"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3658473"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530400"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18074.2021.9586298"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3614303"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507709"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00073"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3544216.3544259"},{"key":"ref18","article-title":"Knative Project"},{"key":"ref19","article-title":"KServe Project"},{"key":"ref20","article-title":"AWS SageMaker Serverless Endpoint"},{"key":"ref21","first-page":"1489","article-title":"Following the data, not the function: Rethinking function orchestration in serverless computing","volume-title":"Proc. of USENIX NSDI","author":"Yu"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3603269.3604830"},{"key":"ref23","first-page":"205","article-title":"Serverless in the Wild: Characterizing and Optimizing the Serverless Workload at a Large Cloud Provider","volume-title":"Proc. of USENIX ATC","author":"Shahrad"},{"article-title":"Tetris: Memory-efficient serverless inference through tensor sharing","volume-title":"Proc. of USENIX ATC","author":"Li","key":"ref24"},{"key":"ref25","first-page":"1049","article-title":"MArk: Exploiting Cloud Services for Cost-Effective, SLO-Aware Machine Learning Inference Serving","volume-title":"Proc. of USENIX ATC","author":"Zhang"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.14778\/3547305.3547313"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS51616.2021.00022"},{"key":"ref28","article-title":"AWS Lambda"},{"key":"ref29","article-title":"Azure Function"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3131672.3131675"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3093337.3037698"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3655686"}],"event":{"name":"2025 62nd ACM\/IEEE Design Automation Conference (DAC)","start":{"date-parts":[[2025,6,22]]},"location":"San Francisco, CA, USA","end":{"date-parts":[[2025,6,25]]}},"container-title":["2025 62nd ACM\/IEEE Design Automation Conference (DAC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11132383\/11132091\/11132945.pdf?arnumber=11132945","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T05:44:35Z","timestamp":1758001475000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11132945\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,22]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/dac63849.2025.11132945","relation":{},"subject":[],"published":{"date-parts":[[2025,6,22]]}}}