{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T15:36:21Z","timestamp":1778168181713,"version":"3.51.4"},"reference-count":84,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,11,1]],"date-time":"2020-11-01T00:00:00Z","timestamp":1604188800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,11,1]],"date-time":"2020-11-01T00:00:00Z","timestamp":1604188800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,11,1]],"date-time":"2020-11-01T00:00:00Z","timestamp":1604188800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100008536","name":"Amazon Web Services","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100008536","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,11]]},"DOI":"10.1109\/sc41405.2020.00073","type":"proceedings-article","created":{"date-parts":[[2021,2,23]],"date-time":"2021-02-23T01:26:48Z","timestamp":1614043608000},"page":"1-15","source":"Crossref","is-referenced-by-count":158,"title":["BATCH: Machine Learning Inference Serving on Serverless Platforms with Adaptive Batching"],"prefix":"10.1109","author":[{"given":"Ahsan","family":"Ali","sequence":"first","affiliation":[]},{"given":"Riccardo","family":"Pinciroli","sequence":"additional","affiliation":[]},{"given":"Feng","family":"Yan","sequence":"additional","affiliation":[]},{"given":"Evgenia","family":"Smirni","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1145\/3368454"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/QEST.2008.33"},{"key":"ref71","year":"2020","journal-title":"Load testing for production variant automatic scaling"},{"key":"ref70","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v31i1.11231","article-title":"Inception-v4, inception-resnet and the impact of residual connections on learning","author":"szegedy","year":"2017","journal-title":"Thirty-First AAAI Conference on Artificial Intelligence"},{"key":"ref76","first-page":"133","article-title":"Peeking behind the curtains of serverless platforms","author":"wang","year":"2018","journal-title":"2018 USENIX Annual Technical Conference (USENIX ATC 18)"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/IC2E.2018.00039"},{"key":"ref74","year":"2019","journal-title":"$7 72 billion function-as-a-service market 2017 - global forecast to 2021 Increasing shift from devops to serverless computing to drive the overall function-as-a-service market - research and markets"},{"key":"ref39","year":"2019","journal-title":"AWS Lambda &#x2013; pricing"},{"key":"ref75","year":"2020","journal-title":"New for aws lambda &#x2013; predictable start-up times with provisioned concurrency"},{"key":"ref38","year":"2019","journal-title":"AWS autoscaling"},{"key":"ref78","first-page":"57","article-title":"Sock: Rapid task provisioning with serverlessoptimized containers","author":"oakes","year":"2018","journal-title":"2018 USENIX Annual Technical Conference (USENIX ATC 18)"},{"key":"ref79","article-title":"Agile cold starts for scalable serverless","author":"mohan","year":"2019","journal-title":"11th USENIX Workshop on Hot Topics in Cloud Computing (HotCloud 19)"},{"key":"ref33","first-page":"97","article-title":"Disk drive level workload characterization","author":"riska","year":"2006","journal-title":"Proceedings of the 2006 USENIX Annual Technical Conference"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/65.844498"},{"key":"ref31","year":"2019","journal-title":"Amazon build train and deploy machine learning models at scale"},{"key":"ref30","year":"2017","journal-title":"Twitter streaming traces"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-89856-6_14"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/j.peva.2007.06.016"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/DSN.2016.38"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/1555228.1555267"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1145\/1453175.1453182"},{"key":"ref62","article-title":"Mxnet: A flexible and efficient machine learning library for heterogeneous distributed systems","author":"chen","year":"2015","journal-title":"arXiv preprint arXiv 1512 03385"},{"key":"ref61","year":"2019","journal-title":"Lambda package documentation"},{"key":"ref63","year":"2019","journal-title":"Boto 3 documentation"},{"key":"ref28","author":"ali","year":"2020","journal-title":"Batch pre-release"},{"key":"ref64","year":"2020","journal-title":"AWS CloudWatch"},{"key":"ref27","first-page":"3971","article-title":"On-the-fly operation batching in dynamic computation graphs","author":"neubig","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref65","year":"2020","journal-title":"Amazon EC2 T2 Instances"},{"key":"ref66","year":"2020","journal-title":"Aws kinesis"},{"key":"ref29","year":"2019","journal-title":"Nys thruway origin and destination points for all vehicles"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref68","article-title":"Mobinet: A mobile binary network for image classification","author":"phan","year":"2019","journal-title":"arXiv preprint arXiv 1907 11634"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref2","year":"2019","journal-title":"Ibm cloud &#x2013; cloud functions"},{"key":"ref1","year":"2019","journal-title":"AWS Lambda &#x2013; Serverless Compute"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/HPCC\/SmartCity\/DSS.2019.00334"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/MASCOTS.2013.72"},{"key":"ref21","first-page":"613","article-title":"Clipper: A low-latency online prediction serving system","author":"crankshaw","year":"2017","journal-title":"14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17)"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/2568088.2568104"},{"key":"ref23","first-page":"492","article-title":"Enhancing data availability in disk drives through background activities","author":"mi","year":"2008","journal-title":"38th Annual IEE\/IFIP International Conference on Dependable Systems and Networks 2008"},{"key":"ref26","first-page":"265","article-title":"Tensorflow: A system for large-scale machine learning","author":"abadi","year":"2016","journal-title":"12th USENIX Symp Operating Systems Design and Implementation (OSDI 16)"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/1534912.1534913"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2009.135"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1016\/0305-0548(88)90026-3"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-46029-2_14"},{"key":"ref58","volume":"5","author":"neuts","year":"1989","journal-title":"Structured Stochastic Matrices of M\/G\/1 Type and Their Applications"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1016\/j.peva.2009.09.003"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/WSC.2009.5429563"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-41154-0_26"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1239\/jap\/1032438394"},{"key":"ref53","article-title":"The markov modulated poisson process and markov poisson cascade with applications to web traffic modeling","author":"bernardo","year":"2003","journal-title":"Bayesian Statistics"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1016\/0166-5316(93)90035-S"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3267809.3267815"},{"key":"ref11","article-title":"A case for serverless machine learning","volume":"2018","author":"carreira","year":"2018","journal-title":"Workshop on Systems for ML and Open Source Software at NeurIPS"},{"key":"ref40","first-page":"937","article-title":"Cavs: An efficient runtime system for dynamic neural networks","author":"xu","year":"2018","journal-title":"2018 USENIX Annual Technical Conference (USENIX ATC 18)"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/IC2E.2018.00052"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2019.8737391"},{"key":"ref14","first-page":"1","article-title":"Neural architecture search: A survey","volume":"20","author":"elsken","year":"2019","journal-title":"Journal of Machine Learning Research"},{"key":"ref15","first-page":"63","article-title":"Neural architecture search","year":"2019","journal-title":"Automata Machine Learning"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303958"},{"key":"ref16","article-title":"Mark: Exploiting cloud services for cost-effective, slo-aware machine learning inference serving","author":"zhang","year":"2019","journal-title":"2019 USENIX Annual Technical Conference (USENIX ATC 19)"},{"key":"ref81","first-page":"47","article-title":"Stout: An adaptive interface to scalable cloud storage","author":"mccullough","year":"2010","journal-title":"Proc of the USENIX Annual Technical Conference&#x2013;ATC"},{"key":"ref17","article-title":"Serverless computing: One step forward, two steps back","author":"hellerstein","year":"2019","journal-title":"CIDR 2019 9th Biennial Conference on Innovative Data Systems Research"},{"key":"ref84","year":"2020","journal-title":"AWS Batch"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/IC2E.2019.00-10"},{"key":"ref83","first-page":"72","article-title":"Capacity planning of fog computing infrastructures for smart monitoring","author":"pinciroli","year":"2017","journal-title":"Workshop on New Frontiers in Quantitative Methods in Informatics"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3135974.3135993"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1145\/3360468.3366781"},{"key":"ref4","year":"2019","journal-title":"Cloud functions &#x2013; serverless environment to build and connect cloud services &#x2014; google cloud platform"},{"key":"ref3","year":"2019","journal-title":"Azure functionsserverless architecture &#x2013; microsoft azure"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3154847.3154853"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/MITS.2018.2806620"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/SCC.2019.00018"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCSW.2017.36"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TNSM.2018.2808352"},{"key":"ref9","author":"rajewski","year":"2018","journal-title":"System and method for live streaming content to subscription audiences using a serverless computing system"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1145\/2670979.2671008"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2016.25"},{"key":"ref48","author":"kleinrock","year":"1975","journal-title":"Queueing Systems Volume I Theory"},{"key":"ref47","first-page":"28","article-title":"Pslo: enforcing the x th percentile latency and throughput slos for consolidated vm storage","author":"li","year":"2016","journal-title":"Proceedings of the Eleventh European Conference on Computer Systems"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1016\/j.peva.2009.12.003"},{"key":"ref41","year":"2020","journal-title":"Make Data Useful"},{"key":"ref44","doi-asserted-by":"crossref","DOI":"10.1002\/9780470770771","volume":"777","author":"royston","year":"2008","journal-title":"Multivariable model-building a pragmatic approach to regression anaylsis based on fractional polynomials for modelling continuous variables"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.2307\/3213143"}],"event":{"name":"SC20: International Conference for High Performance Computing, Networking, Storage and Analysis","location":"Atlanta, GA, USA","start":{"date-parts":[[2020,11,9]]},"end":{"date-parts":[[2020,11,19]]}},"container-title":["SC20: International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9355221\/9355202\/09355312.pdf?arnumber=9355312","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,18]],"date-time":"2022-12-18T15:34:53Z","timestamp":1671377693000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9355312\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,11]]},"references-count":84,"URL":"https:\/\/doi.org\/10.1109\/sc41405.2020.00073","relation":{},"subject":[],"published":{"date-parts":[[2020,11]]}}}