{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T15:36:23Z","timestamp":1778168183576,"version":"3.51.4"},"reference-count":50,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,7,1]],"date-time":"2021-07-01T00:00:00Z","timestamp":1625097600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,7,1]],"date-time":"2021-07-01T00:00:00Z","timestamp":1625097600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,7,1]],"date-time":"2021-07-01T00:00:00Z","timestamp":1625097600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,7]]},"DOI":"10.1109\/icdcs51616.2021.00022","type":"proceedings-article","created":{"date-parts":[[2021,10,5]],"date-time":"2021-10-05T08:47:32Z","timestamp":1633423652000},"page":"138-148","source":"Crossref","is-referenced-by-count":59,"title":["Gillis: Serving Large Neural Networks in Serverless Functions with Automatic Model Partitioning"],"prefix":"10.1109","author":[{"given":"Minchen","family":"Yu","sequence":"first","affiliation":[]},{"given":"Zhifeng","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Hok Chun","family":"Ng","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Ruichuan","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Bo","family":"Li","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","article-title":"Encoding, fast and slow: Low-latency video processing using thousands of tiny threads","author":"fouladi","year":"2017","journal-title":"Proc USENIX NSDI"},{"key":"ref38","year":"0","journal-title":"How long does aws lambda keep your idle functions around before a cold start?"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref32","year":"0","journal-title":"Google Cloud Functions Pricing"},{"key":"ref31","author":"goodfellow","year":"2016","journal-title":"Deep Learning"},{"key":"ref30","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2014","journal-title":"ArXiv Preprint"},{"key":"ref37","year":"0","journal-title":"Keeping Functions Warm - How To Fix AWS Lambda Cold Start Issues"},{"key":"ref36","year":"0","journal-title":"MXNET Model Server"},{"key":"ref35","article-title":"Device placement optimization with reinforcement learning","author":"mirhoseini","year":"2017","journal-title":"Proc ACM ICML"},{"key":"ref34","article-title":"Spotlight: Optimizing device placement for training deep neural networks","author":"gao","year":"2018","journal-title":"Proceedings of the 35th International Conference on Machine Learning"},{"key":"ref28","year":"0","journal-title":"Autoscaling configuration of AWS SageMaker"},{"key":"ref27","year":"0","journal-title":"Open Neural Network Exchange (ONNX)"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/IC2E.2019.00-10"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3135974.3135993"},{"key":"ref1","article-title":"Clipper: A low-latency online prediction serving system","author":"crankshaw","year":"2017","journal-title":"Proc USENIX NSDI"},{"key":"ref20","article-title":"Distributing deep neural networks with containerized partitions at the edge","author":"zhou","year":"2019","journal-title":"Proc USENIX HotEdge"},{"key":"ref22","article-title":"Peeking behind the curtains of serverless platforms","author":"wang","year":"2018","journal-title":"Proc USENIX ATC"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2018.2858384"},{"key":"ref24","year":"0","journal-title":"KNIX"},{"key":"ref23","article-title":"Serverless computing: One step forward, two steps back","author":"hellerstein","year":"2019","journal-title":"Proc CIDR"},{"key":"ref26","year":"0","journal-title":"Mxnet"},{"key":"ref25","article-title":"SAND: Towards high-performance serverless computing","author":"akkus","year":"2018","journal-title":"Proc USENIX ATC"},{"key":"ref50","year":"0","journal-title":"AWS Lambda increases function size"},{"key":"ref10","year":"0","journal-title":"AWS Lambda"},{"key":"ref11","year":"0","journal-title":"Google Cloud Functions"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"ref12","year":"0","journal-title":"Microsoft Azure Functions"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303953"},{"key":"ref14","article-title":"Wide residual networks","author":"zagoruyko","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref15","article-title":"Exploring the limits of language modeling","author":"jozefowicz","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref16","article-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding","author":"han","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref17","article-title":"Compressing deep convolutional networks using vector quantization","author":"gong","year":"2014","journal-title":"ArXiv Preprint"},{"key":"ref18","article-title":"Beyond data and model parallelism for deep neural networks","author":"jia","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref19","article-title":"Exploring hidden dimensions in parallelizing convolutional neural networks","author":"jia","year":"2018","journal-title":"Proceedings of the 35 th International Conference on Machine Learning"},{"key":"ref4","year":"0","journal-title":"Amazon Sagemaker"},{"key":"ref3","article-title":"MArk: Exploiting cloud services for cost-effective, SLO-aware machine learning inference serving","author":"zhang","year":"2019","journal-title":"Proc USENIX ATC"},{"key":"ref6","year":"0","journal-title":"Simplifying ML predictions with google cloud functions"},{"key":"ref5","year":"0","journal-title":"Seamlessly scale predictions with aws lambda and mxnet"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3366623.3368141"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/IC2E.2018.00052"},{"key":"ref49","year":"0","journal-title":"IBM Cloud Functions"},{"key":"ref9","article-title":"Cloud programming simplified: A Berkeley view on serverless computing","author":"jonas","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref46","year":"0","journal-title":"AWS Lambda Pricing"},{"key":"ref45","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014","journal-title":"ArXiv Preprint"},{"key":"ref48","article-title":"A tutorial on bayesian optimization of expensive cost functions, with application to active user modeling and hierarchical reinforcement learning","author":"brochu","year":"2010","journal-title":"ArXiv Preprint"},{"key":"ref47","year":"0","journal-title":"AWS S3"},{"key":"ref42","article-title":"Cherrypick: Adaptively unearthing the best cloud configurations for big data analytics","author":"alipourfard","year":"2017","journal-title":"Proc USENIX NSDI"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-98144-4"},{"key":"ref44","author":"sutton","year":"2011","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref43","author":"mirhoseini","year":"2018","journal-title":"A hierarchical model for device placement"}],"event":{"name":"2021 IEEE 41st International Conference on Distributed Computing Systems (ICDCS)","location":"DC, USA","start":{"date-parts":[[2021,7,7]]},"end":{"date-parts":[[2021,7,10]]}},"container-title":["2021 IEEE 41st International Conference on Distributed Computing Systems (ICDCS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9546301\/9546401\/09546452.pdf?arnumber=9546452","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T15:46:37Z","timestamp":1652197597000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9546452\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7]]},"references-count":50,"URL":"https:\/\/doi.org\/10.1109\/icdcs51616.2021.00022","relation":{},"subject":[],"published":{"date-parts":[[2021,7]]}}}