{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T13:33:29Z","timestamp":1769520809167,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,12]],"date-time":"2023-11-12T00:00:00Z","timestamp":1699747200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,12]]},"DOI":"10.1145\/3624062.3624238","type":"proceedings-article","created":{"date-parts":[[2023,11,10]],"date-time":"2023-11-10T13:53:39Z","timestamp":1699624419000},"page":"1606-1613","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Fine-grained accelerator partitioning for Machine Learning and Scientific Computing in Function as a Service Platform"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8297-8525","authenticated-orcid":false,"given":"Aditya","family":"Dhakal","sequence":"first","affiliation":[{"name":"Hewlett Packard Labs, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3293-9437","authenticated-orcid":false,"given":"Philipp","family":"Raith","sequence":"additional","affiliation":[{"name":"Hewlett Packard Labs, Austria"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1323-5939","authenticated-orcid":false,"given":"Logan","family":"Ward","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5652-4408","authenticated-orcid":false,"given":"Rolando P.","family":"Hong Enriquez","sequence":"additional","affiliation":[{"name":"Hewlett Packard Labs, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0373-1867","authenticated-orcid":false,"given":"Gourav","family":"Rattihalli","sequence":"additional","affiliation":[{"name":"Hewlett Packard Labs, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7370-4805","authenticated-orcid":false,"given":"Kyle","family":"Chard","sequence":"additional","affiliation":[{"name":"University of Chicago, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2129-5269","authenticated-orcid":false,"given":"Ian","family":"Foster","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9830-8588","authenticated-orcid":false,"given":"Dejan","family":"Milojicic","sequence":"additional","affiliation":[{"name":"Hewlett Packard Labs, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,11,12]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"2023. Multi-Site Active Learning for IP Optimization. https:\/\/github.com\/exalearn\/multi-site-campaigns\/tree\/main\/molecular-design. Accessed: 16\/08\/2023."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307681.3325400"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/MDAT.2022.3161126"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3369583.3392683"},{"key":"e_1_3_2_2_5_1","volume-title":"Proceedings of Machine Learning and Systems, D.\u00a0Marculescu, Y.\u00a0Chi, and C.\u00a0Wu (Eds.). Vol.\u00a04. 20\u201332","author":"Cho Junguk","year":"2022","unstructured":"Junguk Cho, Diman Zad\u00a0Tootaghaj, Lianjie Cao, and Puneet Sharma. 2022. SLA-Driven ML INFERENCE FRAMEWORK FOR CLOUDS WITH HETEROGENEOUS ACCELERATORS. In Proceedings of Machine Learning and Systems, D.\u00a0Marculescu, Y.\u00a0Chi, and C.\u00a0Wu (Eds.). Vol.\u00a04. 20\u201332. https:\/\/proceedings.mlsys.org\/paper_files\/paper\/2022\/file\/bcf9bef61a534d0ce4a3c55f09dfcc29-Paper.pdf"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/P3HPC56579.2022.00014"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CLOUD.2019.00067"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"crossref","unstructured":"J. Deng W. Dong R. Socher L.-J. Li K. Li and L. Fei-Fei. 2009. ImageNet: A Large-Scale Hierarchical Image Database. In CVPR09.","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421284"},{"key":"e_1_3_2_2_10_1","volume-title":"D-STACK: High Throughput DNN Inference by Effective Multiplexing and Spatio-Temporal Scheduling of GPUs. arXiv preprint arXiv:2304.13541","author":"Dhakal Aditya","year":"2023","unstructured":"Aditya Dhakal, Sameer\u00a0G Kulkarni, and KK Ramakrishnan. 2023. D-STACK: High Throughput DNN Inference by Effective Multiplexing and Spatio-Temporal Scheduling of GPUs. arXiv preprint arXiv:2304.13541 (2023)."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CloudNet51028.2020.9335804"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICNP49622.2020.9259361"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS53621.2022.00077"},{"key":"e_1_3_2_2_14_1","volume-title":"Deep Residual Learning for Image Recognition. arXiv preprint arXiv:1512.03385","author":"He Kaiming","year":"2015","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2015. Deep Residual Learning for Image Recognition. arXiv preprint arXiv:1512.03385 (2015)."},{"key":"e_1_3_2_2_15_1","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2015. Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification. arxiv:1502.01852\u00a0[cs.CV]"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCE56470.2023.10043587"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS47924.2020.00042"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"crossref","unstructured":"Jie Li George Michelogiannakis Brandon Cook Dulanya Cooray and Yong Chen. 2023. Analyzing Resource Utilization in\u00a0an\u00a0HPC System: A Case Study of\u00a0NERSC\u2019s Perlmutter. In High Performance Computing Abhinav Bhatele Jeff Hammond Marc Baboulin and Carola Kruse (Eds.). Springer Nature Switzerland Cham 297\u2013316.","DOI":"10.1007\/978-3-031-32041-5_16"},{"key":"e_1_3_2_2_19_1","volume-title":"2022 USENIX Annual Technical Conference (USENIX ATC 22)","author":"Li Jie","year":"2022","unstructured":"Jie Li, Laiping Zhao, Yanan Yang, Kunlin Zhan, and Keqiu Li. 2022. Tetris: Memory-efficient serverless inference through tensor sharing. In 2022 USENIX Annual Technical Conference (USENIX ATC 22)."},{"key":"e_1_3_2_2_20_1","unstructured":"NVIDIA. 2023. Multiprocess Service. https:\/\/docs.nvidia.com\/deploy\/pdf\/CUDA_Multi_Process_Service_Overview.pdf. Accessed: 15\/08\/2023."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.sysarc.2022.102561"},{"key":"e_1_3_2_2_22_1","volume-title":"Molecular Sets (MOSES): A Benchmarking Platform for Molecular Generation Models. Frontiers in Pharmacology","author":"Polykovskiy Daniil","year":"2020","unstructured":"Daniil Polykovskiy, Alexander Zhebrak, Benjamin Sanchez-Lengeling, Sergey Golovanov, Oktai Tatanov, Stanislav Belyaev, Rauf Kurbanov, Aleksey Artamonov, Vladimir Aladinskiy, Mark Veselov, Artur Kadurin, Simon Johansson, Hongming Chen, Sergey Nikolenko, Alan Aspuru-Guzik, and Alex Zhavoronkov. 2020. Molecular Sets (MOSES): A Benchmarking Platform for Molecular Generation Models. Frontiers in Pharmacology (2020)."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/MIC.2023.3260939"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/UCC56403.2022.00023"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503221.3508407"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507750"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_2_28_1","unstructured":"Lukas Tobler. 2022. GPUless \u2013 Serverless GPU Functions. Master\u2019s thesis. ETH."},{"key":"e_1_3_2_2_29_1","volume-title":"Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Louis Martin, Kevin Stone, Peter Albert, Amjad Almahairi, Yasmine Babaei, Nikolay Bashlykov, Soumya Batra, Prajjwal Bhargava, Shruti Bhosale, 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_2_30_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","unstructured":"Logan Ward Ganesh Sivaraman J.\u00a0Gregory Pauloski Yadu Babuji Ryan Chard Naveen Dandu Paul\u00a0C. Redfern Rajeev\u00a0S. Assary Kyle Chard Larry\u00a0A. Curtiss Rajeev Thakur and Ian Foster. 2021. Colmena: Scalable Machine-Learning-Based Steering of Ensemble Simulations for High Performance Computing. In 2021 IEEE\/ACM Workshop on Machine Learning in High Performance Computing Environments (MLHPC). 9\u201320. https:\/\/doi.org\/10.1109\/MLHPC54614.2021.00007","DOI":"10.1109\/MLHPC54614.2021.00007"},{"key":"e_1_3_2_2_32_1","volume-title":"20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23)","author":"Wu Bingyang","year":"2023","unstructured":"Bingyang Wu, Zili Zhang, Zhihao Bai, Xuanzhe Liu, and Xin Jin. 2023. Transparent { GPU} Sharing in Container Clouds for Deep Learning Workloads. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23). 69\u201385."}],"event":{"name":"SC-W 2023: Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis","location":"Denver CO USA","acronym":"SC-W 2023"},"container-title":["Proceedings of the SC '23 Workshops of the International Conference on High Performance Computing, Network, Storage, and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624062.3624238","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3624062.3624238","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T03:04:16Z","timestamp":1755745456000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624062.3624238"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,12]]},"references-count":32,"alternative-id":["10.1145\/3624062.3624238","10.1145\/3624062"],"URL":"https:\/\/doi.org\/10.1145\/3624062.3624238","relation":{},"subject":[],"published":{"date-parts":[[2023,11,12]]},"assertion":[{"value":"2023-11-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}