{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T01:53:11Z","timestamp":1773193991284,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":79,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,22]],"date-time":"2024-04-22T00:00:00Z","timestamp":1713744000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2224054, 2220211, 2312396, 2338512"],"award-info":[{"award-number":["2224054, 2220211, 2312396, 2338512"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,22]]},"DOI":"10.1145\/3627703.3650074","type":"proceedings-article","created":{"date-parts":[[2024,4,18]],"date-time":"2024-04-18T06:28:28Z","timestamp":1713421708000},"page":"505-523","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["GMorph: Accelerating Multi-DNN Inference via Model Fusion"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3101-6107","authenticated-orcid":false,"given":"Qizheng","family":"Yang","sequence":"first","affiliation":[{"name":"University of Massachusetts Amherst, Amherst, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-6777-2025","authenticated-orcid":false,"given":"Tianyi","family":"Yang","sequence":"additional","affiliation":[{"name":"University of Massachusetts Amherst, Amherst, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7833-6755","authenticated-orcid":false,"given":"Mingcan","family":"Xiang","sequence":"additional","affiliation":[{"name":"University of Massachusetts Amherst, Amherst, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1946-7105","authenticated-orcid":false,"given":"Lijun","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Massachusetts Amherst, Amherst, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2963-8721","authenticated-orcid":false,"given":"Haoliang","family":"Wang","sequence":"additional","affiliation":[{"name":"Adobe Research, San Jose, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3125-1841","authenticated-orcid":false,"given":"Marco","family":"Serafini","sequence":"additional","affiliation":[{"name":"University of Massachusetts Amherst, Amherst, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9128-2231","authenticated-orcid":false,"given":"Hui","family":"Guan","sequence":"additional","affiliation":[{"name":"University of Massachusetts Amherst, Amherst, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,4,22]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00081"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.731"},{"key":"e_1_3_2_1_3_1","volume-title":"Proceedings of machine learning and systems, 2:129--146","author":"Blalock Davis","year":"2020","unstructured":"Davis Blalock, Jose Javier Gonzalez Ortiz, Jonathan Frankle, and John Guttag. What is the state of neural network pruning? Proceedings of machine learning and systems, 2:129--146, 2020."},{"key":"e_1_3_2_1_4_1","volume-title":"Automated search for resource-efficient branched multi-task networks. arXiv preprint arXiv:2008.10292","author":"Bruggemann David","year":"2020","unstructured":"David Bruggemann, Menelaos Kanakis, Stamatios Georgoulis, and Luc Van Gool. Automated search for resource-efficient branched multi-task networks. arXiv preprint arXiv:2008.10292, 2020."},{"key":"e_1_3_2_1_5_1","volume-title":"Once-for-all: Train one network and specialize it for efficient deployment. arXiv preprint arXiv:1908.09791","author":"Cai Han","year":"2019","unstructured":"Han Cai, Chuang Gan, Tianzhe Wang, Zhekai Zhang, and Song Han. Once-for-all: Train one network and specialize it for efficient deployment. arXiv preprint arXiv:1908.09791, 2019."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1007379606734"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-307-3.50012-5"},{"key":"e_1_3_2_1_8_1","first-page":"578","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, et al. {TVM}: An automated {End-to-End} optimizing compiler for deep learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18), pages 578--594, 2018."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2019.00159"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00027"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-020-09816-7"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.pmcj.2022.101594"},{"key":"e_1_3_2_1_13_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. Bert: Pre-training of deep bidirectional transformers for language understanding, 2019."},{"key":"e_1_3_2_1_14_1","volume-title":"Overfitting and undercomputing in machine learning. ACM computing surveys (CSUR), 27(3):326--327","author":"Dietterich Tom","year":"1995","unstructured":"Tom Dietterich. Overfitting and undercomputing in machine learning. ACM computing surveys (CSUR), 27(3):326--327, 1995."},{"key":"e_1_3_2_1_15_1","volume-title":"Song Han. IOS: Inter-Operator Scheduler for CNN Acceleration. In Proceedings of Machine Learning and Systems","volume":"3","author":"Ding Yaoyao","year":"2021","unstructured":"Yaoyao Ding, Ligeng Zhu, Zhihao Jia, Gennady Pekhimenko, and Song Han. IOS: Inter-Operator Scheduler for CNN Acceleration. In Proceedings of Machine Learning and Systems, volume 3, 2021."},{"key":"e_1_3_2_1_16_1","volume-title":"International Conference on Learning Representations","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. An image is worth 16x16 words: Transformers for image recognition at scale. In International Conference on Learning Representations, 2021."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2014.2359646"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-009-0275-4"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3241539.3241559"},{"key":"e_1_3_2_1_20_1","first-page":"27503","article-title":"Efficiently identifying task groupings for multi-task learning","volume":"34","author":"Fifty Chris","year":"2021","unstructured":"Chris Fifty, Ehsan Amid, Zhe Zhao, Tianhe Yu, Rohan Anil, and Chelsea Finn. Efficiently identifying task groupings for multi-task learning. Advances in Neural Information Processing Systems, 34:27503--27516, 2021.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01156"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00332"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2014.09.005"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01453-z"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3314221.3314652"},{"key":"e_1_3_2_1_26_1","first-page":"3854","volume-title":"International Conference on Machine Learning","author":"Guo Pengsheng","year":"2020","unstructured":"Pengsheng Guo, Chen-Yu Lee, and Daniel Ulbricht. Learning to branch for multi-task learning. In International Conference on Machine Learning, pages 3854--3863. PMLR, 2020."},{"key":"e_1_3_2_1_27_1","volume-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149","author":"Han Song","year":"2015","unstructured":"Song Han, Huizi Mao, and William J Dally. Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149, 2015."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_29_1","volume-title":"Dark knowledge. Presented as the keynote in BayLearn, 2(2)","author":"Hinton Geoffrey","year":"2014","unstructured":"Geoffrey Hinton, Oriol Vinyals, and Jeff Dean. Dark knowledge. Presented as the keynote in BayLearn, 2(2), 2014."},{"key":"e_1_3_2_1_30_1","volume-title":"System configuration and navigation of a guide dog robot: Toward animal guide dog-level guiding work. arXiv preprint arXiv:2210.13368","author":"Hwang Hochul","year":"2022","unstructured":"Hochul Hwang, Tim Xia, Ibrahima Keita, Ken Suzuki, Joydeep Biswas, Sunghoon I Lee, and Donghyun Kim. System configuration and navigation of a guide dog robot: Toward animal guide dog-level guiding work. arXiv preprint arXiv:2210.13368, 2022."},{"key":"e_1_3_2_1_31_1","volume-title":"Dynamic space-time scheduling for gpu inference. ArXiv, abs\/1901.00041","author":"Jain Paras","year":"2019","unstructured":"Paras Jain, Xiangxi Mo, Ajay Jain, Harikaran Subbaraj, Rehana Durrani, Alexey Tumanov, Joseph E. Gonzalez, and Ion Stoica. Dynamic space-time scheduling for gpu inference. ArXiv, abs\/1901.00041, 2019."},{"key":"e_1_3_2_1_32_1","volume-title":"Accelerating multi-model inference by merging dnns of different weights","author":"Jeong Joo Seong","year":"2020","unstructured":"Joo Seong Jeong, Soojeong Kim, Gyeong-In Yu, Yunseong Lee, and Byung-Gon Chun. Accelerating multi-model inference by merging dnns of different weights, 2020."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359630"},{"key":"e_1_3_2_1_34_1","first-page":"27","article-title":"Optimizing dnn computation with relaxed graph substitutions","volume":"1","author":"Jia Zhihao","year":"2019","unstructured":"Zhihao Jia, James Thomas, Todd Warszawski, Mingyu Gao, Matei Zaharia, and Alex Aiken. Optimizing dnn computation with relaxed graph substitutions. Proceedings of Machine Learning and Systems, 1:27--39, 2019.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_35_1","first-page":"1","volume-title":"Proceedings of the Fourteenth EuroSys Conference 2019","author":"Kim Youngsok","year":"2019","unstructured":"Youngsok Kim, Joonsung Kim, Dongju Chae, Daehyun Kim, and Jangwoo Kim. &mu;layer: Low latency on-device inference using cooperative single-layer acceleration and processor-friendly quantization. In Proceedings of the Fourteenth EuroSys Conference 2019, pages 1--15, 2019."},{"key":"e_1_3_2_1_36_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980, 2014."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CENIM48368.2019.8973353"},{"key":"e_1_3_2_1_38_1","volume-title":"et al. Xrbench: An extended reality (xr) machine learning benchmark suite for the metaverse. arXiv preprint arXiv:2211.08675","author":"Kwon Hyoukjun","year":"2022","unstructured":"Hyoukjun Kwon, Krishnakumar Nair, Jamin Seo, Jason Yik, Debabrata Mohapatra, Dongyuan Zhan, Jinook Song, Peter Capak, Peizhao Zhang, Peter Vajda, et al. Xrbench: An extended reality (xr) machine learning benchmark suite for the metaverse. arXiv preprint arXiv:2211.08675, 2022."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC45102.2020.9294676"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3386901.3388947"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2015.7301352"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/2818346.2830587"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3030548"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-65414-6_13"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1441"},{"key":"e_1_3_2_1_46_1","first-page":"1594","volume-title":"Advances in Neural Information Processing Systems","author":"Long Mingsheng","year":"2017","unstructured":"Mingsheng Long, Zhangjie Cao, Jianmin Wang, and S Yu Philip. Learning multiple tasks with multilinear relationship networks. In Advances in Neural Information Processing Systems, pages 1594--1603, 2017."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3081333.3081359"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.433"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794220"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3453483.3454083"},{"key":"e_1_3_2_1_51_1","volume-title":"Nvidia Multi Process Service. https:\/\/docs.nvidia.com\/pdf\/CUDA_Multi_Process_Service_Overview.pdf","author":"NVIDIA.","year":"2020","unstructured":"NVIDIA. Nvidia Multi Process Service. https:\/\/docs.nvidia.com\/pdf\/CUDA_Multi_Process_Service_Overview.pdf, 2020."},{"key":"e_1_3_2_1_52_1","volume-title":"NeurIPS","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas K\u00f6pf, Edward Yang, Zach DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. Pytorch: An imperative style, high-performance deep learning library. In NeurIPS, 2019."},{"key":"e_1_3_2_1_53_1","volume-title":"Parallel simulated annealing algorithms. Journal of parallel and distributed computing, 37(2):207--212","author":"Ram D Janaki","year":"1996","unstructured":"D Janaki Ram, TH Sreenivas, and K Ganapathy Subramaniam. Parallel simulated annealing algorithms. Journal of parallel and distributed computing, 37(2):207--212, 1996."},{"key":"e_1_3_2_1_54_1","volume-title":"An overview of multi-task learning in deep neural networks. arXiv preprint arXiv:1706.05098","author":"Ruder Sebastian","year":"2017","unstructured":"Sebastian Ruder. An overview of multi-task learning in deep neural networks. arXiv preprint arXiv:1706.05098, 2017."},{"key":"e_1_3_2_1_55_1","volume-title":"Latent multi-task architecture learning","author":"Ruder Sebastian","year":"2018","unstructured":"Sebastian Ruder, Joachim Bingel, Isabelle Augenstein, and Anders S\u00f8gaard. Latent multi-task architecture learning, 2018."},{"key":"e_1_3_2_1_56_1","volume-title":"Computing and estimating the rate of convergence","author":"Senning Jonathan R","year":"2007","unstructured":"Jonathan R Senning. Computing and estimating the rate of convergence, 2007."},{"key":"e_1_3_2_1_57_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556, 2014."},{"key":"e_1_3_2_1_58_1","first-page":"9120","volume-title":"Proceedings of the International Conference on Machine Learning","author":"Standley Trevor","year":"2020","unstructured":"Trevor Standley, Amir Zamir, Dawn Chen, Leonidas Guibas, Jitendra Malik, and Silvio Savarese. Which tasks should be learned together in multi-task learning? In Proceedings of the International Conference on Machine Learning, pages 9120--9132. PMLR, 2020."},{"key":"e_1_3_2_1_59_1","volume-title":"Adashare: Learning what to share for efficient deep multi-task learning. arXiv preprint arXiv:1911.12423","author":"Sun Ximeng","year":"2019","unstructured":"Ximeng Sun, Rameswar Panda, Rogerio Feris, and Kate Saenko. Adashare: Learning what to share for efficient deep multi-task learning. arXiv preprint arXiv:1911.12423, 2019."},{"key":"e_1_3_2_1_60_1","volume-title":"Regularizing deep multi-task networks using orthogonal gradients. arXiv preprint arXiv:1912.06844","author":"Suteu Mihai","year":"2019","unstructured":"Mihai Suteu and Yike Guo. Regularizing deep multi-task networks using orthogonal gradients. arXiv preprint arXiv:1912.06844, 2019."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-94-015-7744-1_2"},{"key":"e_1_3_2_1_62_1","volume-title":"Bert De Brabandere, and Luc Van Gool. Branched multi-task networks: deciding what layers to share. arXiv preprint arXiv:1904.02920","author":"Vandenhende Simon","year":"2019","unstructured":"Simon Vandenhende, Stamatios Georgoulis, Bert De Brabandere, and Luc Van Gool. Branched multi-task networks: deciding what layers to share. arXiv preprint arXiv:1904.02920, 2019."},{"key":"e_1_3_2_1_63_1","volume-title":"Marc Proesmans, Dengxin Dai, and Luc Van Gool. Multi-task learning for dense prediction tasks: A survey","author":"Vandenhende Simon","year":"2021","unstructured":"Simon Vandenhende, Stamatios Georgoulis, Wouter Van Gansbeke, Marc Proesmans, Dengxin Dai, and Luc Van Gool. Multi-task learning for dense prediction tasks: A survey. IEEE transactions on pattern analysis and machine intelligence, 2021."},{"key":"e_1_3_2_1_64_1","first-page":"2","volume-title":"GPU Technology Conference","volume":"1","author":"Vanholder Han","year":"2016","unstructured":"Han Vanholder. Efficient inference with tensorrt. In GPU Technology Conference, volume 1, page 2, 2016."},{"key":"e_1_3_2_1_65_1","first-page":"550","article-title":"Residual networks behave like ensembles of relatively shallow networks","volume":"29","author":"Veit Andreas","year":"2016","unstructured":"Andreas Veit, Michael J Wilber, and Serge Belongie. Residual networks behave like ensembles of relatively shallow networks. Advances in Neural Information Processing Systems, 29:550--558, 2016.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-5446"},{"key":"e_1_3_2_1_67_1","first-page":"37","volume-title":"15th USENIX Symposium on Operating Systems Design and Implementation (OSDI 21)","author":"Wang Haojie","year":"2021","unstructured":"Haojie Wang, Jidong Zhai, Mingyu Gao, Zixuan Ma, Shizhi Tang, Liyan Zheng, Yuanzhi Li, Kaiyuan Rong, Yuanyong Chen, and Zhihao Jia. PET: Optimizing tensor programs with partially equivalent transformations and automated corrections. In 15th USENIX Symposium on Operating Systems Design and Implementation (OSDI 21), pages 37--54. USENIX Association, July 2021."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"e_1_3_2_1_69_1","volume-title":"Fbnetv5: Neural architecture search for multiple tasks in one run. arXiv preprint arXiv:2111.10007","author":"Wu Bichen","year":"2021","unstructured":"Bichen Wu, Chaojian Li, Hang Zhang, Xiaoliang Dai, Peizhao Zhang, Matthew Yu, Jialiang Wang, Yingyan Lin, and Peter Vajda. Fbnetv5: Neural architecture search for multiple tasks in one run. arXiv preprint arXiv:2111.10007, 2021."},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTSS46320.2019.00042"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1145\/3372224.3419192"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD51958.2021.9643501"},{"key":"e_1_3_2_1_73_1","volume-title":"A survey of multi-tenant deep learning inference on gpu","author":"Yu Fuxun","year":"2022","unstructured":"Fuxun Yu, Di Wang, Longfei Shangguan, Minjia Zhang, Chenchen Liu, and Xiang Chen. A survey of multi-tenant deep learning inference on gpu, 2022."},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2983149"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299031"},{"key":"e_1_3_2_1_76_1","volume-title":"Automtl: A programming framework for automated multi-task learning. arXiv preprint arXiv:2110.13076","author":"Zhang Lijun","year":"2021","unstructured":"Lijun Zhang, Xiao Liu, and Hui Guan. Automtl: A programming framework for automated multi-task learning. arXiv preprint arXiv:2110.13076, 2021."},{"key":"e_1_3_2_1_77_1","volume-title":"A tree-structured multi-task model recommender. arXiv preprint arXiv:2203.05092","author":"Zhang Lijun","year":"2022","unstructured":"Lijun Zhang, Xiao Liu, and Hui Guan. A tree-structured multi-task model recommender. arXiv preprint arXiv:2203.05092, 2022."},{"key":"e_1_3_2_1_78_1","article-title":"A survey on multi-task learning","author":"Zhang Yu","year":"2021","unstructured":"Yu Zhang and Qiang Yang. A survey on multi-task learning. IEEE Transactions on Knowledge and Data Engineering, 2021.","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.463"}],"event":{"name":"EuroSys '24: Nineteenth European Conference on Computer Systems","location":"Athens Greece","acronym":"EuroSys '24","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the Nineteenth European Conference on Computer Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627703.3650074","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627703.3650074","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T01:11:48Z","timestamp":1755825108000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627703.3650074"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,22]]},"references-count":79,"alternative-id":["10.1145\/3627703.3650074","10.1145\/3627703"],"URL":"https:\/\/doi.org\/10.1145\/3627703.3650074","relation":{},"subject":[],"published":{"date-parts":[[2024,4,22]]},"assertion":[{"value":"2024-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}