{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T10:13:20Z","timestamp":1767262400069,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,3]],"date-time":"2024-06-03T00:00:00Z","timestamp":1717372800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100014188","name":"Ministry of Science and ICT, South Korea","doi-asserted-by":"publisher","award":["2022R1A5A1027646","2022-0-00420"],"award-info":[{"award-number":["2022R1A5A1027646","2022-0-00420"]}],"id":[{"id":"10.13039\/501100014188","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100004358","name":"Samsung","doi-asserted-by":"publisher","award":["IO220808-01782-01"],"award-info":[{"award-number":["IO220808-01782-01"]}],"id":[{"id":"10.13039\/100004358","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,3]]},"DOI":"10.1145\/3643832.3661885","type":"proceedings-article","created":{"date-parts":[[2024,6,4]],"date-time":"2024-06-04T17:14:23Z","timestamp":1717521263000},"page":"412-424","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["CoActo: CoActive Neural Network Inference Offloading with Fine-grained and Concurrent Execution"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5643-6688","authenticated-orcid":false,"given":"Kyungmin","family":"Bin","sequence":"first","affiliation":[{"name":"Seoul National University, Seoul, Korea, South ? Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3910-7182","authenticated-orcid":false,"given":"Jongseok","family":"Park","sequence":"additional","affiliation":[{"name":"Seoul National University, Seoul, Korea, South ? Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-3975-2566","authenticated-orcid":false,"given":"Chanjeong","family":"Park","sequence":"additional","affiliation":[{"name":"Seoul National University, Seoul, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-4959-8165","authenticated-orcid":false,"given":"Seyeon","family":"Kim","sequence":"additional","affiliation":[{"name":"University of Colorado Boulder, Boulder, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8647-1476","authenticated-orcid":false,"given":"Kyunghan","family":"Lee","sequence":"additional","affiliation":[{"name":"Seoul National University, Seoul, Korea, South ? Republic of Korea"}]}],"member":"320","published-online":{"date-parts":[[2024,6,4]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"265","volume-title":"Proceedings of the 12th USENIX Symposium on Operating Systems Design and Implementation","author":"Abadi M.","year":"2016","unstructured":"Abadi, M., Barham, P., Chen, J., Chen, Z., Davis, A., Dean, J., Devin, M., Ghemawat, S., Irving, G., Isard, M., et al. Tensorflow: a system for large-scale machine learning. In Proceedings of the 12th USENIX Symposium on Operating Systems Design and Implementation (2016), pp. 265--283."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.5555\/3195638.3195664"},{"key":"e_1_3_2_1_3_1","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown T.","year":"2020","unstructured":"Brown, T., Mann, B., Ryder, N., Subbiah, M., Kaplan, J. D., Dhariwal, P., Neelakantan, A., Shyam, P., Sastry, G., Askell, A., et al. Language models are few-shot learners. Advances in Neural Information Processing Systems 33 (2020), 1877--1901.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_4_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin J.","year":"2018","unstructured":"Devlin, J., Chang, M.-W., Lee, K., and Toutanova, K. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/77626.79170"},{"key":"e_1_3_2_1_6_1","first-page":"2","article-title":"Jointdnn: An efficient training and inference engine for intelligent mobile cloud computing services","volume":"20","author":"Eshratifar A. E.","year":"2019","unstructured":"Eshratifar, A. E., Abrishami, M. S., and Pedram, M. Jointdnn: An efficient training and inference engine for intelligent mobile cloud computing services. IEEE Transactions on Mobile Computing 20, 2 (2019), 565--576.","journal-title":"IEEE Transactions on Mobile Computing"},{"key":"e_1_3_2_1_7_1","first-page":"443","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Gujarati A.","year":"2020","unstructured":"Gujarati, A., Karimi, R., Alzayat, S., Hao, W., Kaufmann, A., Vigfusson, Y., and Mace, J. Serving {DNNs} like clockwork: Performance predictability from the bottom up. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20) (2020), pp. 443--462."},{"key":"e_1_3_2_1_8_1","volume-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149","author":"Han S.","year":"2015","unstructured":"Han, S., Mao, H., and Dally, W. J. Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149 (2015)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2019.8737614"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3495243.3560551"},{"key":"e_1_3_2_1_12_1","unstructured":"Hubert B. Linux traffic control (tc). https:\/\/manpages.ubuntu.com\/manpages\/xenial\/man8\/tc.8.html."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3267809.3267828"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3093337.3037698"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3372224.3419194"},{"key":"e_1_3_2_1_17_1","first-page":"1","article-title":"Edge ai: On-demand accelerating deep neural network inference via edge computing","volume":"19","author":"Li E.","year":"2019","unstructured":"Li, E., Zeng, L., Zhou, Z., and Chen, X. Edge ai: On-demand accelerating deep neural network inference via edge computing. IEEE Transactions on Wireless Communications 19, 1 (2019), 447--457.","journal-title":"IEEE Transactions on Wireless Communications"},{"key":"e_1_3_2_1_18_1","first-page":"881","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Ma L.","year":"2020","unstructured":"Ma, L., Xie, Z., Yang, Z., Xue, J., Miao, Y., Cui, W., Hu, W., Yang, F., Zhang, L., and Zhou, L. Rammer: Enabling holistic deep learning compiler optimizations with {rTasks}. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20) (2020), pp. 881--897."},{"key":"e_1_3_2_1_19_1","volume-title":"https:\/\/openai.com\/chatgpt","author":"Open AI.","year":"2023","unstructured":"OpenAI. Chatgpt. https:\/\/openai.com\/chatgpt, 2023."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3498361.3538940"},{"key":"e_1_3_2_1_21_1","first-page":"36","article-title":"Aspen: Breaking operator barriers for efficient parallelization of deep neural networks","author":"Park J.","year":"2024","unstructured":"Park, J., Bin, K., Park, G., Ha, S., and Lee, K. Aspen: Breaking operator barriers for efficient parallelization of deep neural networks. Advances in Neural Information Processing Systems 36 (2024).","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3281411.3281430"},{"key":"e_1_3_2_1_23_1","first-page":"8026","article-title":"Pytorch: An imperative style, high-performance deep learning library","volume":"32","author":"Paszke A.","year":"2019","unstructured":"Paszke, A., Gross, S., Massa, F., Lerer, A., Bradbury, J., Chanan, G., Killeen, T., Lin, Z., Gimelshein, N., Antiga, L., Desmaison, A., Kopf, A., Yang, E., DeVito, Z., Raison, M., Tejani, A., Chilamkurthy, S., Steiner, B., Fang, L., Bai, J., and Chintala, S. Pytorch: An imperative style, high-performance deep learning library. Advances in Neural Information Processing Systems 32 (2019), 8026--8037.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_24_1","first-page":"8821","volume-title":"International Conference on Machine Learning","author":"Ramesh A.","year":"2021","unstructured":"Ramesh, A., Pavlov, M., Goh, G., Gray, S., Voss, C., Radford, A., Chen, M., and Sutskever, I. Zero-shot text-to-image generation. In International Conference on Machine Learning (2021), PMLR, pp. 8821--8831."},{"key":"e_1_3_2_1_25_1","unstructured":"Redmon J. Darknet: Open source neural networks in c. http:\/\/pjreddie.com\/darknet\/ 2013--2016."},{"key":"e_1_3_2_1_26_1","volume-title":"Yolov3: An incremental improvement. arXiv preprint arXiv:1804.02767","author":"Redmon J.","year":"2018","unstructured":"Redmon, J., and Farhadi, A. Yolov3: An incremental improvement. arXiv preprint arXiv:1804.02767 (2018)."},{"key":"e_1_3_2_1_27_1","first-page":"701","volume-title":"17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 23)","author":"Shi Y.","year":"2023","unstructured":"Shi, Y., Yang, Z., Xue, J., Ma, L., Xia, Y., Miao, Z., Guo, Y., Yang, F., and Zhou, L. Welder: Scheduling deep learning memory access via tile-graph. In 17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 23) (2023), pp. 701--718."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2005.64"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2014.110"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/71.993206"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447993.3448625"},{"key":"e_1_3_2_1_33_1","first-page":"945","volume-title":"19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22)","author":"Weng Q.","year":"2022","unstructured":"Weng, Q., Xiao, W., Yu, Y., Wang, W., Wang, C., He, J., Li, Y., Zhang, L., Lin, W., and Ding, Y. {MLaaS} in the wild: Workload analysis and scheduling in {Large-Scale} heterogeneous {GPU} clusters. In 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22) (2022), pp. 945--960."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/2785956.2787498"},{"key":"e_1_3_2_1_35_1","first-page":"2","article-title":"Coedge: Cooperative dnn inference with adaptive workload partitioning over heterogeneous edge devices","volume":"29","author":"Zeng L.","year":"2020","unstructured":"Zeng, L., Chen, X., Zhou, Z., Yang, L., and Zhang, J. Coedge: Cooperative dnn inference with adaptive workload partitioning over heterogeneous edge devices. IEEE\/ACM Transactions on Networking 29, 2 (2020), 595--608.","journal-title":"IEEE\/ACM Transactions on Networking"},{"key":"e_1_3_2_1_36_1","first-page":"787","volume-title":"20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23)","author":"Zhang H.","year":"2023","unstructured":"Zhang, H., Tang, Y., Khandelwal, A., and Stoica, I. {SHEPHERD}: Serving {DNNs} in the wild. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23) (2023), pp. 787--808."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458864.3467882"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3058532"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2018.2858384"},{"key":"e_1_3_2_1_40_1","first-page":"2","article-title":"Accelerating deep learning inference via model parallelism and partial computation offloading","volume":"34","author":"Zhou H.","year":"2022","unstructured":"Zhou, H., Li, M., Wang, N., Min, G., and Wu, J. Accelerating deep learning inference via model parallelism and partial computation offloading. IEEE Transactions on Parallel and Distributed Systems 34, 2 (2022), 475--488.","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"e_1_3_2_1_41_1","first-page":"233","volume-title":"16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Zhu H.","year":"2022","unstructured":"Zhu, H., Wu, R., Diao, Y., Ke, S., Li, H., Zhang, C., Xue, J., Ma, L., Xia, Y., Cui, W., et al. {ROLLER}: Fast and efficient tensor compilation for deep learning. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22) (2022), pp. 233--248."}],"event":{"name":"MOBISYS '24: 22nd Annual International Conference on Mobile Systems, Applications and Services","sponsor":["SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing","SIGOPS ACM Special Interest Group on Operating Systems"],"location":"Minato-ku, Tokyo Japan","acronym":"MOBISYS '24"},"container-title":["Proceedings of the 22nd Annual International Conference on Mobile Systems, Applications and Services"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643832.3661885","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3643832.3661885","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:07Z","timestamp":1750291387000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643832.3661885"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,3]]},"references-count":41,"alternative-id":["10.1145\/3643832.3661885","10.1145\/3643832"],"URL":"https:\/\/doi.org\/10.1145\/3643832.3661885","relation":{},"subject":[],"published":{"date-parts":[[2024,6,3]]},"assertion":[{"value":"2024-06-04","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}