{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T14:43:24Z","timestamp":1775745804184,"version":"3.50.1"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031484230","type":"print"},{"value":"9783031484247","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-48424-7_18","type":"book-chapter","created":{"date-parts":[[2023,11,21]],"date-time":"2023-11-21T20:03:21Z","timestamp":1700597001000},"page":"242-258","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Octopus: SLO-Aware Progressive Inference Serving via\u00a0Deep Reinforcement Learning in\u00a0Multi-tenant Edge Cluster"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2539-8257","authenticated-orcid":false,"given":"Ziyang","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Yang","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Jie","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,11,20]]},"reference":[{"key":"18_CR1","unstructured":"Choi, S., Lee, S., Kim, Y., Park, J., Kwon, Y., Huh, J.: Serving heterogeneous machine learning models on $$\\{$$Multi-GPU$$\\}$$ servers with $$\\{$$Spatio-Temporal$$\\}$$ sharing. In: 2022 USENIX Annual Technical Conference (USENIX ATC 2022), pp. 199\u2013216 (2022)"},{"key":"18_CR2","unstructured":"Christodoulou, P.: Soft actor-critic for discrete action settings. arXiv preprint arXiv:1910.07207 (2019)"},{"key":"18_CR3","doi-asserted-by":"crossref","unstructured":"Dong, F., et al.: Multi-exit DNN inference acceleration based on multi-dimensional optimization for edge intelligence. IEEE Trans. Mob. Comput. (2022)","DOI":"10.1109\/TMC.2022.3172402"},{"key":"18_CR4","doi-asserted-by":"crossref","unstructured":"Faggioli, D., Trimarchi, M., Checconi, F., Bertogna, M., Mancina, A.: An implementation of the earliest deadline first algorithm in linux. In: Proceedings of the 2009 ACM Symposium on Applied Computing, pp. 1984\u20131989 (2009)","DOI":"10.1145\/1529282.1529723"},{"key":"18_CR5","unstructured":"Gujarati, A., et al.: Serving $$\\{$$DNNs$$\\}$$ like clockwork: performance predictability from the bottom up. In: 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 2020), pp. 443\u2013462 (2020)"},{"issue":"1","key":"18_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3546192","volume":"23","author":"J Hao","year":"2023","unstructured":"Hao, J., Subedi, P., Ramaswamy, L., Kim, I.K.: Reaching for the sky: maximizing deep learning inference throughput on edge devices with AI multi-tenancy. ACM Trans. Internet Technol. 23(1), 1\u201333 (2023)","journal-title":"ACM Trans. Internet Technol."},{"issue":"8","key":"18_CR7","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"18_CR8","doi-asserted-by":"crossref","unstructured":"Jeon, S., Choi, Y., Cho, Y., Cha, H.: Harvnet: resource-optimized operation of multi-exit deep neural networks on energy harvesting devices. In: Proceedings of the 21st Annual International Conference on Mobile Systems, Applications and Services, pp. 42\u201355 (2023)","DOI":"10.1145\/3581791.3596845"},{"key":"18_CR9","doi-asserted-by":"crossref","unstructured":"Jeong, J.S., et al.: Band: coordinated multi-DNN inference on heterogeneous mobile processors. In: Proceedings of the 20th Annual International Conference on Mobile Systems, Applications and Services, pp. 235\u2013247 (2022)","DOI":"10.1145\/3498361.3538948"},{"key":"18_CR10","doi-asserted-by":"crossref","unstructured":"Laskaridis, S., Venieris, S.I., Almeida, M., Leontiadis, I., Lane, N.D.: Spinn: synergistic progressive inference of neural networks over device and cloud. In: Proceedings of the 26th Annual International Conference on Mobile Computing and Networking, pp. 1\u201315 (2020)","DOI":"10.1145\/3372224.3419194"},{"issue":"1","key":"18_CR11","doi-asserted-by":"publisher","first-page":"447","DOI":"10.1109\/TWC.2019.2946140","volume":"19","author":"E Li","year":"2019","unstructured":"Li, E., Zeng, L., Zhou, Z., Chen, X.: Edge AI: on-demand accelerating deep neural network inference via edge computing. IEEE Trans. Wireless Commun. 19(1), 447\u2013457 (2019)","journal-title":"IEEE Trans. Wireless Commun."},{"key":"18_CR12","doi-asserted-by":"crossref","unstructured":"Liang, Q., Hanafy, W.A., Bashir, N., Ali-Eldin, A., Irwin, D., Shenoy, P.: D\u011blen: enabling flexible and adaptive model-serving for multi-tenant edge AI. In: Proceedings of the 8th ACM\/IEEE Conference on Internet of Things Design and Implementation, pp. 209\u2013221 (2023)","DOI":"10.1145\/3576842.3582375"},{"key":"18_CR13","doi-asserted-by":"crossref","unstructured":"Ling, N., Huang, X., Zhao, Z., Guan, N., Yan, Z., Xing, G.: Blastnet: exploiting duo-blocks for cross-processor real-time DNN inference. In: Proceedings of the 20th ACM Conference on Embedded Networked Sensor Systems, pp. 91\u2013105 (2022)","DOI":"10.1145\/3560905.3568520"},{"key":"18_CR14","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lan, G., Stojkovic, J., Zhang, Y., Joe-Wong, C., Gorlatova, M.: Collabar: edge-assisted collaborative image recognition for mobile augmented reality. In: 2020 19th ACM\/IEEE International Conference on Information Processing in Sensor Networks (IPSN), pp. 301\u2013312. IEEE (2020)","DOI":"10.1109\/IPSN48710.2020.00-26"},{"key":"18_CR15","doi-asserted-by":"crossref","unstructured":"Mohammed, T., Joe-Wong, C., Babbar, R., Di Francesco, M.: Distributed inference acceleration with adaptive DNN partitioning and offloading. In: IEEE INFOCOM 2020-IEEE Conference on Computer Communications, pp. 854\u2013863. IEEE (2020)","DOI":"10.1109\/INFOCOM41043.2020.9155237"},{"key":"18_CR16","doi-asserted-by":"crossref","unstructured":"Nigade, V., Bauszat, P., Bal, H., Wang, L.: Jellyfish: timely inference serving for dynamic edge networks. In: 2022 IEEE Real-Time Systems Symposium (RTSS), pp. 277\u2013290. IEEE (2022)","DOI":"10.1109\/RTSS55097.2022.00032"},{"issue":"4","key":"18_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3460352","volume":"18","author":"W Seo","year":"2021","unstructured":"Seo, W., Cha, S., Kim, Y., Huh, J., Park, J.: SLO-aware inference scheduler for heterogeneous processors in edge platforms. ACM Trans. Archit. Code Optim. 18(4), 1\u201326 (2021)","journal-title":"ACM Trans. Archit. Code Optim."},{"issue":"5","key":"18_CR18","doi-asserted-by":"publisher","first-page":"637","DOI":"10.1109\/JIOT.2016.2579198","volume":"3","author":"W Shi","year":"2016","unstructured":"Shi, W., Cao, J., Zhang, Q., Li, Y., Xu, L.: Edge computing: vision and challenges. IEEE Internet Things J. 3(5), 637\u2013646 (2016)","journal-title":"IEEE Internet Things J."},{"key":"18_CR19","doi-asserted-by":"crossref","unstructured":"Teerapittayanon, S., McDanel, B., Kung, H.T.: Branchynet: fast inference via early exiting from deep neural networks. In: 2016 23rd International Conference on Pattern Recognition (ICPR), pp. 2464\u20132469. IEEE (2016)","DOI":"10.1109\/ICPR.2016.7900006"},{"key":"18_CR20","doi-asserted-by":"crossref","unstructured":"Teng, S., et al.: Motion planning for autonomous driving: the state of the art and future perspectives. IEEE Trans. Intell. Veh. (2023)","DOI":"10.1109\/TIV.2023.3274536"},{"key":"18_CR21","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"issue":"12","key":"18_CR22","doi-asserted-by":"publisher","first-page":"4499","DOI":"10.1109\/TPDS.2022.3195664","volume":"33","author":"J Wu","year":"2022","unstructured":"Wu, J., Wang, L., Pei, Q., Cui, X., Liu, F., Yang, T.: HiTDL: high-throughput deep learning inference at the hybrid mobile edge. IEEE Trans. Parallel Distrib. Syst. 33(12), 4499\u20134514 (2022)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"18_CR23","unstructured":"Yang, Z., Nahrstedt, K., Guo, H., Zhou, Q.: Deeprt: a soft real time scheduler for computer vision applications on the edge. In: 2021 IEEE\/ACM Symposium on Edge Computing (SEC), pp. 271\u2013284. IEEE (2021)"},{"key":"18_CR24","doi-asserted-by":"crossref","unstructured":"Zhang, W., et al.: ELF: accelerate high-resolution mobile deep vision with content-aware parallel offloading. In: Proceedings of the 27th Annual International Conference on Mobile Computing and Networking, pp. 201\u2013214 (2021)","DOI":"10.1145\/3447993.3448628"},{"issue":"8","key":"18_CR25","doi-asserted-by":"publisher","first-page":"1738","DOI":"10.1109\/JPROC.2019.2918951","volume":"107","author":"Z Zhou","year":"2019","unstructured":"Zhou, Z., Chen, X., Li, E., Zeng, L., Luo, K., Zhang, J.: Edge intelligence: paving the last mile of artificial intelligence with edge computing. Proc. IEEE 107(8), 1738\u20131762 (2019)","journal-title":"Proc. IEEE"}],"container-title":["Lecture Notes in Computer Science","Service-Oriented Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-48424-7_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,21]],"date-time":"2023-11-21T20:17:49Z","timestamp":1700597869000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-48424-7_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031484230","9783031484247"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-48424-7_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"20 November 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICSOC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Service-Oriented Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Rome","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 November 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 December 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icsoc2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icsoc2023.diag.uniroma1.it\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"ConfTool","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"208","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"35","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"10","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"17% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"6","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"other papers accepted: 3 industry full papers, 3 keynote abstracts (in the front matter)","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}