{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:34:10Z","timestamp":1742913250388,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":18,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819708338"},{"type":"electronic","value":"9789819708345"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-0834-5_8","type":"book-chapter","created":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T02:02:48Z","timestamp":1710208968000},"page":"109-129","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["DeepLat: Achieving Minimum Worst Case Latency for\u00a0DNN Inference with\u00a0Batch-Aware Dispatching"],"prefix":"10.1007","author":[{"given":"Jiaheng","family":"Gao","sequence":"first","affiliation":[]},{"given":"Yitao","family":"Hu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,3,12]]},"reference":[{"issue":"7553","key":"8_CR1","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature 521(7553), 436\u2013444 (2015)","journal-title":"Nature"},{"key":"8_CR2","unstructured":"Goodfellow, I., Bengio, Y., Courville, A.: Deep Learning. MIT Press (2016)"},{"key":"8_CR3","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. Adv. Neural Inf. Process. Syst. 25 (2012)"},{"key":"8_CR4","doi-asserted-by":"crossref","unstructured":"Hirschberg, J., Manning, C.D.: Advances in natural language processing. Science 349(6245), 261\u2013266 (2015)","DOI":"10.1126\/science.aaa8685"},{"key":"8_CR5","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"8_CR6","doi-asserted-by":"crossref","unstructured":"Covington, P., Adams, J., Sargin, E.: Deep neural networks for youtube recommendations. In: Proceedings of the 10th ACM Conference on Recommender Systems, pp. 191\u2013198 (2016)","DOI":"10.1145\/2959100.2959190"},{"key":"8_CR7","doi-asserted-by":"crossref","unstructured":"Shen, H., et al.: Nexus: a gpu cluster engine for accelerating dnn-based video analysis. In: Proceedings of the 27th ACM Symposium on Operating Systems Principles, pp. 322\u2013337 (2019)","DOI":"10.1145\/3341301.3359658"},{"key":"8_CR8","doi-asserted-by":"crossref","unstructured":"Hu, Y., Ghosh, R., Govindan, R.: Scrooge: a cost-effective deep learning inference system. In: Proceedings of the ACM Symposium on Cloud Computing, pp. 624\u2013638 (2021)","DOI":"10.1145\/3472883.3486993"},{"key":"8_CR9","doi-asserted-by":"crossref","unstructured":"Crankshaw, D., et al.: Inferline: latency-aware provisioning and scaling for prediction serving pipelines. In: Proceedings of the 11th ACM Symposium on Cloud Computing, pp. 477\u2013491 (2020)","DOI":"10.1145\/3419111.3421285"},{"key":"8_CR10","unstructured":"Crankshaw, D., Wang, X., Zhou, G., Franklin, M.J., Gonzalez, J.E., Stoica, I.: Clipper: a $$\\{$$Low-Latency$$\\}$$ online prediction serving system. In: 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17), pp. 613\u2013627 (2017)"},{"key":"8_CR11","unstructured":"Romero, F., Li, Q., Yadwadkar, N.J., Kozyrakis, C.: $$\\{$$INFaaS$$\\}$$: automated model-less inference serving. In: 2021 USENIX Annual Technical Conference (USENIX ATC 21), pp. 397\u2013411 (2021)"},{"key":"8_CR12","doi-asserted-by":"crossref","unstructured":"Hu, Y., et al.: Rim: offloading inference to the edge. In: Proceedings of the International Conference on Internet-of-Things Design and Implementation, pp. 80\u201392 (2021)","DOI":"10.1145\/3450268.3453521"},{"key":"8_CR13","unstructured":"Girshick, R., Radosavovic, I., Gkioxari, G., Doll\u00e1r, P., He, K.: Detectron. https:\/\/github.com\/facebookresearch\/detectron"},{"key":"8_CR14","doi-asserted-by":"crossref","unstructured":"Dean, J., Andr\u00e9 Barroso, L.: The tail at scale. Commun. ACM 56(2), 74\u201380 (2013)","DOI":"10.1145\/2408776.2408794"},{"key":"8_CR15","doi-asserted-by":"crossref","unstructured":"Gandhi, A., Harchol-Balter, M., Raghunathan, R., Kozuch, M.A.: Autoscale: dynamic, robust capacity management for multi-tier data centers. ACM Trans. Comput. Syst. (TOCS), 30(4), 1\u201326 (2012)","DOI":"10.1145\/2382553.2382556"},{"key":"8_CR16","unstructured":"https:\/\/github.com\/tensorflow\/serving (2022)"},{"key":"8_CR17","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Vanhoucke, V., Ioffe, S., Shlens, J., Wojna, Z.: Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2818\u20132826 (2016)","DOI":"10.1109\/CVPR.2016.308"},{"key":"8_CR18","unstructured":"Howard, A.G., et al.: Mobilenets: efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861 (2017)"}],"container-title":["Lecture Notes in Computer Science","Algorithms and Architectures for Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-0834-5_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T02:04:22Z","timestamp":1710209062000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-0834-5_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819708338","9789819708345"],"references-count":18,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-0834-5_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"12 March 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICA3PP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Algorithms and Architectures for Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tianjin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 October 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 October 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ica3pp2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/tjutanklab.com\/ica3pp2023\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Online submission system","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"439","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"145","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"33% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}