{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T21:21:33Z","timestamp":1757625693195,"version":"3.44.0"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031998560"},{"type":"electronic","value":"9783031998577"}],"license":[{"start":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T00:00:00Z","timestamp":1755907200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T00:00:00Z","timestamp":1755907200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-031-99857-7_8","type":"book-chapter","created":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T05:17:41Z","timestamp":1755839861000},"page":"103-117","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["BATCH-DNN: Adaptive and\u00a0Dynamic Batching for\u00a0Multi-DNN Accelerators"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7006-2376","authenticated-orcid":false,"given":"Piyumal","family":"Ranawaka","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7441-8245","authenticated-orcid":false,"given":"Per","family":"Stenstrom","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,8,23]]},"reference":[{"key":"8_CR1","unstructured":"Batchdnn_sim (2025). https:\/\/drive.google.com\/drive\/folders\/11EDgM6dKx1Zpl1_g31EapBzpHs-sLbzj?usp=sharing"},{"key":"8_CR2","doi-asserted-by":"crossref","unstructured":"Baek, E., et\u00a0al.: A multi-neural network acceleration architecture. In: ISCA, pp. 940\u2013953. IEEE (2020)","DOI":"10.1109\/ISCA45697.2020.00081"},{"key":"8_CR3","doi-asserted-by":"crossref","unstructured":"Baek, E., et al.: Stfusion: fast and flexible multi-nn execution using spatio-temporal block fusion and memory management. IEEE Trans. Comput. (2022)","DOI":"10.1109\/TC.2022.3218428"},{"key":"8_CR4","unstructured":"BaiduResearch: Deepbench (2025). https:\/\/github.com\/baidu-research\/DeepBench"},{"key":"8_CR5","doi-asserted-by":"publisher","first-page":"22430","DOI":"10.1109\/ACCESS.2021.3137638","volume":"11","author":"DP Carrasco","year":"2021","unstructured":"Carrasco, D.P., et al.: T-yolo: tiny vehicle detection based on yolo and multi-scale convolutional neural networks. IEEE Access 11, 22430\u201322440 (2021)","journal-title":"IEEE Access"},{"key":"8_CR6","doi-asserted-by":"crossref","unstructured":"Choi, J., et\u00a0al.: Enabling fine-grained spatial multitasking on systolic-array npus using dataflow mirroring. IEEE Trans. Comput. (2023)","DOI":"10.1109\/TC.2023.3299030"},{"key":"8_CR7","doi-asserted-by":"crossref","unstructured":"Choi, Y., et\u00a0al.: Prema: a predictive multi-task scheduling algorithm for preemptible neural processing units. In: HPCA, pp. 220\u2013233. IEEE (2020)","DOI":"10.1109\/HPCA47549.2020.00027"},{"key":"8_CR8","doi-asserted-by":"crossref","unstructured":"Choi, Y., et\u00a0al.: Lazy batching: an sla-aware batching system for cloud machine learning inference. In: HPCA. IEEE (2021)","DOI":"10.1109\/HPCA51647.2021.00049"},{"key":"8_CR9","doi-asserted-by":"crossref","unstructured":"Drumond, M., et\u00a0al.: Equinox: training (for free) on a custom inference accelerator. In: MICRO-54, pp. 421\u2013433 (2021)","DOI":"10.1145\/3466752.3480057"},{"key":"8_CR10","doi-asserted-by":"crossref","unstructured":"Ghodrati, S., et\u00a0al.: Planaria: dynamic architecture fission for spatial multi-tenant acceleration of deep neural networks. In: MICRO. IEEE (2020)","DOI":"10.1109\/MICRO50266.2020.00062"},{"key":"8_CR11","unstructured":"G\u00fcnel, M.: Googlenet (2016)"},{"key":"8_CR12","doi-asserted-by":"crossref","unstructured":"Kim, S., et\u00a0al.: Moca: memory-centric, adaptive execution for multi-tenant deep neural networks. In: (HPCA), pp. 828\u2013841. IEEE (2023)","DOI":"10.1109\/HPCA56546.2023.10071035"},{"key":"8_CR13","unstructured":"Krizhevsky, A., et\u00a0al.: Imagenet classification with deep convolutional neural networks. In: Advances in Neural Information Processing Systems, vol. 25 (2012)"},{"key":"8_CR14","doi-asserted-by":"crossref","unstructured":"Lee, J., et\u00a0al.: Dataflow mirroring: architectural support for highly efficient fine-grained spatial multitasking on systolic-array npus. In: DAC, pp. 247\u2013252. IEEE (2021)","DOI":"10.1109\/DAC18074.2021.9586312"},{"key":"8_CR15","doi-asserted-by":"crossref","unstructured":"Li, C., et\u00a0al.: Memory-computing decoupling: a dnn multitasking accelerator with adaptive data arrangement. IEEE TCADs (2022)","DOI":"10.1109\/TCAD.2022.3197493"},{"key":"8_CR16","unstructured":"Li, Y., et\u00a0al.: A high-performance and energy-efficient photonic architecture for multi-dnn acceleration. IEEE TPDS (2023)"},{"key":"8_CR17","doi-asserted-by":"crossref","unstructured":"Li, Y., et\u00a0al.: A silicon photonic multi-dnn accelerator. In: PACT, pp. 238\u2013249. IEEE (2023)","DOI":"10.1109\/PACT58117.2023.00028"},{"key":"8_CR18","doi-asserted-by":"crossref","unstructured":"Oh, Y.H., et\u00a0al.: Layerweaver: maximizing resource utilization of neural processing units via layer-wise scheduling. In: HPCA. IEEE (2021)","DOI":"10.1109\/HPCA51647.2021.00056"},{"key":"8_CR19","doi-asserted-by":"crossref","unstructured":"Oh, Y.H., et\u00a0al.: Layerweaver+: a qos-aware layer-wise dnn scheduler for multi-tenant neural processing units. IEICE Trans. Inf. Syst. (2022)","DOI":"10.1587\/transinf.2021EDL8084"},{"key":"8_CR20","doi-asserted-by":"crossref","unstructured":"Reddi, V.J., et\u00a0al.: Mlperf inference benchmark. In: (ISCA), pp. 446\u2013459. IEEE (2020)","DOI":"10.1109\/ISCA45697.2020.00045"},{"key":"8_CR21","unstructured":"Ren, S., et\u00a0al.: Faster r-cnn: towards real-time object detection with region proposal networks. In: Advances in Neural Information Processing Systems, vol. 28 (2015)"},{"key":"8_CR22","doi-asserted-by":"crossref","unstructured":"Reshadi, M., et\u00a0al.: Dynamic resource partitioning for multi-tenant systolic array based dnn accelerator (2023)","DOI":"10.1109\/PDP59025.2023.00019"},{"key":"8_CR23","doi-asserted-by":"crossref","unstructured":"Samajdar, A., et\u00a0al.: A systematic methodology for characterizing scalability of dnn accelerators using scale-sim. In: (ISPASS), pp. 58\u201368. IEEE (2020)","DOI":"10.1109\/ISPASS48437.2020.00016"},{"key":"8_CR24","doi-asserted-by":"crossref","unstructured":"Shin, J., et\u00a0al.: Algorithm\/architecture co-design for energy-efficient acceleration of multi-task dnn. In: ACM\/IEEE DAC, pp. 253\u2013258 (2022)","DOI":"10.1145\/3489517.3530455"},{"key":"8_CR25","doi-asserted-by":"crossref","unstructured":"Shomron, G., et\u00a0al.: Smt-sa: simultaneous multithreading in systolic arrays. In: IEEE CAL (2019)","DOI":"10.1109\/LCA.2019.2924007"},{"key":"8_CR26","doi-asserted-by":"crossref","unstructured":"Shomron, G., et\u00a0al.: Non-blocking simultaneous multithreading: Embracing the resiliency of deep neural networks. In: MICRO, pp. 256\u2013269. IEEE (2020)","DOI":"10.1109\/MICRO50266.2020.00032"},{"key":"8_CR27","unstructured":"Vaswani, A., et\u00a0al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"8_CR28","doi-asserted-by":"crossref","unstructured":"Wang, C., et\u00a0al.: Cd-msa: cooperative and deadline-aware scheduling for efficient multi-tenancy on dnn accelerators. IEEE TPDS (2023)","DOI":"10.1109\/TPDS.2023.3276759"},{"key":"8_CR29","unstructured":"Wang, F., et\u00a0al.: Residual attention network for image classification. In: Proceedings of the IEEE CVPR, pp. 3156\u20133164 (2017)"},{"key":"8_CR30","doi-asserted-by":"crossref","unstructured":"Yang, J., et\u00a0al.: Venus: a versatile deep neural network accel-erator architecture design for multiple applications. In: DAC (2023)","DOI":"10.1109\/DAC56929.2023.10247897"},{"key":"8_CR31","doi-asserted-by":"crossref","unstructured":"Yang, J., et\u00a0al.: Versa-dnn: a versatile architecture enabling high-performance and energy-efficient multi-dnn acceleration. In: IEEE TPDS (2023)","DOI":"10.1109\/TPDS.2023.3340953"},{"key":"8_CR32","doi-asserted-by":"crossref","unstructured":"Yin, L., et\u00a0al.: Polyform: a versatile architecture for multi-dnn execution via spatial and temporal acceleration. In: ICCD. IEEE (2023)","DOI":"10.1109\/ICCD58817.2023.00033"}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2025: Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-99857-7_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T19:08:32Z","timestamp":1757444912000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-99857-7_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,23]]},"ISBN":["9783031998560","9783031998577"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-99857-7_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,8,23]]},"assertion":[{"value":"23 August 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"Euro-Par","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Dresden","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 April 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 April 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"europar2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2025.euro-par.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}