{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:06:48Z","timestamp":1750309608945,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,31]]},"DOI":"10.1145\/3672608.3707809","type":"proceedings-article","created":{"date-parts":[[2025,5,14]],"date-time":"2025-05-14T18:30:17Z","timestamp":1747247417000},"page":"532-540","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Optimizing Compute Core Assignment for Dynamic Batch Inference in AI Inference Accelerator"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-9151-0315","authenticated-orcid":false,"given":"Ze-Wei","family":"Liou","sequence":"first","affiliation":[{"name":"Academia Sinica, Taipei, Taiwan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7649-7581","authenticated-orcid":false,"given":"Ding-Yong","family":"Hong","sequence":"additional","affiliation":[{"name":"Academia Sinica, Taipei, Taiwan"}]}],"member":"320","published-online":{"date-parts":[[2025,5,14]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Advanced Micro Devices","author":"Inc.","year":"2023","unstructured":"Inc. Advanced Micro Devices. 2023. AMD Alveo V70 AI Accelerator. https:\/\/www.xilinx.com\/applications\/data-center\/v70.html."},{"key":"e_1_3_2_1_2_1","unstructured":"Amazon. 2023. Inferentia. aws.amazon.com\/machine-learning\/inferentia\/."},{"key":"e_1_3_2_1_3_1","unstructured":"AWS. 2022. TorchServe on Amazon SageMaker. https:\/\/aws.amazon.com\/tw\/blogs\/machine-learning\/optimize-your-inference-jobs-using-dynamic-batch-inference-with-torchserve-on-amazon-sagemaker\/."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1379022.1375595"},{"key":"e_1_3_2_1_5_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell Sandhini Agarwal Ariel Herbert-Voss Gretchen Krueger Tom Henighan Rewon Child Aditya Ramesh Daniel Ziegler Jeffrey Wu Clemens Winter Chris Hesse Mark Chen Eric Sigler Mateusz Litwin Scott Gray Benjamin Chess Jack Clark Christopher Berner Sam McCandlish Alec Radford Ilya Sutskever and Dario Amodei. 2020. Language Models are Few-Shot Learners. In Advances in Neural Information Processing Systems (NeurIPS). 1877\u20131901."},{"key":"e_1_3_2_1_6_1","article-title":"Optimus: An Operator Fusion Framework for Deep Neural Networks","volume":"22","author":"Cai Xuyi","year":"2022","unstructured":"Xuyi Cai, Ying Wang, and Lei Zhang. 2022. Optimus: An Operator Fusion Framework for Deep Neural Networks. ACM Trans. Embed. Comput. Syst. 22, 1, Article 1 (oct 2022), 26 pages.","journal-title":"ACM Trans. Embed. Comput. Syst."},{"key":"e_1_3_2_1_7_1","volume-title":"TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. 2018. TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18). USENIX Association, 578\u2013594."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/JETCAS.2019.2910232"},{"key":"e_1_3_2_1_9_1","unstructured":"Scott Cyphers Arjun K Bansal Anahita Bhiwandiwalla Jayaram Bobba Matthew Brookhart Avijit Chakraborty Will Constable Christian Convey Leona Cook Omar Kanawi et al. 2018. Intel ngraph: An intermediate representation compiler and executor for deep learning. arXiv preprint arXiv:1801.08058 (2018)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589348"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_12_1","volume-title":"Yang Yang, and Yanqi Zhou.","author":"Hestness Joel","year":"2017","unstructured":"Joel Hestness, Sharan Narang, Newsha Ardalani, Gregory F. Diamos, Heewoo Jun, Hassan Kianinejad, Md. Mostofa Ali Patwary, Yang Yang, and Yanqi Zhou. 2017. Deep Learning Scaling is Predictable, Empirically. arXiv:1712.00409"},{"key":"e_1_3_2_1_13_1","unstructured":"Intel. 2017. Neural Compute Stick. https:\/\/software.intel.com\/content\/www\/us\/en\/develop\/articles\/intel-movidius-neural-compute-stick.html"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_15_1","unstructured":"B. Kunz. 2020. The Story of CVflow. https:\/\/www.ambarella.com\/blog\/the-story-of-cvflow\/."},{"key":"e_1_3_2_1_16_1","volume-title":"CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark. arXiv preprint arXiv:1812.00324","author":"Li Jiefeng","year":"2018","unstructured":"Jiefeng Li, Can Wang, Hao Zhu, Yihuan Mao, Hao-Shu Fang, and Cewu Lu. 2018. CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark. arXiv preprint arXiv:1812.00324 (2018)."},{"key":"e_1_3_2_1_17_1","volume-title":"Microsoft COCO: Common Objects in Context. In European Conference on Computer Vision (ECCV).","author":"Lin Tsung-Yi","year":"2014","unstructured":"Tsung-Yi Lin, Michael M. Yeh, Scott Antol, David B. J. Poole, Ross Girshick, and Piotr Dolla\u0155. 2014. Microsoft COCO: Common Objects in Context. In European Conference on Computer Vision (ECCV)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3020078.3021736"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2020.2975185"},{"key":"e_1_3_2_1_20_1","unstructured":"Microsoft. 2023. Batch Inference. https:\/\/github.com\/microsoft\/batch-inference."},{"key":"e_1_3_2_1_21_1","unstructured":"AWS Neuron. 2024. NeuronCore Batching. https:\/\/awsdocs-neuron.readthedocs-hosted.com\/en\/latest\/general\/arch\/neuron-features\/neuroncore-batching.html."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3453483.3454083"},{"key":"e_1_3_2_1_23_1","unstructured":"Nvidia. 2024. Concurrency and Dynamic Batching on Jetson. https:\/\/docs.nvidia.com\/deeplearning\/triton-inference-server\/user-guide\/docs\/examples\/jetson\/concurrency_and_dynamic_batching\/README.html."},{"key":"e_1_3_2_1_24_1","unstructured":"Qualcomm. 2023. Qualcomm\u00ae Cloud AI 100 (Edge Version). https:\/\/www.qualcomm.com\/content\/dam\/qcomm-martech\/dm-assets\/documents\/qualcomm_cloud_ai_100_announcment_deck_.pdf."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSEN.2020.2991741"},{"key":"e_1_3_2_1_28_1","volume-title":"An Evaluation of Edge TPU Accelerators for Convolutional Neural Networks. In IEEE International Symposium on Workload Characterization (IISWC). 79\u201391","author":"Seshadri Kiran","year":"2022","unstructured":"Kiran Seshadri, Berkin Akin, James Laudon, Ravi Narayanaswami, and Amir Yazdanbakhsh. 2022. An Evaluation of Edge TPU Accelerators for Convolutional Neural Networks. In IEEE International Symposium on Workload Characterization (IISWC). 79\u201391."},{"volume-title":"Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics (ACL). 1556\u20131566","author":"Tai Kai Sheng","key":"e_1_3_2_1_29_1","unstructured":"Kai Sheng Tai, Richard Socher, and Christopher D. Manning. 2015. Improved Semantic Representations From Tree-Structured Long Short-Term Memory Networks. In Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics (ACL). 1556\u20131566."},{"volume-title":"23rd International Conference on Pattern Recognition (ICPR). 2464\u20132469","author":"Teerapittayanon Surat","key":"e_1_3_2_1_30_1","unstructured":"Surat Teerapittayanon, Bradley McDanel, and H.T. Kung. 2016. BranchyNet: Fast inference via early exiting from deep neural networks. In 23rd International Conference on Pattern Recognition (ICPR). 2464\u20132469."},{"key":"e_1_3_2_1_31_1","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar Aurelien Rodriguez Armand Joulin Edouard Grave and Guillaume Lample. 2023. LLaMA: Open and Efficient Foundation Language Models. arXiv:2302.13971"},{"key":"e_1_3_2_1_32_1","unstructured":"Ultralytics. 2021. YOLOv5: A state-of-the-art real-time object detection system. https:\/\/docs.ultralytics.com."},{"key":"e_1_3_2_1_33_1","volume-title":"Tensor comprehensions: Framework-agnostic high-performance machine learning abstractions. arXiv preprint arXiv:1802.04730","author":"Vasilache Nicolas","year":"2018","unstructured":"Nicolas Vasilache, Oleksandr Zinenko, Theodoros Theodoridis, Priya Goyal, Zachary DeVito, William S Moses, Sven Verdoolaege, Andrew Adams, and Albert Cohen. 2018. Tensor comprehensions: Framework-agnostic high-performance machine learning abstractions. arXiv preprint arXiv:1802.04730 (2018)."},{"key":"e_1_3_2_1_34_1","volume-title":"RaPiD: AI Accelerator for Ultra-low Precision Training and Inference. In 2021 ACM\/IEEE 48th Annual International Symposium on Computer Architecture (ISCA). 153\u2013166","author":"Venkataramani Swagath","year":"2021","unstructured":"Swagath Venkataramani, Vijayalakshmi Srinivasan, Wei Wang, Sanchari Sen, Jintao Zhang, Ankur Agrawal, Monodeep Kar, Shubham Jain, Alberto Mannari, Hoang Tran, Yulong Li, Eri Ogawa, Kazuaki Ishizaki, Hiroshi Inoue, Marcel Schaal, Mauricio Serrano, Jungwook Choi, Xiao Sun, Naigang Wang, Chia-Yu Chen, Allison Allain, James Bonano, Nianzheng Cao, Robert Casatuta, et al. 2021. RaPiD: AI Accelerator for Ultra-low Precision Training and Inference. In 2021 ACM\/IEEE 48th Annual International Symposium on Computer Architecture (ISCA). 153\u2013166."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2983686"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3410463.3414654"},{"key":"e_1_3_2_1_37_1","volume-title":"BCEdge: SLO-Aware DNN Inference Services with Adaptive Batch-Concurrent Scheduling on Edge Devices","author":"Zhang Ziyang","year":"2024","unstructured":"Ziyang Zhang, Yang Zhao, Huan Li, and Jie Liu. 2024. BCEdge: SLO-Aware DNN Inference Services with Adaptive Batch-Concurrent Scheduling on Edge Devices. IEEE Transactions on Network and Service Management (2024), 1\u20131."}],"event":{"name":"SAC '25: 40th ACM\/SIGAPP Symposium on Applied Computing","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing"],"location":"Catania International Airport Catania Italy","acronym":"SAC '25"},"container-title":["Proceedings of the 40th ACM\/SIGAPP Symposium on Applied Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3672608.3707809","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3672608.3707809","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:57:32Z","timestamp":1750298252000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3672608.3707809"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,31]]},"references-count":37,"alternative-id":["10.1145\/3672608.3707809","10.1145\/3672608"],"URL":"https:\/\/doi.org\/10.1145\/3672608.3707809","relation":{},"subject":[],"published":{"date-parts":[[2025,3,31]]},"assertion":[{"value":"2025-05-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}