{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,19]],"date-time":"2026-06-19T22:42:25Z","timestamp":1781908945015,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":67,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,22]],"date-time":"2026-06-22T00:00:00Z","timestamp":1782086400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2140346"],"award-info":[{"award-number":["2140346"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2231523"],"award-info":[{"award-number":["2231523"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2441179"],"award-info":[{"award-number":["2441179"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2348306"],"award-info":[{"award-number":["2348306"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2511445"],"award-info":[{"award-number":["2511445"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2518375"],"award-info":[{"award-number":["2518375"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2536952"],"award-info":[{"award-number":["2536952"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2544032"],"award-info":[{"award-number":["2544032"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000015","name":"DOE U.S. Department of Energy","doi-asserted-by":"publisher","award":["DE-SC0026344"],"award-info":[{"award-number":["DE-SC0026344"]}],"id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,22]]},"DOI":"10.1145\/3787109.3815288","type":"proceedings-article","created":{"date-parts":[[2026,6,18]],"date-time":"2026-06-18T14:17:19Z","timestamp":1781792239000},"page":"487-494","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["DORA: Dataflow-Instruction Orchestration Architecture for DNN Acceleration"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4865-3708","authenticated-orcid":false,"given":"Xingzhen","family":"Chen","sequence":"first","affiliation":[{"name":"Brown University, Providence, RI, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7655-4080","authenticated-orcid":false,"given":"Zhuoping","family":"Yang","sequence":"additional","affiliation":[{"name":"Brown University, Providence, RI, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3659-339X","authenticated-orcid":false,"given":"Jinming","family":"Zhuang","sequence":"additional","affiliation":[{"name":"Brown University, Providence, RI, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3429-4692","authenticated-orcid":false,"given":"Shixin","family":"Ji","sequence":"additional","affiliation":[{"name":"Brown University, Providence, RI, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2333-3416","authenticated-orcid":false,"given":"Sarah","family":"Schultz","sequence":"additional","affiliation":[{"name":"Brown University, Providence, RI, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0692-7486","authenticated-orcid":false,"given":"Zheng","family":"Dong","sequence":"additional","affiliation":[{"name":"Wayne State University, Detroit, MI, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5864-4675","authenticated-orcid":false,"given":"Weisong","family":"Shi","sequence":"additional","affiliation":[{"name":"University of Delaware, Newark, DE, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0493-1844","authenticated-orcid":false,"given":"Peipei","family":"Zhou","sequence":"additional","affiliation":[{"name":"Brown University, Providence, RI, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,22]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1109\/FPL.2018.00077"},{"key":"e_1_3_3_1_3_2","volume-title":"Vitis AI User Guide","year":"2023","unstructured":"AMD 2023. Vitis AI User Guide. AMD. https:\/\/docs.amd.com\/r\/en-US\/ug1414-vitis-ai"},{"key":"e_1_3_3_1_4_2","unstructured":"AMD\/Xilinx. 2021. Versal Adaptive Compute Acceleration Platform. https:\/\/www.xilinx.com\/products\/silicon-devices\/acap\/versal.html."},{"key":"e_1_3_3_1_5_2","volume-title":"AI Engine API and Intrinsics User Guide","year":"2023","unstructured":"AMD\/Xilinx. 2023. AI Engine API and Intrinsics User Guide."},{"key":"e_1_3_3_1_6_2","volume-title":"Versal ACAP AI Engine System C Simulator","year":"2023","unstructured":"AMD\/Xilinx. 2023. Versal ACAP AI Engine System C Simulator."},{"key":"e_1_3_3_1_7_2","unstructured":"Autoware Foundation. [n. d.]. Autoware - the world\u2019s leading open-source software project for autonomous driving. https:\/\/github.com\/autowarefoundation\/autoware"},{"key":"e_1_3_3_1_8_2","first-page":"2277","volume-title":"ICML","author":"Bacellar Alan Tendler\u00a0Leibel","year":"2024","unstructured":"Alan Tendler\u00a0Leibel Bacellar et\u00a0al. 2024. Differentiable Weightless Neural Networks. In ICML. 2277\u20132295. https:\/\/proceedings.mlr.press\/v235\/bacellar24a.html"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","unstructured":"Mohammed\u00a0S Bensaleh et\u00a0al. 2018. Optimal task scheduling for distributed cluster with active storage devices and accelerated nodes. IEEE Access 6 (2018) 48195\u201348209.","DOI":"10.1109\/ACCESS.2018.2867683"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","unstructured":"Julian Blank et\u00a0al. [n. d.]. Pymoo: Multi-Objective Optimization in Python. IEEE Access ([n. d.]). 10.1109\/ACCESS.2020.2990567","DOI":"10.1109\/ACCESS.2020.2990567"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.23919\/ISC.2025.11020612"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICFPT51103.2020.00011"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICFPT51103.2020.00011"},{"key":"e_1_3_3_1_14_2","first-page":"1","volume-title":"ISCA","author":"Cai Jingwei","year":"2023","unstructured":"Jingwei Cai et\u00a0al. 2023. Inter-layer scheduling space definition and exploration for tiled accelerators. In ISCA. 1\u201317."},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Hongzheng Chen et\u00a0al. 2024. Understanding the potential of fpga-based spatial acceleration for large language model inference. ACM TRETS 18 1 (2024) 1\u201329.","DOI":"10.1145\/3656177"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"crossref","unstructured":"Hongzheng Chen et\u00a0al. 2024. Allo: A programming model for composable accelerator design. Proceedings of the ACM on Programming Languages 8 PLDI (2024) 593\u2013620.","DOI":"10.1145\/3656401"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"crossref","unstructured":"Dimitrios Danopoulos et\u00a0al. 2025. AIE4ML: An End-to-End Framework for Compiling Neural Networks for the Next Generation of AMD AI Engines. arXiv (2025).","DOI":"10.1109\/FCCM68464.2026.00035"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/FPL64840.2024.00039"},{"key":"e_1_3_3_1_19_2","unstructured":"Jacob Devlin et\u00a0al. 2018. BERT: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1810.04805 (2018)."},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","unstructured":"Peiyan Dong et\u00a0al. 2024. EQ-ViT: Algorithm-Hardware Co-Design for End-to-End Acceleration of Real-Time Vision Transformer Inference on Versal ACAP Architecture. TCAD (2024). 10.1109\/TCAD.2024.3443692","DOI":"10.1109\/TCAD.2024.3443692"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/FPL64840.2024.00019"},{"key":"e_1_3_3_1_22_2","first-page":"1","volume-title":"ISCA","author":"Fowers Jeremy","year":"2018","unstructured":"Jeremy Fowers et\u00a0al. 2018. A configurable cloud-scale DNN processor for real-time AI. In ISCA. IEEE, 1\u201314."},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00012"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD63220.2024.00041"},{"key":"e_1_3_3_1_25_2","unstructured":"Nan Guan et\u00a0al. [n. d.]. Industry Challenge."},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"crossref","unstructured":"Zibo Guo et\u00a0al. 2024. An overlay accelerator of DeepLab CNN for spacecraft image segmentation on FPGA. Remote Sensing 16 5 (2024) 894.","DOI":"10.3390\/rs16050894"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"crossref","unstructured":"Mathew Hall et\u00a0al. 2020. HPIPE: Heterogeneous layer-pipelined and sparse-aware CNN inference for FPGAs. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2007.10451 (2020).","DOI":"10.1145\/3373087.3375380"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","unstructured":"Xiangnan He et\u00a0al. 2017. Neural Collaborative Filtering(WWW \u201917). 10.1145\/3038912.3052569","DOI":"10.1145\/3038912.3052569"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM62733.2025.00047"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM62733.2025.00043"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"crossref","unstructured":"Mustafa Ibrahim et\u00a0al. 2025. VERSATILE: Very Fast Partial Reconfiguration Controller. ACM Transactions on Reconfigurable Technology and Systems 18 3 (2025) 1\u201322.","DOI":"10.1145\/3748728"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3716368.3735215"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","unstructured":"Lana Josipovic et\u00a0al. 2021. Synthesizing General-Purpose Code Into Dynamically Scheduled Circuits. IEEE Circuits and Systems Magazine 21 2 (2021) 97\u2013118. 10.1109\/MCAS.2021.3071631","DOI":"10.1109\/MCAS.2021.3071631"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00016"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","unstructured":"Jun Liu et\u00a0al. 2025. FlightVGM: Efficient Video Generation Model Inference with Online Sparsification and Hybrid Precision on FPGAs(FPGA \u201925). 10.1145\/3706628.3708864","DOI":"10.1145\/3706628.3708864"},{"key":"e_1_3_3_1_36_2","unstructured":"CPLEX\u00a0User\u2019s Manual. 1987. Ibm ilog cplex optimization studio. Version 12 1987-2018 (1987) 1."},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM62733.2025.00044"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS64960.2025.00023"},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"publisher","DOI":"10.1109\/FPL68686.2025.00051"},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.1145\/3748173.3779207"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","DOI":"10.1109\/FPL60245.2023.00017"},{"key":"e_1_3_3_1_42_2","unstructured":"John Nickolls et\u00a0al. [n. d.]. Scalable parallel programming with CUDA. ([n. d.])."},{"key":"e_1_3_3_1_43_2","first-page":"652","volume-title":"CVPR","author":"Qi Charles\u00a0R","year":"2017","unstructured":"Charles\u00a0R Qi et\u00a0al. 2017. Pointnet: Deep learning on point sets for 3d classification and segmentation. In CVPR. 652\u2013660."},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"publisher","unstructured":"Jan-Frederik Schulte et\u00a0al. 2026. hls4ml: A Flexible Open-Source Platform for Deep Learning Acceleration on Reconfigurable Hardware. ACM TRETS (April 2026). 10.1145\/3801979Just Accepted.","DOI":"10.1145\/3801979"},{"key":"e_1_3_3_1_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/3748173.3779543"},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICFPT59805.2023.00016"},{"key":"e_1_3_3_1_47_2","unstructured":"Dhananjay\u00a0Rao Thallikar et\u00a0al. 2026. HMix: An Efficient Hardware Accelerator for Quantized MLP-Mixer Inference. (2026)."},{"key":"e_1_3_3_1_48_2","volume-title":"NIPS","author":"Tolstikhin Ilya","year":"2024","unstructured":"Ilya Tolstikhin et\u00a0al. 2024. MLP-mixer: an all-MLP architecture for vision. In NIPS. Curran Associates Inc., Red Hook, NY, USA, Article 1857, 12\u00a0pages."},{"key":"e_1_3_3_1_49_2","volume-title":"ISCA","author":"Tong Jianming","year":"2024","unstructured":"Jianming Tong et\u00a0al. 2024. FEATHER: A reconfigurable accelerator with data reordering support for low-cost on-chip dataflow switching. In ISCA. IEEE."},{"key":"e_1_3_3_1_50_2","volume-title":"International conference on machine learning","author":"Touvron Hugo","year":"2021","unstructured":"Hugo Touvron et\u00a0al. 2021. Training data-efficient image transformers & distillation through attention. In International conference on machine learning. PMLR."},{"key":"e_1_3_3_1_51_2","volume-title":"ISCA","author":"Wang Chengyue","year":"2025","unstructured":"Chengyue Wang et\u00a0al. 2025. Reconfigurable Stream Network Architecture. In ISCA."},{"key":"e_1_3_3_1_52_2","doi-asserted-by":"publisher","unstructured":"Erwei Wang et\u00a0al. 2026. From Loop Nests to Silicon: Mapping AI Workloads onto AMD NPUs with MLIR-AIR. ACM TRETS (Jan. 2026). 10.1145\/3785670Just Accepted.","DOI":"10.1145\/3785670"},{"key":"e_1_3_3_1_53_2","unstructured":"Yu\u00a0Emma Wang et\u00a0al. 2019. Benchmarking TPU GPU and CPU platforms for deep learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1907.10701 (2019)."},{"key":"e_1_3_3_1_54_2","doi-asserted-by":"publisher","DOI":"10.1145\/3240765.3240856"},{"key":"e_1_3_3_1_55_2","volume-title":"Zynq-7000 SoC Technical Reference Manual","author":"Inc. Xilinx,","year":"2023","unstructured":"Xilinx, Inc.2023. Zynq-7000 SoC Technical Reference Manual. AMD. https:\/\/docs.amd.com\/r\/en-US\/ug585-zynq-7000-SoC-TRM"},{"key":"e_1_3_3_1_56_2","doi-asserted-by":"publisher","unstructured":"Yixin Xu et\u00a0al. 2024. Ferroelectric FET-based context-switching FPGA enabling dynamic reconfiguration for adaptive deep learning machines. Science Advances (2024). arXiv:https:\/\/www.science.org\/doi\/pdf\/10.1126\/sciadv.adk152510.1126\/sciadv.adk1525","DOI":"10.1126\/sciadv.adk1525"},{"key":"e_1_3_3_1_57_2","doi-asserted-by":"publisher","DOI":"10.1109\/DAC63849.2025.11133088"},{"key":"e_1_3_3_1_58_2","volume-title":"ICCAD","author":"Yang Zhuoping","unstructured":"Zhuoping Yang et\u00a0al. [n. d.]. AIM: Accelerating Arbitrary-precision Integer Multiplication on Heterogeneous Reconfigurable Computing Platform Versal ACAP. In ICCAD."},{"key":"e_1_3_3_1_59_2","doi-asserted-by":"publisher","unstructured":"Shulin Zeng et\u00a0al. 2024. FlightLLM: Efficient Large Language Model Inference with a Complete Mapping Flow on FPGAs(FPGA \u201924). New York NY USA. 10.1145\/3626202.3637562","DOI":"10.1145\/3626202.3637562"},{"key":"e_1_3_3_1_60_2","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507767"},{"key":"e_1_3_3_1_61_2","doi-asserted-by":"publisher","DOI":"10.1145\/3400302.3415609"},{"key":"e_1_3_3_1_62_2","volume-title":"Modeling and Optimization for Customized Computing: Performance, Energy and Cost Perspective","author":"Zhou Peipei","year":"2019","unstructured":"Peipei Zhou. 2019. Modeling and Optimization for Customized Computing: Performance, Energy and Cost Perspective. University of California, Los Angeles."},{"key":"e_1_3_3_1_63_2","doi-asserted-by":"publisher","DOI":"10.1145\/3543622.3573210"},{"key":"e_1_3_3_1_64_2","doi-asserted-by":"publisher","unstructured":"Jinming Zhuang et\u00a0al. 2024. CHARM 2.0: Composing Heterogeneous Accelerators for Deep Learning on Versal ACAP Architecture. ACM TRETS 17 3 Article 51 (Sept. 2024) 31\u00a0pages. 10.1145\/3686163","DOI":"10.1145\/3686163"},{"key":"e_1_3_3_1_65_2","doi-asserted-by":"publisher","unstructured":"Jinming Zhuang et\u00a0al. 2025. ARIES: An Agile MLIR-Based Compilation Flow for Reconfigurable Devices with AI Engines(FPGA \u201925). New York NY USA. 10.1145\/3706628.3708870","DOI":"10.1145\/3706628.3708870"},{"key":"e_1_3_3_1_66_2","doi-asserted-by":"publisher","DOI":"10.1145\/3626202.3637569"},{"key":"e_1_3_3_1_67_2","unstructured":"Jinming Zhuang et\u00a0al. 2023. AutoMM: Energy-efficient multi-data-type matrix multiply design on heterogeneous programmable system-on-chip. (2023)."},{"key":"e_1_3_3_1_68_2","doi-asserted-by":"publisher","DOI":"10.1109\/DAC56929.2023.10247981"}],"event":{"name":"GLSVLSI '26: Great Lakes Symposium on VLSI 2026","location":"Canandaigua , NY , USA","acronym":"GLSVLSI '26","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEEE CEDA"]},"container-title":["Proceedings of the Great Lakes Symposium on VLSI 2026"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3787109.3815288","content-type":"text\/html","content-version":"vor","intended-application":"syndication"}],"deposited":{"date-parts":[[2026,6,18]],"date-time":"2026-06-18T14:24:54Z","timestamp":1781792694000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3787109.3815288"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,22]]},"references-count":67,"alternative-id":["10.1145\/3787109.3815288","10.1145\/3787109"],"URL":"https:\/\/doi.org\/10.1145\/3787109.3815288","relation":{},"subject":[],"published":{"date-parts":[[2026,6,22]]},"assertion":[{"value":"2026-06-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}