{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,3]],"date-time":"2025-11-03T10:34:38Z","timestamp":1762166078120,"version":"build-2065373602"},"publisher-location":"Singapore","reference-count":23,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819510207","type":"print"},{"value":"9789819510214","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,11,4]],"date-time":"2025-11-04T00:00:00Z","timestamp":1762214400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,4]],"date-time":"2025-11-04T00:00:00Z","timestamp":1762214400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-1021-4_1","type":"book-chapter","created":{"date-parts":[[2025,11,3]],"date-time":"2025-11-03T10:29:38Z","timestamp":1762165778000},"page":"3-16","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["DACO: Unlocking Latent Dataflow Opportunities in\u00a0Edge-Side SIMT Accelerators"],"prefix":"10.1007","author":[{"given":"Han","family":"Zhao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yiying","family":"Xiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yu","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaochun","family":"Ye","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Deze","family":"Zeng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jing","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weihao","family":"Cui","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Quan","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jingwen","family":"Leng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Minyi","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,11,4]]},"reference":[{"key":"1_CR1","unstructured":"Grattafiori, A., et al.: The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)"},{"key":"1_CR2","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"1_CR3","series-title":"Advances in Intelligent Systems and Computing","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1007\/978-3-030-45691-7_4","volume-title":"Trends and Innovations in Information Systems and Technologies","author":"W Li","year":"2020","unstructured":"Li, W., Liewig, M.: A survey of AI accelerators for edge environment. In: Rocha, \u00c1., Adeli, H., Reis, L.P., Costanzo, S., Orovic, I., Moreira, F. (eds.) WorldCIST 2020. AISC, vol. 1160, pp. 35\u201344. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-45691-7_4"},{"key":"1_CR4","unstructured":"Jouppi, N.P., et al.: In-datacenter performance analysis of a tensor processing unit. In: Proceedings of the 44th Annual International Symposium on Computer Architecture, pp. 1\u201312 (2017)"},{"key":"1_CR5","unstructured":"Nvidia Orin GPU. https:\/\/www.nvidia.com\/en-sg\/autonomous-machines\/embedded-systems\/jetson-orin\/. Accessed 13 Apr 2025"},{"issue":"1","key":"1_CR6","doi-asserted-by":"publisher","first-page":"269","DOI":"10.1145\/2654822.2541967","volume":"42","author":"T Chen","year":"2014","unstructured":"Chen, T., et al.: Diannao: a small-footprint high-throughput accelerator for ubiquitous machine-learning. ACM SIGARCH Comput. Arch. News 42(1), 269\u2013284 (2014)","journal-title":"ACM SIGARCH Comput. Arch. News"},{"key":"1_CR7","doi-asserted-by":"crossref","unstructured":"Ye, X., Fan, D., Sun, N., Tang, S., Zhang, M., Zhang, H.: SimICT: a fast and flexible framework for performance and power evaluation of large-scale architecture. In: International Symposium on Low Power Electronics and Design (ISLPED), pp. 273\u2013278. IEEE (2013)","DOI":"10.1109\/ISLPED.2013.6629308"},{"issue":"1","key":"1_CR8","first-page":"1","volume":"21","author":"Z Fan","year":"2024","unstructured":"Fan, Z., et al.: Improving utilization of dataflow unit for multi-batch processing. ACM Trans. Arch. Code Optim. 21(1), 1\u201326 (2024)","journal-title":"ACM Trans. Arch. Code Optim."},{"key":"1_CR9","unstructured":"DFU developed by ICT of CAS. https:\/\/ict.cas.cn\/xwgg\/jssxw\/202109\/t20210928_6216336.html. Accessed 13 Apr 2025"},{"key":"1_CR10","unstructured":"NVIDIA, Vingelmann, P., Fitzek, F.H.P.: CUDA, release: 10.2.89 (2020). https:\/\/developer.nvidia.com\/cuda-toolkit"},{"issue":"4","key":"1_CR11","doi-asserted-by":"publisher","first-page":"312","DOI":"10.1177\/10943420211008288","volume":"35","author":"N Kondratyuk","year":"2021","unstructured":"Kondratyuk, N., Nikolskiy, V., Pavlov, D., Stegailov, V.: GPU-accelerated molecular dynamics: state-of-art software performance and porting from Nvidia CUDA to AMD HIP. Int. J. High Perf. Comput. Appl. 35(4), 312\u2013324 (2021)","journal-title":"Int. J. High Perf. Comput. Appl."},{"key":"1_CR12","doi-asserted-by":"crossref","unstructured":"Rodriguez-Ferr\u00e1ndez, I., Kosmidis, L., Tali, M., Steenari, D., Hands, A., B\u00e9langer-Champagne, C.: Proton evaluation of single event effects in the NVIDIA GPU orin SoM: understanding radiation vulnerabilities beyond the SoC. In: 2024 IEEE 30th International Symposium on On-Line Testing and Robust System Design (IOLTS), pp. 1\u20137. IEEE (2024)","DOI":"10.1109\/IOLTS60994.2024.10616076"},{"key":"1_CR13","unstructured":"Ouyang, A.: Understanding the performance of transformer inference. Ph.D. thesis, Massachusetts Institute of Technology (2023)"},{"key":"1_CR14","unstructured":"Nvidia Tensor Core. https:\/\/images.nvidia.com\/aem-dam\/en-zz\/Solutions\/data-center\/nvidia-ampere-architecture-whitepaper.pdf. Accessed 13 Apr 2025"},{"issue":"3","key":"1_CR15","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1109\/12.48862","volume":"39","author":"RS Nikhil","year":"1990","unstructured":"Nikhil, R.S., et al.: Executing a program on the MIT tagged-token dataflow architecture. IEEE Trans. Comput. 39(3), 300\u2013318 (1990)","journal-title":"IEEE Trans. Comput."},{"issue":"1\u20133","key":"1_CR16","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1016\/0010-4655(85)90135-3","volume":"37","author":"JR Gurd","year":"1985","unstructured":"Gurd, J.R.: The Manchester dataflow machine. Comput. Phys. Commun. 37(1\u20133), 49\u201362 (1985)","journal-title":"Comput. Phys. Commun."},{"key":"1_CR17","doi-asserted-by":"crossref","unstructured":"Abts, D., et al.: Think fast: a tensor streaming processor (TSP) for accelerating deep learning workloads. In: 2020 ACM\/IEEE 47th Annual International Symposium on Computer Architecture (ISCA), pp. 145\u2013158. IEEE (2020)","DOI":"10.1109\/ISCA45697.2020.00023"},{"key":"1_CR18","doi-asserted-by":"crossref","unstructured":"Prabhakar, R., Jairath, S.: SambaNova SN10 RDU: accelerating software 2.0 with dataflow. In: 2021 IEEE Hot Chips 33 Symposium (HCS), pp. 1\u201337. IEEE (2021)","DOI":"10.1109\/HCS52781.2021.9567250"},{"key":"1_CR19","unstructured":"Nicol, C.: A coarse grain reconfigurable array (CGRA) for statically scheduled data flow computing. In: Wave Computing White Paper, pp. 1\u20139 (2017)"},{"key":"1_CR20","doi-asserted-by":"crossref","unstructured":"Tillet, P., Kung, H.T., Cox, D.: Triton: an intermediate language and compiler for tiled neural network computations. In: Proceedings of the 3rd ACM SIGPLAN International Workshop on Machine Learning and Programming Languages, pp. 10\u201319 (2019)","DOI":"10.1145\/3315508.3329973"},{"key":"1_CR21","doi-asserted-by":"crossref","unstructured":"Huang, H., et al.: UNet 3+: a full-scale connected UNet for medical image segmentation. In: ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1055\u20131059. IEEE (2020)","DOI":"10.1109\/ICASSP40776.2020.9053405"},{"key":"1_CR22","doi-asserted-by":"crossref","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, vol. 1 (Long and Short Papers), pp. 4171\u20134186 (2019)","DOI":"10.18653\/v1\/N19-1423"},{"issue":"3","key":"1_CR23","doi-asserted-by":"publisher","first-page":"452","DOI":"10.1360\/SSI-2024-0343","volume":"55","author":"J Leng","year":"2025","unstructured":"Leng, J., et al.: Dataflow microprocessor: development, trends, and challenges. SCIENTIA SINICA Informationis 55(3), 452 (2025)","journal-title":"SCIENTIA SINICA Informationis"}],"container-title":["Lecture Notes in Computer Science","Advanced Parallel Processing Technologies"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-1021-4_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,3]],"date-time":"2025-11-03T10:29:46Z","timestamp":1762165786000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-1021-4_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,4]]},"ISBN":["9789819510207","9789819510214"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-1021-4_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,4]]},"assertion":[{"value":"4 November 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"APPT","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Symposium on Advanced Parallel Processing Technologies","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Athens","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"appt2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.appt-conference.com\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}