{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T15:19:29Z","timestamp":1774365569128,"version":"3.50.1"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031226762","type":"print"},{"value":"9783031226779","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-22677-9_29","type":"book-chapter","created":{"date-parts":[[2023,1,10]],"date-time":"2023-01-10T09:04:32Z","timestamp":1673341472000},"page":"548-567","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["An Efficient Transformer Inference Engine on\u00a0DSP"],"prefix":"10.1007","author":[{"given":"Kangkang","family":"Chen","sequence":"first","affiliation":[]},{"given":"Huayou","family":"Su","sequence":"additional","affiliation":[]},{"given":"Chaorun","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Xiaoli","family":"Gong","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,1,11]]},"reference":[{"key":"29_CR1","unstructured":"Abadi, M., et al.: Tensorflow: a system for large-scale machine learning. In: Proceedings of the 12th USENIX conference on Operating Systems Design and Implementation, pp. 265\u2013283 (2016)"},{"key":"29_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-030-58452-8_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"N Carion","year":"2020","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 213\u2013229. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13"},{"key":"29_CR3","unstructured":"Chen, T., et al.: TVM: an automated end-to-end optimizing compiler for deep learning. In: Proceedings of the 13th USENIX conference on Operating Systems Design and Implementation, pp. 579\u2013594 (2018)"},{"key":"29_CR4","doi-asserted-by":"crossref","unstructured":"Cho, K., et al.: Learning phrase representations using RNN encoder-decoder for statistical machine translation. In: EMNLP (2014)","DOI":"10.3115\/v1\/D14-1179"},{"key":"29_CR5","unstructured":"Dice, D., Kogan, A.: Optimizing inference performance of transformers on CPUs. arXiv preprint arXiv:2102.06621 (2021)"},{"key":"29_CR6","unstructured":"Dosovitskiy, A., et al.: An image is worth 16$$\\,\\times \\,$$16 words: transformers for image recognition at scale. In: International Conference on Learning Representations (2020)"},{"key":"29_CR7","doi-asserted-by":"crossref","unstructured":"Fang, J., Yu, Y., Zhao, C., Zhou, J.: Turbotransformers: an efficient gpu serving system for transformer models. In: Proceedings of the 26th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, pp. 389\u2013402 (2021)","DOI":"10.1145\/3437801.3441578"},{"key":"29_CR8","doi-asserted-by":"crossref","unstructured":"Ganiev, A., Chapin, C., De Andrade, A., Liu, C.: An architecture for accelerated large-scale inference of transformer-based language models. In: Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers, pp. 163\u2013169 (2021)","DOI":"10.18653\/v1\/2021.naacl-industry.21"},{"key":"29_CR9","unstructured":"Han, K., Xiao, A., Wu, E., Guo, J., Xu, C., Wang, Y.: Transformer in transformer, vol. 34, pp. 15908\u201315919 (2021)"},{"key":"29_CR10","unstructured":"Hendrycks, D., Gimpel, K.: Gaussian error linear units (GELUs). arXiv preprint arXiv:1606.08415 (2016)"},{"issue":"8","key":"29_CR11","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"29_CR12","unstructured":"Kenton, J.D.M.W.C., Toutanova, L.K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of NAACL-HLT, pp. 4171\u20134186 (2019)"},{"key":"29_CR13","unstructured":"Lan, Z., Chen, M., Goodman, S., Gimpel, K., Sharma, P., Soricut, R.: Albert: a lite BERT for self-supervised learning of language representations. In: International Conference on Learning Representations (2019)"},{"key":"29_CR14","doi-asserted-by":"crossref","unstructured":"Li, B., et al.: Ftrans: energy-efficient acceleration of transformers using FPGA. In: Proceedings of the ACM\/IEEE International Symposium on Low Power Electronics and Design, pp. 175\u2013180 (2020)","DOI":"10.1145\/3370748.3406567"},{"key":"29_CR15","doi-asserted-by":"crossref","unstructured":"li, G., et al.: Easy and efficient transformer: Scalable inference solution for large NLP mode. arXiv preprint arXiv:2104.12470 (2021)","DOI":"10.18653\/v1\/2022.naacl-industry.8"},{"key":"29_CR16","doi-asserted-by":"crossref","unstructured":"Li, Z., et al.: Auto-ViT-Acc: An FPGA-aware automatic acceleration framework for vision transformer with mixed-scheme quantization. arXiv preprint arXiv:2208.05163 (2022)","DOI":"10.1109\/FPL57034.2022.00027"},{"key":"29_CR17","unstructured":"Liu, Y., et al.: Roberta: a robustly optimized BERT pretraining approach. arXiv preprint arXiv:1907.11692 (2019)"},{"key":"29_CR18","doi-asserted-by":"crossref","unstructured":"Liu, Z., Li, G., Cheng, J.: Hardware acceleration of fully quantized BERT for efficient natural language processing. In: 2021 Design, Automation & Test in Europe Conference & Exhibition (DATE), pp. 513\u2013516. IEEE (2021)","DOI":"10.23919\/DATE51398.2021.9474043"},{"key":"29_CR19","unstructured":"NVIDIA: Nvidia TensorRT (2020)"},{"key":"29_CR20","unstructured":"NVIDIA: Fastertransformer (2022)"},{"key":"29_CR21","unstructured":"Paszke, A., et al.: PyTorch: an imperative style, high-performance deep learning library, vol. 32 (2019)"},{"key":"29_CR22","doi-asserted-by":"crossref","unstructured":"Qi, P., Song, Y., Peng, H., Huang, S., Zhuge, Q., Sha, E.H.M.: Accommodating transformer onto FPGA: coupling the balanced model compression and FPGA-implementation optimization. In: Proceedings of the 2021 on Great Lakes Symposium on VLSI, pp. 163\u2013168 (2021)","DOI":"10.1145\/3453688.3461739"},{"key":"29_CR23","unstructured":"Vaswani, A., et al.: Attention is all you need, vol. 30 (2017)"},{"key":"29_CR24","doi-asserted-by":"crossref","unstructured":"Wang, X., Xiong, Y., Wei, Y., Wang, M., Li, L.: LightSeq: a high performance inference library for transformers. In: Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers, pp. 113\u2013120 (2021)","DOI":"10.18653\/v1\/2021.naacl-industry.15"},{"key":"29_CR25","doi-asserted-by":"crossref","unstructured":"Wang, Y., et al.: End-to-end video instance segmentation with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8741\u20138750 (2021)","DOI":"10.1109\/CVPR46437.2021.00863"},{"key":"29_CR26","doi-asserted-by":"crossref","unstructured":"Wang, Y., Wang, Q., Chu, X.: Energy-efficient inference service of transformer-based deep learning models on GPUs. In: 2020 International Conferences on Internet of Things (iThings) and IEEE Green Computing and Communications (GreenCom) and IEEE Cyber, Physical and Social Computing (CPSCom) and IEEE Smart Data (SmartData) and IEEE Congress on Cybermatics (Cybermatics), pp. 323\u2013331. IEEE (2020)","DOI":"10.1109\/iThings-GreenCom-CPSCom-SmartData-Cybermatics50389.2020.00067"},{"key":"29_CR27","unstructured":"Wu, S., Lv, T., Yuan, P., Zhao, P., Ye, J., Lin, H.: Optimization for BERT inference performance on CPU (2021)"},{"key":"29_CR28","doi-asserted-by":"crossref","unstructured":"Zhou, L., Zhou, Y., Corso, J.J., Socher, R., Xiong, C.: End-to-end dense video captioning with masked transformer. In: Proceedings of the IEEE conference on Computer Vision and Pattern Recognition, pp. 8739\u20138748 (2018)","DOI":"10.1109\/CVPR.2018.00911"}],"container-title":["Lecture Notes in Computer Science","Algorithms and Architectures for Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-22677-9_29","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,3,20]],"date-time":"2023-03-20T11:40:17Z","timestamp":1679312417000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-22677-9_29"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031226762","9783031226779"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-22677-9_29","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"11 January 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICA3PP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Algorithms and Architectures for Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Copenhagen","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Denmark","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ica3pp2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"91","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"33","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"10","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"36% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}