{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T07:10:45Z","timestamp":1771657845029,"version":"3.50.1"},"reference-count":32,"publisher":"Zhejiang University Press","issue":"4","license":[{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Front Inform Technol Electron Eng"],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1631\/fitee.2400453","type":"journal-article","created":{"date-parts":[[2025,5,7]],"date-time":"2025-05-07T09:45:50Z","timestamp":1746611150000},"page":"605-622","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Minimizing transformer inference overhead using controlling element on Shenwei AI accelerator","\u4f7f\u7528\u7533\u5a01\u4eba\u5de5\u667a\u80fd\u52a0\u901f\u5668\u7684\u63a7\u5236\u5355\u5143\u6700\u5c0f\u5316Transformer\u63a8\u7406\u5f00\u9500"],"prefix":"10.1631","volume":"26","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-2291-9499","authenticated-orcid":false,"given":"Yulong","family":"Zhao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chunzhi","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yizhuo","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lufei","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yaguang","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenyuan","family":"Shen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hao","family":"Fan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hankang","family":"Fang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yi","family":"Qin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7870-6535","authenticated-orcid":false,"given":"Xin","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"635","published-online":{"date-parts":[[2025,5,7]]},"reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/hpec.2019.8916466"},{"key":"ref2","first-page":"12449","article-title":"wav2vec 2.0: a framework for self-supervised learning of speech representations","volume-title":"Proc 34th Int Conf on Neural Information Processing Systems","author":"Baevski","year":"2020"},{"key":"ref3","article-title":"Memory system on fusion APUs: the benefits of zero copy","volume-title":"AMD Fusion Developer Summit","author":"Boudier","year":"2011"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830818"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476138"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/cluster49012.2020.00023"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/tcad.2018.2821565"},{"key":"ref8","first-page":"16344","article-title":"FLASHATTENTION: fast and memory-efficient exact attention with IO-awareness","volume-title":"Proc 36th Int Conf on Neural Information Processing Systems","author":"Dao","year":"2022"},{"key":"ref9","volume-title":"BERT: pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2019"},{"key":"ref10","volume-title":"EnergonAI: an inference system for 10-100 billion parameter transformer models","author":"Du","year":"2022"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3441578"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/icpads.2013.47"},{"key":"ref13","volume-title":"DaVinci: a Scalable Architecture for Neural Network Computing","year":"2020"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3446382.3448606"},{"key":"ref15","volume-title":"FastFormers: highly efficient transformer models for natural language understanding","author":"Kim","year":"2020"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s00450-012-0209-1"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ispa-bdcloud-socialcom-sustaincom52081.2021.00061"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.5742"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/hcs49909.2020.9220641"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/mm.2008.50"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.sysarc.2022.102561"},{"key":"ref22","volume-title":"Language Models are Unsupervised Multitask Learners","author":"Radford","year":"2019"},{"key":"ref23","first-page":"28492","article-title":"Robust speech recognition via large-scale weak supervision","volume-title":"Proc 40th Int Conf on Machine Learning","author":"Radford","year":"2023"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/mm.2016.25"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/dac18074.2021.9586134"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/icicct.2017.7975190"},{"key":"ref27","volume-title":"Efficient processing of deep neural networks: a tutorial and survey","author":"Sze","year":"2017"},{"key":"ref28","volume-title":"LLaMA: open and efficient foundation language models","author":"Touvron","year":"2023"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref30","volume-title":"LightSeq: a high performance inference library for transformers","author":"Wang","year":"2021"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/hotchips.2019.8875671"},{"key":"ref32","first-page":"5","article-title":"Understanding the overheads of launching CUDA kernels","volume-title":"Int Conf on Parallel Processing","author":"Zhang","year":"2019"}],"container-title":["Frontiers of Information Technology &amp; Electronic Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1631\/FITEE.2400453.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1631\/FITEE.2400453\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1631\/FITEE.2400453.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T06:36:57Z","timestamp":1771655817000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1631\/FITEE.2400453"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4]]},"references-count":32,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,4]]}},"alternative-id":["2054"],"URL":"https:\/\/doi.org\/10.1631\/fitee.2400453","relation":{},"ISSN":["2095-9184","2095-9230"],"issn-type":[{"value":"2095-9184","type":"print"},{"value":"2095-9230","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,4]]},"assertion":[{"value":"28 May 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 August 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"All the authors declare that they have no conflict of interest.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}