{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T01:05:18Z","timestamp":1767315918022,"version":"3.48.0"},"publisher-location":"Cham","reference-count":34,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032068170","type":"print"},{"value":"9783032068187","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-06818-7_13","type":"book-chapter","created":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T01:01:34Z","timestamp":1767315694000},"page":"229-251","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A Study on\u00a0Inference Latency for\u00a0Vision Transformers on\u00a0Mobile Devices"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8308-0231","authenticated-orcid":false,"given":"Zhuojin","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5110-203X","authenticated-orcid":false,"given":"Marco","family":"Paolieri","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8353-5040","authenticated-orcid":false,"given":"Leana","family":"Golubchik","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,1,2]]},"reference":[{"key":"13_CR1","unstructured":"Andoorveedu, M., Zhu, Z., Zheng, B., Pekhimenko, G.: Tempo: accelerating transformer-based model training through memory footprint reduction. In: NeurIPS 2022, vol. 35, pp. 12267\u201312282 (2022)"},{"key":"13_CR2","doi-asserted-by":"crossref","unstructured":"Baller, S.P., Jindal, A., Chadha, M., Gerndt, M.: DeepEdgeBench: benchmarking deep neural networks on edge devices. In: IC2E 2021, pp. 20\u201330. IEEE (2021)","DOI":"10.1109\/IC2E52221.2021.00016"},{"key":"13_CR3","doi-asserted-by":"crossref","unstructured":"Boroumand, A., et\u00a0al.: Google neural network models for edge devices: analyzing and mitigating machine learning inference bottlenecks. In: PACT 2021, pp. 159\u2013172. IEEE (2021)","DOI":"10.1109\/PACT52795.2021.00019"},{"key":"13_CR4","doi-asserted-by":"crossref","unstructured":"Cai, H., Li, J., Hu, M., Gan, C., Han, S.: EfficientViT: multi-scale linear attention for high-resolution dense prediction. arXiv preprint arXiv:2205.14756 (2022)","DOI":"10.1109\/ICCV51070.2023.01587"},{"key":"13_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-030-58452-8_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"N Carion","year":"2020","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 213\u2013229. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13"},{"issue":"1","key":"13_CR6","first-page":"1","volume":"8","author":"S Cheng","year":"2024","unstructured":"Cheng, S., et al.: Thorough characterization and analysis of large transformer model training at-scale. POMACS 8(1), 1\u201325 (2024)","journal-title":"POMACS"},{"key":"13_CR7","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth $$16 \\times 16$$ words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"13_CR8","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/j.neunet.2017.12.012","volume":"107","author":"S Elfwing","year":"2018","unstructured":"Elfwing, S., Uchibe, E., Doya, K.: Sigmoid-weighted linear units for neural network function approximation in reinforcement learning. Neural Netw. 107, 3\u201311 (2018)","journal-title":"Neural Netw."},{"key":"13_CR9","unstructured":"Google: XNNPACK: High-efficiency floating-point neural network inference operators for mobile, server, and web. https:\/\/github.com\/google\/XNNPACK (2023)"},{"key":"13_CR10","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR 2016, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"13_CR11","unstructured":"Hendrycks, D., Gimpel, K.: Gaussian error linear units (GELUs). arXiv preprint arXiv:1606.08415 (2016)"},{"key":"13_CR12","unstructured":"Howard, A.G., et\u00a0al.: MobileNets: efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861 (2017)"},{"key":"13_CR13","first-page":"711","volume":"3","author":"A Ivanov","year":"2021","unstructured":"Ivanov, A., Dryden, N., Ben-Nun, T., Li, S., Hoefler, T.: Data movement is all you need: a case study on optimizing transformers. Proc. Mach. Learn. Syst. 3, 711\u2013732 (2021)","journal-title":"Proc. Mach. Learn. Syst."},{"issue":"1","key":"13_CR14","doi-asserted-by":"publisher","first-page":"615","DOI":"10.1145\/3093337.3037698","volume":"45","author":"Y Kang","year":"2017","unstructured":"Kang, Y., et al.: Neurosurgeon: collaborative intelligence between the cloud and mobile edge. ACM SIGARCH Comput. Archit. News 45(1), 615\u2013629 (2017)","journal-title":"ACM SIGARCH Comput. Archit. News"},{"key":"13_CR15","doi-asserted-by":"crossref","unstructured":"Kao, S.C., Subramanian, S., Agrawal, G., Yazdanbakhsh, A., Krishna, T.: Flat: an optimized dataflow for mitigating attention bottlenecks. In: ASPLOS 2023, vol.\u00a02, pp. 295\u2013310 (2023)","DOI":"10.1145\/3575693.3575747"},{"key":"13_CR16","doi-asserted-by":"crossref","unstructured":"Li, Z., Paolieri, M., Golubchik, L.: Dataset of CNN and transformer inference latency measurements on mobile devices (2023). https:\/\/github.com\/qed-usc\/mobile-ml-benchmark.git","DOI":"10.1145\/3642968.3654818"},{"key":"13_CR17","doi-asserted-by":"crossref","unstructured":"Li, Z., Paolieri, M., Golubchik, L.: A benchmark for ML inference latency on mobile devices. In: EdgeSys 2024, pp. 31\u201336 (2024)","DOI":"10.1145\/3642968.3654818"},{"key":"13_CR18","doi-asserted-by":"publisher","DOI":"10.1016\/j.peva.2024.102429","volume":"165","author":"Z Li","year":"2024","unstructured":"Li, Z., Paolieri, M., Golubchik, L.: Inference latency prediction for CNNs on heterogeneous mobile devices and ML frameworks. Perform. Eval. 165, 102429 (2024)","journal-title":"Perform. Eval."},{"key":"13_CR19","doi-asserted-by":"crossref","unstructured":"Liu, Z., et\u00a0al.: Swin transformer: hierarchical vision transformer using shifted windows. In: ICCV 2021, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"13_CR20","unstructured":"Louppe, G.: Understanding random forests: from theory to practice. arXiv preprint arXiv:1407.7502 (2014)"},{"key":"13_CR21","doi-asserted-by":"crossref","unstructured":"Ma, N., Zhang, X., Zheng, H.T., Sun, J.: ShuffleNet v2: practical guidelines for efficient CNN architecture design. In: ECCV 2018, pp. 116\u2013131 (2018)","DOI":"10.1007\/978-3-030-01264-9_8"},{"key":"13_CR22","unstructured":"Meta: QNNPACK: Quantized Neural Networks PACKage (2023). https:\/\/github.com\/pytorch\/pytorch\/tree\/main\/aten\/src\/ATen\/native\/quantized\/cpu\/qnnpack"},{"key":"13_CR23","doi-asserted-by":"crossref","unstructured":"Niu, W., et\u00a0al.: SmartMem: layout transformation elimination and adaptation for efficient DNN execution on mobile. In: ASPLOS 2024, vol.\u00a03, pp. 916\u2013931 (2024)","DOI":"10.1145\/3620666.3651384"},{"key":"13_CR24","doi-asserted-by":"crossref","unstructured":"Panopoulos, I., Nikolaidis, S., Venieris, S.I., Venieris, I.S.: Exploring the performance and efficiency of transformer models for NLP on mobile devices. In: ISCC 2023, pp.\u00a01\u20134. IEEE (2023)","DOI":"10.1109\/ISCC58397.2023.10217850"},{"key":"13_CR25","unstructured":"Qualcomm: Qualcomm Linux Performance Guide: DVFS governors (2024). https:\/\/docs.qualcomm.com\/bundle\/publicresource\/topics\/80-70014-10\/2-performance-features.html"},{"key":"13_CR26","unstructured":"Sovrasov, V.: ptflops: a flops counting tool for neural networks in PyTorch framework (2024). https:\/\/github.com\/sovrasov\/flops-counter.pytorch"},{"key":"13_CR27","doi-asserted-by":"crossref","unstructured":"Sun, H., Qu, Y., Wang, W., Dong, C., Zhang, L., Wu, Q.: An experimental study of DNN operator-level performance on edge devices. In: SmartIoT 2023, pp. 131\u2013138. IEEE (2023)","DOI":"10.1109\/SmartIoT58732.2023.00026"},{"key":"13_CR28","unstructured":"Vaswani, A., et al.: Attention is all you need. In: NeurIPS 2017, pp. 6000\u20136010 (2017)"},{"key":"13_CR29","doi-asserted-by":"crossref","unstructured":"Wang, W., et al.: Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: ICCV 2021, pp. 568\u2013578 (2021)","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"13_CR30","doi-asserted-by":"crossref","unstructured":"Wang, X., Zhang, L.L., Wang, Y., Yang, M.: Towards efficient vision transformer inference: a first study of transformers on mobile devices. In: HotMobile 2022, pp.\u00a01\u20137 (2022)","DOI":"10.1145\/3508396.3512869"},{"key":"13_CR31","unstructured":"Wightman, R.: PyTorch image models (2019). https:\/\/github.com\/rwightman\/pytorch-image-models"},{"key":"13_CR32","doi-asserted-by":"crossref","unstructured":"Wolf, T., et\u00a0al.: Transformers: state-of-the-art natural language processing. In: EMNLP 2020, pp. 38\u201345 (2020)","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"13_CR33","doi-asserted-by":"crossref","unstructured":"Yu, W., et al.: Metaformer baselines for vision. IEEE Trans. Pattern Anal. Mach. Intell. (2023)","DOI":"10.1109\/TPAMI.2023.3329173"},{"key":"13_CR34","unstructured":"Zoph, B., Le, Q.V.: Neural architecture search with reinforcement learning. arXiv preprint arXiv:1611.01578 (2016)"}],"container-title":["Lecture Notes of the Institute for Computer Sciences, Social Informatics and Telecommunications Engineering","Performance Evaluation Methodologies and Tools"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-06818-7_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T01:01:38Z","timestamp":1767315698000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-06818-7_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9783032068170","9783032068187"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-06818-7_13","relation":{},"ISSN":["1867-8211","1867-822X"],"issn-type":[{"value":"1867-8211","type":"print"},{"value":"1867-822X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"2 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Valuetools","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"EAI International Conference on Performance Evaluation Methodologies and Tools","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17th","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"valuetools2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}