{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,30]],"date-time":"2026-06-30T21:09:48Z","timestamp":1782853788984,"version":"3.54.5"},"publisher-location":"Singapore","reference-count":23,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819698684","type":"print"},{"value":"9789819698691","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-9869-1_43","type":"book-chapter","created":{"date-parts":[[2025,7,24]],"date-time":"2025-07-24T13:43:55Z","timestamp":1753364635000},"page":"515-526","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Accelerating LLM Inference on RISC-V Edge Devices via Vector Extension Optimization"],"prefix":"10.1007","author":[{"given":"Zhilong","family":"Liu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Long","family":"Peng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wenzhu","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ke","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Binrui","family":"Zeng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jie","family":"Yu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiaodong","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,7,25]]},"reference":[{"key":"43_CR1","unstructured":"Yuan, Z., Shang, Y., Zhou, Y., et al.: LLM inference unveiled: survey and roofline model insights. arXiv preprint arXiv:2402.16363 (2024)"},{"key":"43_CR2","unstructured":"Li, J.H., Lin, J.K., Su, Y.C., et al.: SIMD everywhere optimization from ARM NEON to RISC-V vector extensions. arXiv preprint arXiv:2309.16509 (2023)"},{"key":"43_CR3","unstructured":"Zhou, Z., Ning, X., Hong, K., et al.: A survey on efficient inference for large language models. arXiv preprint arXiv:2404.14294 (2024)"},{"issue":"2","key":"43_CR4","doi-asserted-by":"publisher","first-page":"64","DOI":"10.3390\/info14020064","volume":"14","author":"S Kalapothas","year":"2023","unstructured":"Kalapothas, S., Galetakis, F., et al.: A survey on RISC-V-based machine learning ecosystem. Information 14(2), 64 (2023)","journal-title":"Information"},{"key":"43_CR5","unstructured":"Chen, L., Zhao, Y., Xie, Q., et al.: Optimization of Armv9 architecture general large language model inference performance based on Llama.cpp. arXiv preprint arXiv:2406.10816 (2024)"},{"key":"43_CR6","doi-asserted-by":"crossref","unstructured":"Gupta, S.R., Papadopoulou, N., Pericas, M.: Accelerating CNN inference on long vector architectures via co-design. In: 2023 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 145\u2013155. IEEE (2023)","DOI":"10.1109\/IPDPS54959.2023.00024"},{"key":"43_CR7","doi-asserted-by":"crossref","unstructured":"Li, X., Li, Y., Li, Y., et al.: FlexNN: efficient and adaptive DNN inference on memory-constrained edge devices. In: Proceedings of the 30th Annual International Conference on Mobile Computing and Networking (MobiCom), pp. 709\u2013723 (2024)","DOI":"10.1145\/3636534.3649391"},{"key":"43_CR8","unstructured":"Shen, H., Chang, H., Dong, B., et al.: Efficient LLM inference on CPUs. arXiv preprint arXiv:2311.00502 (2023)"},{"key":"43_CR9","doi-asserted-by":"crossref","unstructured":"Biswas, A., Ganguly, U.: QFALT: quantization and fault aware loss for training enables performance recovery with unreliable weights. In: 2024 International Joint Conference on Neural Networks (IJCNN), pp. 1\u20136. IEEE (2024)","DOI":"10.1109\/IJCNN60899.2024.10649900"},{"key":"43_CR10","doi-asserted-by":"crossref","unstructured":"Evan, A., Sarosa, M., Asmara, R.A., et al.: Detection and counting of grape leaves using YOLOv8 via TFLite on mobile applications. In: 2024 International Conference on Electrical and Information Technology (IEIT), pp. 246\u2013251. IEEE (2024)","DOI":"10.1109\/IEIT64341.2024.10763328"},{"key":"43_CR11","unstructured":"Johnson, K.: Microsoft open-sources ONNX runtime model to speed up Google\u2019s BERT. VentureBeat (2020). https:\/\/venturebeat.com\/business\/microsoft-open-sources-onnx-runtime-model-to-speed-up-googles-bert\/"},{"key":"43_CR12","unstructured":"He, Q., Wu, Z.: Efficient LLM inference with Kcache. arXiv preprint arXiv:2404.18057 (2024)"},{"issue":"1","key":"43_CR13","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1134\/S1995080224010530","volume":"45","author":"VD Volokitin","year":"2024","unstructured":"Volokitin, V.D., Vasiliev, E.P., Kozinov, E.A., et al.: Improved vectorization of OpenCV algorithms for RISC-V CPUs. Lobachevskii J. Math. 45(1), 130\u2013142 (2024)","journal-title":"Lobachevskii J. Math."},{"issue":"20","key":"43_CR14","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.7424","volume":"35","author":"P Vizcaino","year":"2023","unstructured":"Vizcaino, P., Mantovani, F., Ferrer, R., et al.: Acceleration with long vector architectures: implementation and evaluation of the FFT kernel on NEC SX-Aurora and RISC-V vector extension. Concurrency Comput. Prac. Experience 35(20), e7424 (2023)","journal-title":"Concurrency Comput. Prac. Experience"},{"key":"43_CR15","doi-asserted-by":"crossref","unstructured":"Gupta, S.R., Papadopoulou, N., Peric\u00e0s, M.: Challenges and opportunities in the co-design of convolutions and RISC-V vector processors. In: Proceedings of the SC\u201923 Workshops of the International Conference on High Performance Computing, Network, Storage, and Analysis, pp. 1550\u20131556 (2023)","DOI":"10.1145\/3624062.3624232"},{"key":"43_CR16","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Du, B., Zhang, L., et al.: Parallel DNN inference framework leveraging a compact RISC-V ISA-based multi-core system. In: Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, pp. 627\u2013635 (2020)","DOI":"10.1145\/3394486.3403105"},{"key":"43_CR17","doi-asserted-by":"crossref","unstructured":"Lai, H.M., Lee, J.K.: Efficient support of the scan vector model for RISC-V vector extension. In: Workshop Proceedings of the 51st International Conference on Parallel Processing, pp. 1\u20138 (2022)","DOI":"10.1145\/3547276.3548518"},{"key":"43_CR18","unstructured":"Marques Garcia, A., Malenza, G., Birke, R., et al.: Assessing large language models inference performance on a 64\u2011core RISC\u2011V CPU with silicon\u2011enabled vectors. In: BigHPC2024: Special Track on Big Data and High\u2011Performance Computing, CEUR Workshop Proceedings, vol. 3785, pp. 1\u20139 (2024)"},{"key":"43_CR19","doi-asserted-by":"crossref","unstructured":"Wang, J., Wang, L., Wang, P.: Optimisation of x264 encoder acceleration based on RISC-V vector instructions. In: Proceedings of the 2023 3rd International Symposium on Computer Technology and Information Science (ISCTIS). IEEE, pp. 1128\u20131133 (2023)","DOI":"10.1109\/ISCTIS58954.2023.10213200"},{"key":"43_CR20","doi-asserted-by":"crossref","unstructured":"Kozinov, E., Vasiliev, E., Gorshkov, A., et al.: Vectorization of gradient boosting of decision trees prediction in the CatBoost library for RISC-V processors. arXiv preprint arXiv:2405.11062 (2024)","DOI":"10.1007\/978-3-031-85700-3_3"},{"issue":"4","key":"43_CR21","doi-asserted-by":"publisher","first-page":"807","DOI":"10.1007\/s11390-023-1266-6","volume":"38","author":"RS Li","year":"2023","unstructured":"Li, R.S., Peng, P., Shao, Z.Y., et al.: Evaluating RISC-V vector instruction set architecture extension with computer vision workloads. J. Comput. Sci. Technol. 38(4), 807\u2013820 (2023)","journal-title":"J. Comput. Sci. Technol."},{"key":"43_CR22","doi-asserted-by":"crossref","unstructured":"Li, X., Wang, S., Li, S., et al.: Model editing for LLMs4Code: how far are we?. arXiv preprint arXiv:2411.06638 (2024)","DOI":"10.1109\/ICSE55347.2025.00049"},{"key":"43_CR23","doi-asserted-by":"crossref","unstructured":"Li, X., Li, S., Song, S., et al.: SWEA: Updating factual knowledge in large language models via subject word embedding altering. arXiv preprint arXiv:2401.17809 (2024)","DOI":"10.1609\/aaai.v39i23.34628"}],"container-title":["Lecture Notes in Computer Science","Advanced Intelligent Computing Technology and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-9869-1_43","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T10:03:22Z","timestamp":1780999402000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-9869-1_43"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819698684","9789819698691"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-9869-1_43","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"25 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ningbo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/icg\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}