{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,3]],"date-time":"2025-11-03T10:34:02Z","timestamp":1762166042899,"version":"build-2065373602"},"publisher-location":"Singapore","reference-count":12,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819510207"},{"type":"electronic","value":"9789819510214"}],"license":[{"start":{"date-parts":[[2025,11,4]],"date-time":"2025-11-04T00:00:00Z","timestamp":1762214400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,4]],"date-time":"2025-11-04T00:00:00Z","timestamp":1762214400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-1021-4_35","type":"book-chapter","created":{"date-parts":[[2025,11,3]],"date-time":"2025-11-03T10:28:44Z","timestamp":1762165724000},"page":"426-431","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Lembda: Optimizing LLM Inference on\u00a0Embedded Platforms via\u00a0CPU\/FPGA Co-processing"],"prefix":"10.1007","author":[{"given":"Jinwei","family":"Zhou","sequence":"first","affiliation":[]},{"given":"Chenhao","family":"Xue","sequence":"additional","affiliation":[]},{"given":"Xiping","family":"Dong","sequence":"additional","affiliation":[]},{"given":"Yi","family":"Ren","sequence":"additional","affiliation":[]},{"given":"Jiaxing","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Guangyu","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Xinnan","family":"Lin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,4]]},"reference":[{"key":"35_CR1","unstructured":"OpenAI, \u201cChatgpt\u201d (2025). https:\/\/chat.openai.com. Accessed 7 Apr 2025"},{"key":"35_CR2","unstructured":"DeepSeek, \u201cDeepseek official website\u201d (2025). https:\/\/www.deepseek.com. Accessed 7 Apr 2025"},{"key":"35_CR3","unstructured":"Li, Y., et al.: Personal llm agents: Insights and survey about the capability, efficiency and security arXiv preprintarXiv:2401.05459 (2024)"},{"key":"35_CR4","doi-asserted-by":"crossref","unstructured":"K\u00f6k, \u0130., Demirci, O., \u00d6zdemir, S.: When IoT meet LLMs: applications and challenges. In: 2024 IEEE International Conference on Big Data (BigData), pp. 7075\u20137084. IEEE (2024)","DOI":"10.1109\/BigData62323.2024.10825187"},{"key":"35_CR5","unstructured":"Xilinx, Kria KV260 Vision AI Starter Kit Data Sheet (DS986) (2024). AMD\/Xilinx documentation. https:\/\/docs.amd.com\/r\/en-US\/ds986-kv260-starter-kit"},{"key":"35_CR6","unstructured":"Yang, A., et al.: Qwen2. 5 technical report. arXiv preprintarXiv:2412.15115 (2024)"},{"key":"35_CR7","doi-asserted-by":"crossref","unstructured":"Gholami, A., Kim, S., Dong, Z., Yao, Z., Mahoney, M.W., Keutzer, K.: A survey of quantization methods for efficient neural network inference. In: Low-Power Computer Vision, pp. 291\u2013326. Chapman and Hall\/CRC (2022)","DOI":"10.1201\/9781003162810-13"},{"key":"35_CR8","first-page":"87","volume":"6","author":"J Lin","year":"2024","unstructured":"Lin, J., et al.: AWQ: activation-aware weight quantization for on-device LLM compression and acceleration. Proc. Mach. Learn. Syst. 6, 87\u2013100 (2024)","journal-title":"Proc. Mach. Learn. Syst."},{"key":"35_CR9","doi-asserted-by":"crossref","unstructured":"Gong, R., et al.: Llmc: benchmarking large language model quantization with a versatile compression toolkit. In: Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track, pp. 132\u2013152 (2024)","DOI":"10.18653\/v1\/2024.emnlp-industry.12"},{"key":"35_CR10","unstructured":"Dao, T., Fu, D., Ermon, S., Rudra, A., R\u00e9, C.: Flashattention: fast and memory-efficient exact attention with io-awareness. In: Advances in Neural Information Processing Systems, vol.\u00a035, pp. 16\u00a0344\u201316\u00a0359 (2022)"},{"key":"35_CR11","unstructured":"Xiao, G., Tian, Y., Chen, B., Han, S., Lewis, M.: Efficient streaming language models with attention sinks. arXiv preprint arXiv:2309.17453 (2023)"},{"issue":"4","key":"35_CR12","doi-asserted-by":"publisher","first-page":"853","DOI":"10.1162\/089976699300016467","volume":"11","author":"NN Schraudolph","year":"1999","unstructured":"Schraudolph, N.N.: A fast, compact approximation of the exponential function. Neural Comput. 11(4), 853\u2013862 (1999)","journal-title":"Neural Comput."}],"container-title":["Lecture Notes in Computer Science","Advanced Parallel Processing Technologies"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-1021-4_35","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,3]],"date-time":"2025-11-03T10:28:48Z","timestamp":1762165728000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-1021-4_35"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,4]]},"ISBN":["9789819510207","9789819510214"],"references-count":12,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-1021-4_35","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,11,4]]},"assertion":[{"value":"4 November 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"APPT","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Symposium on Advanced Parallel Processing Technologies","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Athens","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"appt2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.appt-conference.com\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}