{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T06:38:58Z","timestamp":1777876738044,"version":"3.51.4"},"publisher-location":"Cham","reference-count":37,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032180698","type":"print"},{"value":"9783032180704","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-18070-4_12","type":"book-chapter","created":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T22:31:02Z","timestamp":1777588262000},"page":"186-202","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Benchmarking Distilled Language Models: Performance and Efficiency in Resource-Constrained Settings"],"prefix":"10.1007","author":[{"given":"Sachin Gopal","family":"Wani","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eric","family":"Page","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ajay","family":"Dholakia","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"David","family":"Ellison","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,4,1]]},"reference":[{"key":"12_CR1","unstructured":"Hinton, G., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network. In: NIPS Deep Learning and Representation Learning Workshop (2015)"},{"key":"12_CR2","unstructured":"Guo, D., Yang, D., Zhang, H., et al.: DeepSeek-R1: incentivizing reasoning capability in LLMs via reinforcement learning. arXiv:2501.12948 (2025)"},{"key":"12_CR3","unstructured":"Parthasarathy, V.B., Zafar, A., Khan, A., Shahid, A.: The ultimate guide to fine-tuning LLMs from basics to breakthroughs: an exhaustive review of technologies, research, best practices, applied research challenges and opportunities. arXiv:2408.13296 (2024)"},{"key":"12_CR4","unstructured":"Epoch AI: Estimating Training Compute of Deep Learning Models. https:\/\/epoch.ai\/blog\/estimating-training-compute. Accessed 08 June 2025"},{"key":"12_CR5","unstructured":"Hoffmann, J., Borgeaud, S., Mensch, A., et al.: Training compute-optimal large language models. In: Proceedings of the 36th Conference on Neural Information Processing Systems (NeurIPS 2022). Curran Associates, Red Hook (2022)"},{"key":"12_CR6","unstructured":"Gcore Team: A Comparative Analysis of NVIDIA Data Center GPUs. https:\/\/gcore.com\/blog\/nvidia-gpu-comparison\/. Accessed 08 June 2025"},{"key":"12_CR7","unstructured":"Xu, X., et al.: A survey on knowledge distillation of large language models. arXiv:2402.13116 (2024)"},{"key":"12_CR8","doi-asserted-by":"crossref","unstructured":"Strubell, E., Ganesh, A., McCallum, A.: Energy and policy considerations for deep learning in NLP. In: Korhonen, A., Traum, D., M\u00e0rquez, L. (eds.) Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 3645\u20133650. Association for Computational Linguistics, Florence, Italy (2019)","DOI":"10.18653\/v1\/P19-1355"},{"key":"12_CR9","unstructured":"Hu, E.J., et al.: LoRA: low-rank adaptation of large language models. arXiv:2106.09685 (2021)"},{"key":"12_CR10","doi-asserted-by":"crossref","unstructured":"Dettmers, T., Pagnoni, A., Holtzman, A., Zettlemoyer, L.: QLoRA: efficient finetuning of quantized LLMs. In: Advances in Neural Information Processing Systems, vol. 36, pp. 10088\u201310115. Curran Associates, Inc. (2023)","DOI":"10.52202\/075280-0441"},{"key":"12_CR11","unstructured":"Gudibande, A., Wallace, E., Snell, C., et al.: The false promise of imitating proprietary LLMs. In: Krause, A., Ranzato, M., Beygelzimer, A., Dauphin, Y., Liang, P., Vaughan, J.W. (eds.) Proceedings of the 40th International Conference on Machine Learning, PMLR, vol. 202, pp. 11956\u201311984. PMLR (2023)"},{"key":"12_CR12","unstructured":"Gu, Y., Dong, L., Wei, F., Huang, M.: MiniLLM: knowledge distillation of large language models. In: Proceedings of the Twelfth International Conference on Learning Representations (ICLR). OpenReview.net (2024)"},{"key":"12_CR13","unstructured":"Agarwal, R., Vieillard, N., Zhou, Y., et al.: On-policy distillation of language models: learning from self-generated mistakes. In: Proceedings of the Twelfth International Conference on Learning Representations (ICLR). OpenReview.net (2024)"},{"key":"12_CR14","unstructured":"Liu, A., Feng, B., Xue, B., et al.: DeepSeek-V3 technical report. arXiv:2412.19437 (2024)"},{"key":"12_CR15","unstructured":"Grattafiori, A., et al.: The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)"},{"issue":"10","key":"12_CR16","first-page":"43","volume":"65","author":"D Patterson","year":"2022","unstructured":"Patterson, D., et al.: The carbon footprint of machine learning training will plateau, then shrink. Commun. ACM 65(10), 43\u201352 (2022)","journal-title":"Commun. ACM"},{"key":"12_CR17","unstructured":"Rein, D., et al.: GPQA: a graduate-level google-proof Q&A benchmark. In: Oh, A., Naumann, T., Globerson, A., Saenko, K., Hardt, M., Levine, S. (eds.) Advances in Neural Information Processing Systems, vol. 36. Curran Associates, Inc. (2023)"},{"key":"12_CR18","unstructured":"Zhang, Z., Yao, Y., Ding, M., et al.: AIME: a meticulously curated benchmark for large language model evaluation. In: Advances in Neural Information Processing Systems, vol. 38. Curran Associates, Inc. (2024)"},{"key":"12_CR19","unstructured":"Chen, M., Tworek, J., Jun, H., et al.: Evaluating large language models trained on code. arXiv preprint arXiv:2107.03374 (2021)"},{"key":"12_CR20","unstructured":"Epoch AI: AI Benchmarking Hub. https:\/\/epoch.ai\/data\/ai-benchmarking-dashboard. Accessed 18 June 2025"},{"key":"12_CR21","unstructured":"LocalAIME. https:\/\/github.com\/Belluxx\/LocalAIME. Accessed 20 June 2025"},{"key":"12_CR22","unstructured":"OpenAI: simple-evals. https:\/\/github.com\/openai\/simple-evals. Accessed 20 June 2025"},{"key":"12_CR23","unstructured":"DeepSeek-V2-Ollama-Simple-Evals. https:\/\/github.com\/kennethleungty\/DeepSeek-V2-Ollama-Simple-Evals. Accessed 20 June 2025"},{"key":"12_CR24","unstructured":"Yang, A., et al.: Qwen3 technical report. arXiv:2505.09388 (2025)"},{"key":"12_CR25","unstructured":"Yang, A., Yang, B., Zhang, B., et al.: Qwen2.5 technical report. arXiv:2412.15115 (2024)"},{"key":"12_CR26","unstructured":"DeepSeek AI: DeepSeek-R1-0528-Qwen3-8B. https:\/\/huggingface.co\/deepseek-ai\/DeepSeek-R1-0528-Qwen3-8B. Accessed 20 June 2025"},{"key":"12_CR27","unstructured":"xAI: Grok 3 Beta\u2014The Age of Reasoning Agents. https:\/\/x.ai\/news\/grok-3. Accessed 20 June 2025"},{"key":"12_CR28","unstructured":"Prithiv ML Mods: o3-mini & Deepseek-R1. https:\/\/huggingface.co\/blog\/prithivMLmods\/o3-mini-deepseek-and-r1. Accessed 20 June 2025"},{"key":"12_CR29","unstructured":"Microsoft: microsoft\/phi-4. https:\/\/huggingface.co\/microsoft\/phi-4. Accessed 20 June 2025"},{"key":"12_CR30","unstructured":"Vellum: LLM Leaderboard 2025. https:\/\/www.vellum.ai\/llm-leaderboard. Accessed 20 June 2025"},{"key":"12_CR31","unstructured":"Hendrycks, D., et al.: Measuring massive multitask language understanding. In: Proceedings of the International Conference on Learning Representations (ICLR) (2021)"},{"key":"12_CR32","unstructured":"Srivastava, A., et al.: Beyond the imitation game: quantifying and extrapolating the capabilities of language models. arXiv preprint arXiv:2206.04615 (2022)"},{"key":"12_CR33","unstructured":"Liang, P., et al.: Holistic evaluation of language models. arXiv preprint arXiv:2211.09110 (2022)"},{"key":"12_CR34","unstructured":"Gasanov, E., et al.: Hugging face open LLM leaderboard. https:\/\/huggingface.co\/spaces\/HuggingFaceH4\/open_llm_leaderboard. Accessed 01 Aug 2025"},{"key":"12_CR35","unstructured":"Poretschkin, M., et al.: Efficiency spectrum of large language models. arXiv preprint arXiv:2404.18560 (2024)"},{"key":"12_CR36","doi-asserted-by":"crossref","unstructured":"Hsieh, C.Y., et al.: Distilling step-by-step! outperforming larger language models with less training data and smaller model sizes. arXiv preprint arXiv:2305.02301 (2023)","DOI":"10.18653\/v1\/2023.findings-acl.507"},{"key":"12_CR37","unstructured":"Wani, S., Ellison, D., Page, E., Dholakia, A.: Lenovo LLM Sizing Guide. Lenovo Press. https:\/\/lenovopress.lenovo.com\/lp2130-lenovo-llm-sizing-guide. Accessed 01 Aug 2025"}],"container-title":["Lecture Notes in Computer Science","Performance Evaluation and Benchmarking"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-18070-4_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T22:31:07Z","timestamp":1777588267000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-18070-4_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9783032180698","9783032180704"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-18070-4_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"1 April 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TPCTC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Technology Conference on Performance Evaluation and Benchmarking","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"London","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"tpctc2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.tpc.org\/tpctc\/tpctc2025\/default5.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}