{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T08:53:04Z","timestamp":1763196784644,"version":"3.45.0"},"publisher-location":"Singapore","reference-count":27,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819533480","type":"print"},{"value":"9789819533497","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,11,16]],"date-time":"2025-11-16T00:00:00Z","timestamp":1763251200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,16]],"date-time":"2025-11-16T00:00:00Z","timestamp":1763251200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-3349-7_2","type":"book-chapter","created":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T08:49:40Z","timestamp":1763196580000},"page":"16-28","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Knowledge Distillation for\u00a0Large Language Models Based on\u00a0Global Keywords and\u00a0Chain of\u00a0Thought"],"prefix":"10.1007","author":[{"given":"Xuening","family":"Li","sequence":"first","affiliation":[]},{"given":"Peng","family":"Tang","sequence":"additional","affiliation":[]},{"given":"Fangjiong","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Xiaojun","family":"Liang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,16]]},"reference":[{"key":"2_CR1","doi-asserted-by":"crossref","unstructured":"Bucilu\u01ce, C., Caruana, R., Niculescu-Mizil, A.: Model compression. In: Proceedings of the 12th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 535\u2013541 (2006)","DOI":"10.1145\/1150402.1150464"},{"key":"2_CR2","unstructured":"Chen, F., Feng, Y.: Chain-of-thought prompt distillation for multimodal named entity and multimodal relation extraction. arXiv preprint arXiv:2306.14122 (2023)"},{"key":"2_CR3","doi-asserted-by":"crossref","unstructured":"Chen, X., Huang, H., Gao, Y., Wang, Y., Zhao, J., Ding, K.: Learning to maximize mutual information for chain-of-thought distillation. arXiv preprint arXiv:2403.03348 (2024)","DOI":"10.18653\/v1\/2024.findings-acl.409"},{"issue":"240","key":"2_CR4","first-page":"1","volume":"24","author":"A Chowdhery","year":"2023","unstructured":"Chowdhery, A., et al.: Palm: scaling language modeling with pathways. J. Mach. Learn. Res. 24(240), 1\u2013113 (2023)","journal-title":"J. Mach. Learn. Res."},{"key":"2_CR5","unstructured":"Cobbe, K., et\u00a0al.: Training verifiers to solve math word problems. arXiv preprint arXiv:2110.14168 (2021)"},{"key":"2_CR6","unstructured":"Deng, Y., Prasad, K., Fernandez, R., Smolensky, P., Chaudhary, V., Shieber, S.: Implicit chain of thought reasoning via knowledge distillation. arXiv preprint arXiv:2311.01460 (2023)"},{"key":"2_CR7","doi-asserted-by":"crossref","unstructured":"Friha, O., Ferrag, M.A., Kantarci, B., Cakmak, B., Ozgun, A., Ghoualmi-Zine, N.: Llm-based edge intelligence: a comprehensive survey on architectures, applications, security and trustworthiness. IEEE Open J. Commun. Soc. (2024)","DOI":"10.1109\/OJCOMS.2024.3456549"},{"issue":"6","key":"2_CR8","doi-asserted-by":"publisher","first-page":"1789","DOI":"10.1007\/s11263-021-01453-z","volume":"129","author":"J Gou","year":"2021","unstructured":"Gou, J., Yu, B., Maybank, S.J., Tao, D.: Knowledge distillation: a survey. Int. J. Comput. Vision 129(6), 1789\u20131819 (2021)","journal-title":"Int. J. Comput. Vision"},{"key":"2_CR9","unstructured":"Ho, N., Schmid, L., Yun, S.Y.: Large language models are reasoning teachers. arXiv preprint arXiv:2212.10071 (2022)"},{"key":"2_CR10","doi-asserted-by":"crossref","unstructured":"Hsieh, C.Y., et al.: Distilling step-by-step! outperforming larger language models with less training data and smaller model sizes. arXiv preprint arXiv:2305.02301 (2023)","DOI":"10.18653\/v1\/2023.findings-acl.507"},{"issue":"14","key":"2_CR11","doi-asserted-by":"publisher","first-page":"6421","DOI":"10.3390\/app11146421","volume":"11","author":"D Jin","year":"2021","unstructured":"Jin, D., Pan, E., Oufattole, N., Weng, W.H., Fang, H., Szolovits, P.: What disease does this patient have? A large-scale open domain question answering dataset from medical exams. Appl. Sci. 11(14), 6421 (2021)","journal-title":"Appl. Sci."},{"key":"2_CR12","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"2_CR13","doi-asserted-by":"crossref","unstructured":"Krishna, K., Roy, A., Iyyer, M.: Hurdles to progress in long-form question answering. arXiv preprint arXiv:2103.06332 (2021)","DOI":"10.18653\/v1\/2021.naacl-main.393"},{"issue":"9","key":"2_CR14","first-page":"1","volume":"57","author":"P Kumar","year":"2024","unstructured":"Kumar, P.: Large language models (llms): survey, technical frameworks, and future challenges. Artif. Intell. Rev. 57(9), 1\u201351 (2024)","journal-title":"Artif. Intell. Rev."},{"key":"2_CR15","doi-asserted-by":"crossref","unstructured":"Kundu, A., Lim, F., Chew, A., Wynter, L., Chong, P., Lee, R.D.: Efficiently distilling llms for edge applications. arXiv preprint arXiv:2404.01353 (2024)","DOI":"10.18653\/v1\/2024.naacl-industry.5"},{"key":"2_CR16","doi-asserted-by":"crossref","unstructured":"Liu, J., et al.: Rainier: reinforced knowledge introspector for commonsense question answering. arXiv preprint arXiv:2210.03078 (2022)","DOI":"10.18653\/v1\/2022.emnlp-main.611"},{"key":"2_CR17","doi-asserted-by":"crossref","unstructured":"Magister, L.C., Mallinson, J., Adamek, J., Malmi, E., Severyn, A.: Teaching small language models to reason. arXiv preprint arXiv:2212.08410 (2022)","DOI":"10.18653\/v1\/2023.acl-short.151"},{"issue":"140","key":"2_CR18","first-page":"1","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21(140), 1\u201367 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Raiaan, M.A.K., et al.: A review on large language models: architectures, applications, taxonomies, open issues and challenges. IEEE Access (2024)","DOI":"10.36227\/techrxiv.24171183.v1"},{"key":"2_CR20","unstructured":"Talmor, A., Herzig, J., Lourie, N., Berant, J.: Commonsenseqa: a question answering challenge targeting commonsense knowledge. arXiv preprint arXiv:1811.00937 (2018)"},{"key":"2_CR21","doi-asserted-by":"crossref","unstructured":"Wang, J., Li, W., Liu, W.J.: Hic-kgqa: improving multi-hop question answering over knowledge graph via hypergraph and inference chain. Knowl.-Based Syst. 277(Oct.9), 1.1\u20131.14 (2023)","DOI":"10.1016\/j.knosys.2023.110810"},{"key":"2_CR22","doi-asserted-by":"crossref","unstructured":"Wang, W., et al.: Qcrd: quality-guided contrastive rationale distillation for large language models. arXiv preprint arXiv:2405.13014 (2024)","DOI":"10.18653\/v1\/2025.emnlp-main.724"},{"key":"2_CR23","first-page":"24824","volume":"35","author":"J Wei","year":"2022","unstructured":"Wei, J., et al.: Chain-of-thought prompting elicits reasoning in large language models. Adv. Neural. Inf. Process. Syst. 35, 24824\u201324837 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2_CR24","unstructured":"Yao, S., et al.: Tree of thoughts: deliberate problem solving with large language models. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"2_CR25","doi-asserted-by":"crossref","unstructured":"Yu, B., Li, Y., Wang, J.: Detecting causal language use in science findings. In: Proceedings of the EMNLP-IJCNLP, pp. 4664\u20134674 (2019)","DOI":"10.18653\/v1\/D19-1473"},{"key":"2_CR26","unstructured":"Zheng, L., et\u00a0al.: Alpa: automating inter-and intra-operator parallelism for distributed deep learning. In: 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22), pp. 559\u2013578 (2022)"},{"key":"2_CR27","doi-asserted-by":"crossref","unstructured":"Zhu, X., Li, J., Liu, Y., Ma, C., Wang, W.: Distilling mathematical reasoning capabilities into small language models. Neural Netw. (2023)","DOI":"10.2139\/ssrn.4782551"}],"container-title":["Lecture Notes in Computer Science","Natural Language Processing and Chinese Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-3349-7_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T08:49:46Z","timestamp":1763196586000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-3349-7_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,16]]},"ISBN":["9789819533480","9789819533497"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-3349-7_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,16]]},"assertion":[{"value":"16 November 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NLPCC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"CCF International Conference on Natural Language Processing and Chinese Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 August 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 August 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"nlpcc2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/tcci.ccf.org.cn\/conference\/2025\/index.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}