{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T18:02:15Z","timestamp":1743098535806,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":24,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819794331"},{"type":"electronic","value":"9789819794348"}],"license":[{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-9434-8_30","type":"book-chapter","created":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T14:03:04Z","timestamp":1730383384000},"page":"385-397","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["The Benefits in\u00a0Shallow: Merge Decoding Across Large Language Model Layers"],"prefix":"10.1007","author":[{"given":"Yuechi","family":"Zhou","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chuyue","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenjing","family":"Xie","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinrui","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiuchang","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhenghua","family":"Ni","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Juntao","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,1]]},"reference":[{"key":"30_CR1","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"30_CR2","unstructured":"Chuang, Y.S., Xie, Y., Luo, H., Kim, Y., Glass, J., He, P.: Dola: decoding by contrasting layers improves factuality in large language models. arXiv preprint arXiv:2309.03883 (2023)"},{"key":"30_CR3","unstructured":"Cobbe, K., et al.: Training verifiers to solve math word problems. arXiv preprint arXiv:2110.14168 (2021)"},{"key":"30_CR4","doi-asserted-by":"crossref","unstructured":"Fan, A., Lewis, M., Dauphin, Y.: Hierarchical neural story generation. arXiv preprint arXiv:1805.04833 (2018)","DOI":"10.18653\/v1\/P18-1082"},{"key":"30_CR5","doi-asserted-by":"crossref","unstructured":"Gao, T., Yao, X., Chen, D.: Simcse: simple contrastive learning of sentence embeddings. arXiv preprint arXiv:2104.08821 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.552"},{"key":"30_CR6","unstructured":"Garipov, T., Izmailov, P., Podoprikhin, D., Vetrov, D.P., Wilson, A.G.: Loss surfaces, mode connectivity, and fast ensembling of dnns. Adv. Neural Inf. Process. Syst. 31 (2018)"},{"key":"30_CR7","doi-asserted-by":"crossref","unstructured":"Gera, A., et al.: The benefits of bad advice: autocontrastive decoding across model layers. arXiv preprint arXiv:2305.01628 (2023)","DOI":"10.18653\/v1\/2023.acl-long.580"},{"key":"30_CR8","doi-asserted-by":"publisher","first-page":"346","DOI":"10.1162\/tacl_a_00370","volume":"9","author":"M Geva","year":"2021","unstructured":"Geva, M., Khashabi, D., Segal, E., Khot, T., Roth, D., Berant, J.: Did aristotle use a laptop? a question answering benchmark with implicit reasoning strategies. Trans. Assoc. Comput. Linguist. 9, 346\u2013361 (2021)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"30_CR9","unstructured":"Holtzman, A., Buys, J., Du, L., Forbes, M., Choi, Y.: The curious case of neural text degeneration. arXiv preprint arXiv:1904.09751 (2019)"},{"key":"30_CR10","unstructured":"Izmailov, P., Podoprikhin, D., Garipov, T., Vetrov, D., Wilson, A.G.: Averaging weights leads to wider optima and better generalization. arXiv preprint arXiv:1803.05407 (2018)"},{"key":"30_CR11","unstructured":"Jason, W., Xuezhi, W., Dale, S., Maarten, B., Ed, C., Quoc, L., Denny, Z.: Chain of thought prompting elicits reasoning in large language models. arXiv preprint arXiv:2201.11903 (2022)"},{"key":"30_CR12","unstructured":"Kaya, Y., Hong, S., Dumitras, T.: Shallow-deep networks: understanding and mitigating network overthinking. In: International Conference on Machine Learning, pp. 3301\u20133310. PMLR (2019)"},{"key":"30_CR13","doi-asserted-by":"crossref","unstructured":"Kincaid, J.P., Fishburne\u00a0Jr, R.P., Rogers, R.L., Chissom, B.S.: Derivation of new readability formulas (automated readability index, fog count and flesch reading ease formula) for navy enlisted personnel (1975)","DOI":"10.21236\/ADA006655"},{"key":"30_CR14","doi-asserted-by":"crossref","unstructured":"Li, X.L., et al.: Contrastive decoding: open-ended text generation as optimization. arXiv preprint arXiv:2210.15097 (2022)","DOI":"10.18653\/v1\/2023.acl-long.687"},{"key":"30_CR15","unstructured":"Lin, Y., et al.: Spurious feature diversification improves out-of-distribution generalization. arXiv preprint arXiv:2309.17230 (2023)"},{"key":"30_CR16","doi-asserted-by":"crossref","unstructured":"Liu, W., Zhou, P., Zhao, Z., Wang, Z., Deng, H., Ju, Q.: Fastbert: a self-distilling bert with adaptive inference time. arXiv preprint arXiv:2004.02178 (2020)","DOI":"10.18653\/v1\/2020.acl-main.537"},{"key":"30_CR17","unstructured":"Men, X., et al.: Shortgpt: layers in large language models are more redundant than you expect. arXiv preprint arXiv:2403.03853 (2024)"},{"key":"30_CR18","unstructured":"Merity, S., Xiong, C., Bradbury, J., Socher, R.: Pointer sentinel mixture models. arXiv preprint arXiv:1609.07843 (2016)"},{"key":"30_CR19","doi-asserted-by":"crossref","unstructured":"Panda, P., Sengupta, A., Roy, K.: Conditional deep learning for energy-efficient and enhanced pattern recognition. In: 2016 Design, Automation & Test in Europe Conference & Exhibition (DATE), pp. 475\u2013480. IEEE (2016)","DOI":"10.3850\/9783981537079_0819"},{"issue":"8","key":"30_CR20","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I., et al.: Language models are unsupervised multitask learners. OpenAI blog 1(8), 9 (2019)","journal-title":"OpenAI blog"},{"key":"30_CR21","first-page":"21548","volume":"35","author":"Y Su","year":"2022","unstructured":"Su, Y., Lan, T., Wang, Y., Yogatama, D., Kong, L., Collier, N.: A contrastive framework for neural text generation. Adv. Neural. Inf. Process. Syst. 35, 21548\u201321561 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"30_CR22","doi-asserted-by":"crossref","unstructured":"Teerapittayanon, S., McDanel, B., Kung, H.T.: Branchynet: fast inference via early exiting from deep neural networks. In: 2016 23rd International Conference on Pattern Recognition (ICPR), pp. 2464\u20132469. IEEE (2016)","DOI":"10.1109\/ICPR.2016.7900006"},{"key":"30_CR23","unstructured":"Touvron, H., et\u00a0al.: Llama: open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)"},{"key":"30_CR24","doi-asserted-by":"crossref","unstructured":"Zhu, Y., et al.: Aligning books and movies: towards story-like visual explanations by watching movies and reading books. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 19\u201327 (2015)","DOI":"10.1109\/ICCV.2015.11"}],"container-title":["Lecture Notes in Computer Science","Natural Language Processing and Chinese Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-9434-8_30","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T14:38:37Z","timestamp":1730385517000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-9434-8_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,1]]},"ISBN":["9789819794331","9789819794348"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-9434-8_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,1]]},"assertion":[{"value":"1 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NLPCC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"CCF International Conference on Natural Language Processing and Chinese Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 November 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 November 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"nlpcc2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/tcci.ccf.org.cn\/conference\/2024\/index.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}