{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T10:05:28Z","timestamp":1776679528560,"version":"3.51.2"},"publisher-location":"Singapore","reference-count":30,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819570775","type":"print"},{"value":"9789819570782","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-7078-2_31","type":"book-chapter","created":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T09:21:49Z","timestamp":1776676909000},"page":"480-496","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Fine-Tuning Alignment of\u00a0Large Language Models via\u00a0Label Smoothing and\u00a0Intermediate Contrastive Learning"],"prefix":"10.1007","author":[{"given":"Qian","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhendong","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qingyun","family":"Lin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hetao","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yang","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,4,21]]},"reference":[{"key":"31_CR1","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"issue":"8","key":"31_CR2","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., et al.: Language models are unsupervised multitask learners. OpenAI blog 1(8), 9 (2019)","journal-title":"OpenAI blog"},{"key":"31_CR3","unstructured":"Lee, H., et\u00a0al.: RLAIF vs. RLHF: Scaling reinforcement learning from human feedback with ai feedback. arXiv preprint arXiv:2309.00267 (2023)"},{"key":"31_CR4","unstructured":"Poddar, S., Wan, Y., Ivison, H., Gupta, A., Jaques, N.: Personalizing reinforcement learning from human feedback with variational preference learning. arXiv preprint arXiv:2408.10075 (2024)"},{"key":"31_CR5","first-page":"53728","volume":"36","author":"R Rafailov","year":"2023","unstructured":"Rafailov, R., Sharma, A., Mitchell, E., Manning, C.D., Ermon, S., Finn, C.: Direct preference optimization: your language model is secretly a reward model. Adv. Neural. Inf. Process. Syst. 36, 53728\u201353741 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"31_CR6","unstructured":"Bai, Y., et\u00a0al.: Training a helpful and harmless assistant with reinforcement learning from human feedback. arXiv preprint arXiv:2204.05862 (2022)"},{"key":"31_CR7","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"key":"31_CR8","doi-asserted-by":"crossref","unstructured":"Hong, J., Lee, N., Thorne, J.: ORPO: monolithic preference optimization without reference model. In: Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pp. 11170\u201311189 (2024)","DOI":"10.18653\/v1\/2024.emnlp-main.626"},{"key":"31_CR9","first-page":"30039","volume":"36","author":"Y Dubois","year":"2023","unstructured":"Dubois, Y., et al.: AlpacaFarm: a simulation framework for methods that learn from human feedback. Adv. Neural. Inf. Process. Syst. 36, 30039\u201330069 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"31_CR10","doi-asserted-by":"crossref","unstructured":"Liu, C., Wang, Q., Lin, W., Ding, Y., Lu, H.: Beyond binary preference: leveraging Bayesian approaches for joint optimization of ranking and calibration. In: Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, pp. 5442\u20135453 (2024)","DOI":"10.1145\/3637528.3671577"},{"key":"31_CR11","unstructured":"Im, S., Li, Y.: On the generalization of preference learning with DPO. arXiv e-prints, pages arXiv\u20132408 (2024)"},{"key":"31_CR12","doi-asserted-by":"crossref","unstructured":"Pham, C.M., Sun, S., Iyyer, M.: Suri: multi-constraint instruction following for long-form text generation. arXiv preprint arXiv:2406.19371 (2024)","DOI":"10.18653\/v1\/2024.findings-emnlp.94"},{"key":"31_CR13","unstructured":"Yin, Y., Wang, Z., Xie, Y., Chen, W., Zhou, M.: Self-augmented preference optimization: off-policy paradigms for language model alignment. arXiv preprint arXiv:2405.20830 (2024)"},{"key":"31_CR14","unstructured":"Chung, J.J.Y., Padmakumar, V., Roemmele, M., Sun, Y., Kreminski, M.: Modifying large language model post-training for diverse creative writing. arXiv preprint arXiv:2503.17126 (2025)"},{"key":"31_CR15","doi-asserted-by":"crossref","unstructured":"Tenney, I., Das, D., Pavlick, E.: BERT rediscovers the classical NLP pipeline. arXiv preprint arXiv:1905.05950 (2019)","DOI":"10.18653\/v1\/P19-1452"},{"key":"31_CR16","unstructured":"Azar, M.G., et al.: A general theoretical paradigm to understand learning from human preferences. In: International Conference on Artificial Intelligence and Statistics, pp. 4447\u20134455. PMLR (2024)"},{"key":"31_CR17","unstructured":"Ethayarajh, K., Xu, W., Muennighoff, N., Jurafsky, D., Kiela, D.: KTO: model alignment as prospect theoretic optimization. arXiv preprint arXiv:2402.01306 (2024)"},{"key":"31_CR18","doi-asserted-by":"crossref","unstructured":"Song, F., et al.: Preference ranking optimization for human alignment. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 38, pp. 18990\u201318998 (2024)","DOI":"10.1609\/aaai.v38i17.29865"},{"issue":"1","key":"31_CR19","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1007\/s00371-024-03314-5","volume":"41","author":"C Zhao","year":"2025","unstructured":"Zhao, C., Cai, W.-L., Yuan, Z.: Spectral normalization and dual contrastive regularization for image-to-image translation. Vis. Comput. 41(1), 129\u2013140 (2025)","journal-title":"Vis. Comput."},{"key":"31_CR20","unstructured":"M\u00fcller, R., Kornblith, S., Hinton, G.E.: When does label smoothing help? Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"31_CR21","first-page":"27730","volume":"35","author":"L Ouyang","year":"2022","unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback. Adv. Neural. Inf. Process. Syst. 35, 27730\u201327744 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"31_CR22","unstructured":"Wei, J., et al.: Finetuned language models are zero-shot learners. arXiv preprint arXiv:2109.01652 (2021)"},{"key":"31_CR23","unstructured":"Touvron, H., et\u00a0al.: LLaMA 2: open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)"},{"key":"31_CR24","doi-asserted-by":"publisher","first-page":"110938","DOI":"10.1016\/j.patcog.2024.110938","volume":"157","author":"K Liu","year":"2025","unstructured":"Liu, K., Chen, K., Jia, K., Wang, Y.: Improving deep representation learning via auxiliary learnable target coding. Pattern Recogn. 157, 110938 (2025)","journal-title":"Pattern Recogn."},{"key":"31_CR25","unstructured":"Sellami, K., Saied, M.A.: Contrastive learning-enhanced large language models for monolith-to-microservice decomposition. arXiv preprint arXiv:2502.04604 (2025)"},{"key":"31_CR26","doi-asserted-by":"crossref","unstructured":"Ding, N., et al.: Sparse low-rank adaptation of pre-trained language models. arXiv preprint arXiv:2311.11696 (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.252"},{"key":"31_CR27","unstructured":"Afzali, A., Khodabandeh, B., Rasekh, A., JafariNodeh, M., Gottschalk, S., et\u00a0al.: Aligning visual contrastive learning models via preference optimization. arXiv e-prints, pages arXiv\u20132411 (2024)"},{"key":"31_CR28","unstructured":"Wang, Y., et al.: Self-instruct: aligning language models with self-generated instructions. arXiv preprint arXiv:2212.10560 (2022)"},{"key":"31_CR29","first-page":"46595","volume":"36","author":"L Zheng","year":"2023","unstructured":"Zheng, L., et al.: Judging LLM-as-a-judge with mt-bench and chatbot arena. Adv. Neural. Inf. Process. Syst. 36, 46595\u201346623 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"31_CR30","unstructured":"Li, Y., Lu, Y., Dong, Z., Yang, C., Chen, Y., Gou, J.: SGLP: a similarity guided fast layer partition pruning for compressing large deep models. arXiv preprint arXiv:2410.14720 (2024)"}],"container-title":["Lecture Notes in Computer Science","PRICAI 2025: Trends in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-7078-2_31","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T09:22:15Z","timestamp":1776676935000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-7078-2_31"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819570775","9789819570782"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-7078-2_31","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"21 April 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRICAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pacific Rim International Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Wellington","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"New Zealand","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 November 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 November 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pricai2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.pricai.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}