{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T04:53:12Z","timestamp":1774500792551,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":17,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,2,2]],"date-time":"2024-02-02T00:00:00Z","timestamp":1706832000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,2,2]]},"DOI":"10.1145\/3651671.3651702","type":"proceedings-article","created":{"date-parts":[[2024,6,7]],"date-time":"2024-06-07T18:55:50Z","timestamp":1717786550000},"page":"128-133","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Fine-Tuning LLMs for Multi-Turn Dialogues: Optimizing Cross-Entropy Loss with KL Divergence for All Rounds of Responses"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-6544-4977","authenticated-orcid":false,"given":"Zeyu","family":"Teng","sequence":"first","affiliation":[{"name":"AsiaInfo Technologies (China) Co., Ltd., China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0097-5281","authenticated-orcid":false,"given":"Yong","family":"Song","sequence":"additional","affiliation":[{"name":"AsiaInfo Technologies (China) Co., Ltd., China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4925-5907","authenticated-orcid":false,"given":"Xiaozhou","family":"Ye","sequence":"additional","affiliation":[{"name":"AsiaInfo Technologies (China) Co., Ltd., China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6195-6415","authenticated-orcid":false,"given":"Ye","family":"Ouyang","sequence":"additional","affiliation":[{"name":"AsiaInfo Technologies (Guangzhou) Co., Ltd., China"}]}],"member":"320","published-online":{"date-parts":[[2024,6,7]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Extending context window of large language models via positional interpolation. arXiv preprint arXiv:2306.15595","author":"Chen Shouyuan","year":"2023","unstructured":"Shouyuan Chen, Sherman Wong, Liangjian Chen, and Yuandong Tian. 2023. Extending context window of large language models via positional interpolation. arXiv preprint arXiv:2306.15595 (2023)."},{"key":"e_1_3_2_1_2_1","volume-title":"Vicuna: An open-source chatbot impressing gpt-4 with 90%* chatgpt quality. See https:\/\/vicuna. lmsys. org (accessed","author":"Chiang Wei-Lin","year":"2023","unstructured":"Wei-Lin Chiang, Zhuohan Li, Zi Lin, Ying Sheng, Zhanghao Wu, Hao Zhang, Lianmin Zheng, Siyuan Zhuang, Yonghao Zhuang, Joseph\u00a0E Gonzalez, 2023. Vicuna: An open-source chatbot impressing gpt-4 with 90%* chatgpt quality. See https:\/\/vicuna. lmsys. org (accessed 14 April 2023) (2023)."},{"key":"e_1_3_2_1_3_1","volume-title":"doc2dial: A goal-oriented document-grounded dialogue dataset. arXiv preprint arXiv:2011.06623","author":"Feng Song","year":"2020","unstructured":"Song Feng, Hui Wan, Chulaka Gunasekara, Siva\u00a0Sankalp Patel, Sachindra Joshi, and Luis\u00a0A Lastras. 2020. doc2dial: A goal-oriented document-grounded dialogue dataset. arXiv preprint arXiv:2011.06623 (2020)."},{"key":"e_1_3_2_1_4_1","volume-title":"Lora: Low-rank adaptation of large language models. 
arXiv preprint arXiv:2106.09685","author":"Hu J","year":"2021","unstructured":"Edward\u00a0J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_1_5_1","volume-title":"Neural tangent kernel: Convergence and generalization in neural networks. Advances in neural information processing systems 31","author":"Jacot Arthur","year":"2018","unstructured":"Arthur Jacot, Franck Gabriel, and Cl\u00e9ment Hongler. 2018. Neural tangent kernel: Convergence and generalization in neural networks. Advances in neural information processing systems 31 (2018)."},{"key":"e_1_3_2_1_6_1","volume-title":"Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74\u201381.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74\u201381."},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311\u2013318","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311\u2013318."},{"key":"e_1_3_2_1_8_1","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans Ilya Sutskever 2018. Improving language understanding by generative pre-training. (2018)."},{"key":"e_1_3_2_1_9_1","volume-title":"Moss: Training conversational language models from synthetic data. arXiv preprint arXiv:2307.15020 7","author":"Sun Tianxiang","year":"2023","unstructured":"Tianxiang Sun, Xiaotian Zhang, Zhengfu He, Peng Li, Qinyuan Cheng, Hang Yan, Xiangyang Liu, Yunfan Shao, Qiong Tang, Xingjian Zhao, 2023. Moss: Training conversational language models from synthetic data. arXiv preprint arXiv:2307.15020 7 (2023)."},{"key":"e_1_3_2_1_10_1","volume-title":"ChiMed-GPT: A Chinese Medical Large Language Model with Full Training Regime and Better Alignment to Human Preferences. arXiv preprint arXiv:2311.06025","author":"Tian Yuanhe","year":"2023","unstructured":"Yuanhe Tian, Ruyi Gan, Yan Song, Jiaxing Zhang, and Yongdong Zhang. 2023. ChiMed-GPT: A Chinese Medical Large Language Model with Full Training Regime and Better Alignment to Human Preferences. arXiv preprint arXiv:2311.06025 (2023)."},{"key":"e_1_3_2_1_11_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"e_1_3_2_1_12_1","volume-title":"Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Louis Martin, Kevin Stone, Peter Albert, Amjad Almahairi, Yasmine Babaei, Nikolay Bashlykov, Soumya Batra, Prajjwal Bhargava, Shruti Bhosale, 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_1_13_1","volume-title":"Attention is all you need. 
Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_14_1","volume-title":"Self-instruct: Aligning language model with self generated instructions. arXiv preprint arXiv:2212.10560","author":"Wang Yizhong","year":"2022","unstructured":"Yizhong Wang, Yeganeh Kordi, Swaroop Mishra, Alisa Liu, Noah\u00a0A Smith, Daniel Khashabi, and Hannaneh Hajishirzi. 2022. Self-instruct: Aligning language model with self generated instructions. arXiv preprint arXiv:2212.10560 (2022)."},{"key":"e_1_3_2_1_15_1","volume-title":"Finetuned language models are zero-shot learners. arXiv preprint arXiv:2109.01652","author":"Wei Jason","year":"2021","unstructured":"Jason Wei, Maarten Bosma, Vincent\u00a0Y Zhao, Kelvin Guu, Adams\u00a0Wei Yu, Brian Lester, Nan Du, Andrew\u00a0M Dai, and Quoc\u00a0V Le. 2021. Finetuned language models are zero-shot learners. arXiv preprint arXiv:2109.01652 (2021)."},{"key":"e_1_3_2_1_16_1","volume-title":"Workshop, Teven\u00a0Le Scao, Angela Fan, Christopher Akiki, Ellie Pavlick, Suzana Ili\u0107, Daniel Hesslow, Roman Castagn\u00e9, Alexandra\u00a0Sasha Luccioni","year":"2022","unstructured":"BigScience Workshop, Teven\u00a0Le Scao, Angela Fan, Christopher Akiki, Ellie Pavlick, Suzana Ili\u0107, Daniel Hesslow, Roman Castagn\u00e9, Alexandra\u00a0Sasha Luccioni, Fran\u00e7ois Yvon, 2022. Bloom: A 176b-parameter open-access multilingual language model. arXiv preprint arXiv:2211.05100 (2022)."},{"key":"e_1_3_2_1_17_1","volume-title":"Doctorglm: Fine-tuning your chinese doctor is not a herculean task. arXiv preprint arXiv:2304.01097","author":"Xiong Honglin","year":"2023","unstructured":"Honglin Xiong, Sheng Wang, Yitao Zhu, Zihao Zhao, Yuxiao Liu, Qian Wang, and Dinggang Shen. 2023. Doctorglm: Fine-tuning your chinese doctor is not a herculean task. arXiv preprint arXiv:2304.01097 (2023)."}],"event":{"name":"ICMLC 2024: 2024 16th International Conference on Machine Learning and Computing","location":"Shenzhen China","acronym":"ICMLC 2024"},"container-title":["Proceedings of the 2024 16th International Conference on Machine Learning and Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3651671.3651702","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3651671.3651702","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T11:21:24Z","timestamp":1755861684000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3651671.3651702"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,2]]},"references-count":17,"alternative-id":["10.1145\/3651671.3651702","10.1145\/3651671"],"URL":"https:\/\/doi.org\/10.1145\/3651671.3651702","relation":{},"subject":[],"published":{"date-parts":[[2024,2,2]]},"assertion":[{"value":"2024-06-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}
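The record above is the JSON envelope that the Crossref REST API returns for this paper's DOI. For readers who want to reproduce or post-process it, below is a minimal Python sketch. It assumes the public Crossref endpoint https://api.crossref.org/works/{DOI} and the third-party requests package; neither appears in the record itself, and the field access simply mirrors the structure shown above ("message", "title", "author", "reference").

import requests

DOI = "10.1145/3651671.3651702"  # DOI taken from the record above

# GET the work record; Crossref returns the same envelope shown above:
# {"status": "ok", "message-type": "work", ..., "message": {...}}
resp = requests.get(f"https://api.crossref.org/works/{DOI}", timeout=30)
resp.raise_for_status()
work = resp.json()["message"]

# "title" and "container-title" are lists; take the first entry of each.
print("Title:   ", work["title"][0])
print("Venue:   ", work["container-title"][0])
print("Pages:   ", work.get("page", "n/a"))
print("Cited by:", work.get("is-referenced-by-count", 0))

# Authors are objects with "given"/"family" names and optional ORCID iDs.
for a in work.get("author", []):
    print("Author:  ", a.get("given", ""), a.get("family", ""), a.get("ORCID", ""))

# Each entry in "reference" may carry an "unstructured" citation string.
print("References:", work.get("references-count"))
for ref in work.get("reference", []):
    print(" -", ref.get("unstructured", ref.get("key")))

Run against this DOI, the script should print the title, venue, page range, the four authors with their ORCID iDs, and the 17 references contained in the record.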