{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T16:50:33Z","timestamp":1755795033464,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,3]]},"DOI":"10.1145\/3711896.3737215","type":"proceedings-article","created":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T21:04:26Z","timestamp":1754255066000},"page":"4499-4510","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Enhancing Learned Knowledge in LoRA Adapters Through Efficient Contrastive Decoding on Ascend NPUs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4001-5939","authenticated-orcid":false,"given":"Morgan Lindsay","family":"Heisler","sequence":"first","affiliation":[{"name":"Huawei Technologies Canada, Burnaby, BC, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5565-3757","authenticated-orcid":false,"given":"Linzi","family":"Xing","sequence":"additional","affiliation":[{"name":"Huawei Technologies Canada, Burnaby, BC, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-1666-1886","authenticated-orcid":false,"given":"Ge","family":"Shi","sequence":"additional","affiliation":[{"name":"Huawei Technologies Canada, Burnaby, BC, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-6467-4387","authenticated-orcid":false,"given":"Hanieh","family":"Sadri","sequence":"additional","affiliation":[{"name":"Huawei Technologies Canada, Burnaby, BC, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0057-0455","authenticated-orcid":false,"given":"Gursimran","family":"Singh","sequence":"additional","affiliation":[{"name":"Huawei Technologies Canada, Burnaby, BC, 
Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3967-2472","authenticated-orcid":false,"given":"Weiwei","family":"Zhang","sequence":"additional","affiliation":[{"name":"Huawei Technologies Canada, Toronto, ON, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-1003-763X","authenticated-orcid":false,"given":"Tao","family":"Ye","sequence":"additional","affiliation":[{"name":"Huawei Technologies, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5985-3300","authenticated-orcid":false,"given":"Ying","family":"Xiong","sequence":"additional","affiliation":[{"name":"Huawei Technologies Canada, Burnaby, BC, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0238-0719","authenticated-orcid":false,"given":"Yong","family":"Zhang","sequence":"additional","affiliation":[{"name":"Huawei Technologies Canada, Burnaby, BC, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5116-2956","authenticated-orcid":false,"given":"Zhenan","family":"Fan","sequence":"additional","affiliation":[{"name":"Huawei Technologies Canada, Burnaby, BC, Canada"}]}],"member":"320","published-online":{"date-parts":[[2025,8,3]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"crossref","unstructured":"Esteban Garces Arias Julian Rodemann Meimingwei Li Christian Heumann and Matthias A\u00dfenmacher. 2024. Adaptive Contrastive Search: Uncertainty-Guided Decoding for Open-Ended Text Generation. arXiv:2407.18698 [cs.CL] https:\/\/arxiv.org\/abs\/2407.18698","DOI":"10.18653\/v1\/2024.findings-emnlp.885"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-short.1"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3700604"},{"key":"e_1_3_2_2_4_1","volume-title":"Accelerating large language model decoding with speculative sampling. arXiv preprint arXiv:2302.01318","author":"Chen Charlie","year":"2023","unstructured":"Charlie Chen, Sebastian Borgeaud, Geoffrey Irving, Jean-Baptiste Lespiau, Laurent Sifre, and John Jumper. 2023. 
Accelerating large language model decoding with speculative sampling. arXiv preprint arXiv:2302.01318 (2023)."},{"key":"e_1_3_2_2_5_1","first-page":"1","article-title":"Punica: Multi-tenant lora serving","volume":"6","author":"Chen Lequn","year":"2024","unstructured":"Lequn Chen, Zihao Ye, Yongji Wu, Danyang Zhuo, Luis Ceze, and Arvind Krishnamurthy. 2024. Punica: Multi-tenant lora serving. Proceedings of Machine Learning and Systems 6 (2024), 1-13.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671576"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"crossref","unstructured":"Alexandra Chronopoulou Matthew E. Peters Alexander Fraser and Jesse Dodge. 2023. AdapterSoup: Weight Averaging to Improve Generalization of Pretrained Language Models. arXiv:2302.07027 [cs.CL] https:\/\/arxiv.org\/abs\/2302.07027","DOI":"10.18653\/v1\/2023.findings-eacl.153"},{"key":"e_1_3_2_2_8_1","volume-title":"Training Verifiers to Solve Math Word Problems. CoRR abs\/2110.14168","author":"Cobbe Karl","year":"2021","unstructured":"Karl Cobbe, Vineet Kosaraju, Mohammad Bavarian, Mark Chen, Heewoo Jun, Lukasz Kaiser, Matthias Plappert, Jerry Tworek, Jacob Hilton, Reiichiro Nakano, Christopher Hesse, and John Schulman. 2021. Training Verifiers to Solve Math Word Problems. CoRR abs\/2110.14168 (2021). arXiv:2110.14168 https:\/\/arxiv.org\/ abs\/2110.14168"},{"key":"e_1_3_2_2_9_1","volume-title":"Proceedings of the 41st International Conference on Machine Learning","author":"Hayou Soufiane","year":"2024","unstructured":"Soufiane Hayou, Nikhil Ghosh, and Bin Yu. 2024. LoRA: efficient low rank adaptation of large models. In Proceedings of the 41st International Conference on Machine Learning (Vienna, Austria) (ICML'24). JMLR.org, Article 712, 24 pages."},{"key":"e_1_3_2_2_10_1","volume-title":"The Curious Case of Neural Text Degeneration. 
In 8th International Conference on Learning Representations, ICLR 2020","author":"Holtzman Ari","year":"2020","unstructured":"Ari Holtzman, Jan Buys, Li Du, Maxwell Forbes, and Yejin Choi. 2020. The Curious Case of Neural Text Degeneration. In 8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, April 26-30, 2020. OpenReview.net. https:\/\/openreview.net\/forum?id=rygGQyrFvH"},{"key":"e_1_3_2_2_11_1","volume-title":"LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations. https: \/\/openreview.net\/forum?id=nZeVKeeFYf9","author":"Hu Edward J","year":"2022","unstructured":"Edward J Hu, yelong shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2022. LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations. https: \/\/openreview.net\/forum?id=nZeVKeeFYf9"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1365"},{"key":"e_1_3_2_2_13_1","first-page":"3","article-title":"Phi-2: The surprising power of small language models","volume":"1","author":"Javaheripi Mojan","year":"2023","unstructured":"Mojan Javaheripi, S\u00e9bastien Bubeck, Marah Abdin, Jyoti Aneja, Sebastien Bubeck, Caio C\u00e9sar Teodoro Mendes, Weizhu Chen, Allie Del Giorno, Ronen Eldan, Sivakanth Gopi, et al. 2023. Phi-2: The surprising power of small language models. Microsoft Research Blog 1, 3 (2023), 3.","journal-title":"Microsoft Research Blog"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W19-8623"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","unstructured":"Xiang Lisa Li Ari Holtzman Daniel Fried Percy Liang Jason Eisner Tatsunori Hashimoto Luke Zettlemoyer and Mike Lewis. 2023. Contrastive Decoding: Open-ended Text Generation as Optimization. 
In Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) Anna Rogers Jordan Boyd-Graber and Naoaki Okazaki (Eds.). Association for Computational Linguistics Toronto Canada 12286-12312. doi:10.18653\/v1\/2023.acl-long.687","DOI":"10.18653\/v1\/2023.acl-long.687"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00071"},{"key":"e_1_3_2_2_18_1","volume-title":"ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out. Association for Computational Linguistics, Barcelona, Spain, 74-81. https:\/\/aclanthology.org\/W04-1013\/"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/K16-1028"},{"key":"e_1_3_2_2_20_1","unstructured":"Sean O'Brien and Mike Lewis. 2023. Contrastive Decoding Improves Reasoning in Large Language Models. arXiv:2309.09117 [cs.CL] https:\/\/arxiv.org\/abs\/2309.09117"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073083"},{"key":"e_1_3_2_2_22_1","unstructured":"Venkatesh Balavadhani Parthasarathy Ahtsham Zafar Aafaq Khan and Arsalan Shahid. 2024. The Ultimate Guide to Fine-Tuning LLMs from Basics to Breakthroughs: An Exhaustive Review of Technologies Research Best Practices Applied Research Challenges and Opportunities. arXiv:2408.13296 [cs.LG] https:\/\/arxiv.org\/abs\/2408.13296"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-demos.7"},{"key":"e_1_3_2_2_24_1","unstructured":"Phuc Phan Hieu Tran and Long Phan. 2024. Distillation Contrastive Decoding: Improving LLMs Reasoning with Contrastive Decoding and Distillation. 
arXiv:2402.14874 [cs.CL] https:\/\/arxiv.org\/abs\/2402.14874"},{"key":"e_1_3_2_2_25_1","volume-title":"Harish Tayyar Madabushi, and Iryna Gurevych","author":"Puerto Haritz","year":"2024","unstructured":"Haritz Puerto, Tilek Chubakov, Xiaodan Zhu, Harish Tayyar Madabushi, and Iryna Gurevych. 2024. Fine-Tuning with Divergent Chains of Thought Boosts Reasoning Through Self-Correction in Language Models. arXiv:2407.03181 [cs.CL] https:\/\/arxiv.org\/abs\/2407.03181"},{"key":"e_1_3_2_2_26_1","unstructured":"Amine Saidi. 2024. AmineSaidi-ISTIC\/phi-2-finetuned-gsm8k. https:\/\/huggingface. co\/AmineSaidi-ISTIC\/phi-2-finetuned-gsm8k"},{"key":"e_1_3_2_2_27_1","unstructured":"Ying Sheng Shiyi Cao Dacheng Li Coleman Hooper Nicholas Lee Shuo Yang Christopher Chou Banghua Zhu Lianmin Zheng Kurt Keutzer Joseph Gonzalez and Ion Stoica. 2024. SLoRA: Scalable Serving of Thousands of LoRA Adapters. In MLSys. https:\/\/proceedings.mlsys.org\/paper_files\/paper\/2024\/hash\/ 906419cd502575b617cc489a1a696a67-Abstract-Conference.html"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.489"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1421"},{"key":"e_1_3_2_2_30_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen Guillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller Cynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou Hakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev Punit Singh Koura Marie-Anne Lachaux Thibaut Lavril Jenya Lee Diana Liskovich Yinghai Lu Yuning Mao Xavier Martinet Todor Mihaylov Pushkar Mishra Igor Molybog Yixin Nie Andrew Poulton Jeremy Reizenstein Rashi Rungta Kalyan Saladi Alan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian 
Xiaoqing Ellen Tan Binh Tang Ross Taylor Adina Williams Jian Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang Angela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic Sergey Edunov and Thomas Scialom. 2023. Llama 2: Open Foundation and Fine-Tuned Chat Models. arXiv:2307.09288 [cs.CL] https:\/\/arxiv.org\/abs\/2307.09288"},{"key":"e_1_3_2_2_31_1","volume-title":"Representation Learning with Contrastive Predictive Coding. CoRR abs\/1807.03748","author":"van den Oord A\u00e4ron","year":"2018","unstructured":"A\u00e4ron van den Oord, Yazhe Li, and Oriol Vinyals. 2018. Representation Learning with Contrastive Predictive Coding. CoRR abs\/1807.03748 (2018). arXiv:1807.03748 http:\/\/arxiv.org\/abs\/1807.03748"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671583"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671537"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664190.3672522"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.5555\/3692070.3694652"}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"],"location":"Toronto ON Canada","acronym":"KDD '25"},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining 
V.2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3711896.3737215","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,16]],"date-time":"2025-08-16T14:46:12Z","timestamp":1755355572000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3711896.3737215"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,3]]},"references-count":35,"alternative-id":["10.1145\/3711896.3737215","10.1145\/3711896"],"URL":"https:\/\/doi.org\/10.1145\/3711896.3737215","relation":{},"subject":[],"published":{"date-parts":[[2025,8,3]]},"assertion":[{"value":"2025-08-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}