{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,15]],"date-time":"2025-08-15T01:39:28Z","timestamp":1755221968875,"version":"3.43.0"},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2025,8,9]],"date-time":"2025-08-09T00:00:00Z","timestamp":1754697600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,8,9]],"date-time":"2025-08-09T00:00:00Z","timestamp":1754697600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"DOI":"10.1007\/s11227-025-07727-4","type":"journal-article","created":{"date-parts":[[2025,8,9]],"date-time":"2025-08-09T12:59:05Z","timestamp":1754744345000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Slerp-Opt: merging large language models via adaptive strategies"],"prefix":"10.1007","volume":"81","author":[{"given":"Haiyin","family":"Jiang","sequence":"first","affiliation":[]},{"given":"Ruilin","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Weijie","family":"Liang","sequence":"additional","affiliation":[]},{"given":"Qi","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Xiang","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yanan","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,8,9]]},"reference":[{"issue":"9","key":"7727_CR1","doi-asserted-by":"publisher","first-page":"1833","DOI":"10.1093\/jamia\/ocae045","volume":"31","author":"C Wu","year":"2024","unstructured":"Wu C, Lin W, Zhang X, Zhang Y, Xie W, Wang Y (2024) PMC-llama: toward building open-source language models for medicine. J Am Med Inform Assoc 31(9):1833\u20131843. https:\/\/doi.org\/10.1093\/jamia\/ocae045","journal-title":"J Am Med Inform Assoc"},{"key":"7727_CR2","unstructured":"Wu S, Irsoy O, Lu S, et al. (2023) Bloomberggpt: A large language model for finance. arXiv preprint arXiv:2303.17564"},{"key":"7727_CR3","doi-asserted-by":"crossref","unstructured":"Imani S, Du L, Shrivastava H (2023) MathPrompter: Mathematical Reasoning using Large Language Models. In: Proc 61st Annu Meet Assoc Comput Linguist (Vol 5: Industry Track), Toronto, Canada, pp 37\u201342. Assoc Comput Linguist. https:\/\/aclanthology.org\/2023.acl-industry.4\/","DOI":"10.18653\/v1\/2023.acl-industry.4"},{"key":"7727_CR4","unstructured":"Lewis P, Perez E, Piktus A, et al. (2020) Retrieval-augmented generation for knowledge-intensive NLP tasks. In: Proc 34th Int Conf Neural Inf Process Syst, pp 9459\u20139474."},{"key":"7727_CR5","doi-asserted-by":"publisher","unstructured":"Yao S, Zhao J, Yu D, et al. React: Synergizing Reasoning and Acting in Language Models[C]. In: International Conference on Learning Representations (ICLR). 2023. arXiv:https:\/\/doi.org\/10.48550\/arXiv.2210.03629","DOI":"10.48550\/arXiv.2210.03629"},{"issue":"251","key":"7727_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.48550\/arXiv.2208.03299","volume":"24","author":"G Izacard","year":"2023","unstructured":"Izacard G, Lewis P, Lomeli M et al (2023) Atlas: few-shot learning with retrieval augmented language models[J]. J Mach Learn Res 24(251):1\u201343. https:\/\/doi.org\/10.48550\/arXiv.2208.03299","journal-title":"J Mach Learn Res"},{"key":"7727_CR7","doi-asserted-by":"publisher","first-page":"68539","DOI":"10.48550\/arXiv.2302.04761","volume":"36","author":"T Schick","year":"2023","unstructured":"Schick T, Dwivedi-Yu J, Dess\u00ec R et al (2023) Toolformer: language models can teach themselves to use tools[J]. Adv Neural Inform Process Syst 36:68539\u201368551. https:\/\/doi.org\/10.48550\/arXiv.2302.04761","journal-title":"Adv Neural Inform Process Syst"},{"key":"7727_CR8","unstructured":"Wu T, Luo L, Li YF, et al. (2024) Continual learning for large language models: A survey. arXiv preprint arXiv:2402.01364"},{"key":"7727_CR9","doi-asserted-by":"crossref","unstructured":"Zhao X, Zhou K, Zhang B, et al. (2023) Jiuzhang 2.0: A unified Chinese pre-trained language model for multi-task mathematical problem solving. In: Proc 29th ACM SIGKDD Conf Knowl Discov Data Min, pp 5660\u20135672.","DOI":"10.1145\/3580305.3599850"},{"key":"7727_CR10","unstructured":"Xue F, Zheng Z, Fu Y, et al. (2024) OpenMoE: An early effort on open mixture-of-experts language models. In: Proc 41st Int Conf Mach Learn"},{"key":"7727_CR11","doi-asserted-by":"crossref","unstructured":"Mu B, Wei K, Shao Q, et al. (2025) Hdmole: Mixture of Lora Experts with Hierarchical Routing and Dynamic Thresholds for Fine-Tuning LLM-Based asr Models[C]. In: ICASSP 2025\u20132025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 1\u20135","DOI":"10.1109\/ICASSP49660.2025.10888133"},{"key":"7727_CR12","doi-asserted-by":"crossref","unstructured":"Du H, Liu G, Lin Y, et al. (2024) Mixture of Experts for Intelligent Networks: A Large Language Model-Enabled Approach[C]. In: 2024 International Wireless Communications and Mobile Computing (IWCMC). IEEE, 531\u2013536","DOI":"10.1109\/IWCMC61514.2024.10592370"},{"key":"7727_CR13","doi-asserted-by":"crossref","unstructured":"Shoemake K (1985) Animating rotation with quaternion curves. In: Proc 12th Annu Conf Comput Graph Interact Tech, pp 245\u2013254","DOI":"10.1145\/325334.325242"},{"key":"7727_CR14","doi-asserted-by":"crossref","unstructured":"Liu L, Zhang D, Li S, et al. (2024) Two heads are better than one: Zero-shot cognitive reasoning via multi-LLM knowledge fusion. In: Proc 33rd ACM Int Conf Inf Knowl Manag, pp 1462\u20131472","DOI":"10.1145\/3627673.3679744"},{"key":"7727_CR15","unstructured":"Jiang AQ, Sablayrolles A, Roux A, et al. (2024) Mixtral of experts. arXiv preprint arXiv:2401.04088"},{"key":"7727_CR16","first-page":"1776","volume":"2024","author":"G Perin","year":"2024","unstructured":"Perin G, Chen X, Liu S et al (2024) RankMean: module-level importance score for merging fine-tuned LLM models. Findings Assoc Comput Linguist: ACL 2024:1776\u20131782","journal-title":"Findings Assoc Comput Linguist: ACL"},{"key":"7727_CR17","unstructured":"Matena M, Raffel C (2022) Merging models with Fisher-weighted averaging. In: Proc 36th Int Conf Neural Inf Process Syst, pp 17703\u201317716"},{"key":"7727_CR18","unstructured":"Yu L, Yu B, Yu H, et al. (2024) Language models are super mario: absorbing abilities from homologous models as a free lunch. In: Proc 41st Int Conf Mach Learn"},{"issue":"9","key":"7727_CR19","doi-asserted-by":"publisher","first-page":"2287","DOI":"10.1109\/TPAMI.2019.2914054","volume":"42","author":"D Zhang","year":"2019","unstructured":"Zhang D, Wang L, Zhang L et al (2019) The gap of semantic parsing: a survey on automatic math word problem solvers. IEEE Trans Pattern Anal Mach Intell 42(9):2287\u20132305. https:\/\/doi.org\/10.1109\/TPAMI.2019.2914054","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"7727_CR20","unstructured":"Wei J, Wang X, Schuurmans D, et al. (2022) Chain-of-thought prompting elicits reasoning in large language models. In: Proc 36th Int Conf Neural Inf Process Syst, pp 24824\u201324837"},{"key":"7727_CR21","unstructured":"Wang X, Wei J, Schuurmans D, et al. (2024) Self-consistency improves chain of thought reasoning in language models. In: Proc 11th Int Conf Learn Represent"},{"key":"7727_CR22","unstructured":"Toshniwal S, Moshkov I, Narenthiran S, et al. (2024) OpenMathInstruct-1: A 1.8 million math instruction tuning dataset. arXiv preprint arXiv:2402.10176"},{"key":"7727_CR23","unstructured":"Gou Z, Shao Z, Gong Y, et al. (2024) ToRA: a tool-integrated reasoning agent for mathematical problem solving. In: Proc 12th Int Conf Learn Represent"},{"key":"7727_CR24","unstructured":"Mitra A, Khanpour H, Rosset C, et al. (2024) Orca-Math: Unlocking the potential of SLMs in grade school math. arXiv preprint arXiv:2402.14830"},{"key":"7727_CR25","unstructured":"Hu EJ, Shen Y, Wallis P, et al. (2021) LoRA: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685"},{"key":"7727_CR26","unstructured":"Contributors OC (2023) OpenCompass: a universal evaluation platform for foundation models. GitHub repository"},{"key":"7727_CR27","unstructured":"Bi X, Chen D, Chen G, et al. (2024) DeepSeek LLM: scaling open-source language models with longtermism. arXiv preprint arXiv:2401.02954"},{"key":"7727_CR28","unstructured":"Touvron H, Martin L, Stone K, et al. (2023) LLaMA 2: open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288"},{"key":"7727_CR29","doi-asserted-by":"crossref","unstructured":"Chen N, Zheng Z, Wu N, et al. (2024) Breaking Language Barriers in Multilingual Mathematical Reasoning: Insights and Observations[C]. In: Findings of the Association for Computational Linguistics: EMNLP. 2024: 7001\u20137016","DOI":"10.18653\/v1\/2024.findings-emnlp.411"},{"key":"7727_CR30","unstructured":"Yue X, Qu X, Zhang G, et al. MAmmoTH: Building Math Generalist Models through Hybrid Instruction Tuning[C]. In: The Twelfth International Conference on Learning Representations"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-025-07727-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-025-07727-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-025-07727-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,9]],"date-time":"2025-08-09T12:59:11Z","timestamp":1754744351000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-025-07727-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,9]]},"references-count":30,"journal-issue":{"issue":"12","published-online":{"date-parts":[[2025,8]]}},"alternative-id":["7727"],"URL":"https:\/\/doi.org\/10.1007\/s11227-025-07727-4","relation":{},"ISSN":["1573-0484"],"issn-type":[{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8,9]]},"assertion":[{"value":"30 July 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 August 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"1223"}}