{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T21:55:31Z","timestamp":1769550931487,"version":"3.49.0"},"reference-count":60,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T00:00:00Z","timestamp":1745539200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T00:00:00Z","timestamp":1745539200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1007\/s11263-025-02434-2","type":"journal-article","created":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T06:20:01Z","timestamp":1745562001000},"page":"5396-5412","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Data-Adaptive Weight-Ensembling for Multi-task Model Fusion"],"prefix":"10.1007","volume":"133","author":[{"given":"Anke","family":"Tang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5659-3464","authenticated-orcid":false,"given":"Li","family":"Shen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yong","family":"Luo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shiwei","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Han","family":"Hu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bo","family":"Du","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dacheng","family":"Tao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,4,25]]},"reference":[{"key":"2434_CR1","unstructured":"Ainsworth, S. K., Hayase, J., & Srinivasa, S. (2023). Git Re-Basin: Merging Models modulo Permutation Symmetries (No. arXiv:2209.04836). arXiv."},{"key":"2434_CR2","doi-asserted-by":"crossref","unstructured":"Alaluf, Y., Tov, O., Mokady, R., Gal, R., & Bermano, A. (2022). HyperStyle: StyleGAN Inversion with HyperNetworks for Real Image Editing. 2022 IEEE\/CVF conference on computer vision and pattern recognition (CVPR) (pp. 18490\u201318500). New Orleans, LA, USA: IEEE.","DOI":"10.1109\/CVPR52688.2022.01796"},{"key":"2434_CR3","unstructured":"Benton, G. W., Maddox, W. J., Lotfi, S., & Wilson, A. G. (2021). Loss Surface Simplexes for Mode Connecting Volumes and Fast Ensembling (No. arXiv:2102.13042). arXiv."},{"issue":"9","key":"2434_CR4","doi-asserted-by":"publisher","first-page":"250","DOI":"10.1007\/s10462-024-10862-8","volume":"57","author":"VK Chauhan","year":"2024","unstructured":"Chauhan, V. K., Zhou, J., Lu, P., Molaei, S., & Clifton, D. A. (2024). A brief review of hypernetworks in deep learning. Artificial Intelligence Review, 57(9), 250. https:\/\/doi.org\/10.1007\/s10462-024-10862-8","journal-title":"Artificial Intelligence Review"},{"issue":"10","key":"2434_CR5","doi-asserted-by":"publisher","first-page":"1865","DOI":"10.1109\/JPROC.2017.2675998","volume":"105","author":"G Cheng","year":"2017","unstructured":"Cheng, G., Han, J., & Lu, X. (2017). Remote Sensing Image Scene Classification: Benchmark and State of the Art. Proceedings of the IEEE, 105(10), 1865\u20131883. https:\/\/doi.org\/10.1109\/JPROC.2017.2675998","journal-title":"Proceedings of the IEEE"},{"key":"2434_CR6","unstructured":"Chung, H. W., Hou, L., Longpre, S., Zoph, B., Tay, Y., Fedus, W., & Wei, J. (2022). Scaling Instruction-Finetuned Language Models (No. arXiv:2210.11416). arXiv."},{"key":"2434_CR7","doi-asserted-by":"crossref","unstructured":"Cimpoi, M., Maji, S., Kokkinos, I., Mohamed, S., & Vedaldi, A. (2014). Describing Textures in the Wild. 2014 IEEE conference on computer vision and pattern recognition (pp. 3606\u20133613). Columbus, OH, USA: IEEE.","DOI":"10.1109\/CVPR.2014.461"},{"key":"2434_CR8","unstructured":"Daniel Freeman, C., & Bruna, J. (2017). Topology and geometry of half-rectified network optimization: 5th international conference on learning representations, ICLR 2017."},{"key":"2434_CR9","doi-asserted-by":"crossref","unstructured":"Do, T., Khiem, L., Pham, Q., Nguyen, T., Doan, T.- N., Nguyen, B., & Hoi, S. (2023). HyperRouter: Towards efficient training and inference of sparse mixture of experts. H.\u00a0Bouamor, J.\u00a0Pino, and K.\u00a0Bali (Eds.), Proceedings of the 2023 conference on empirical methods in natural language processing (pp. 5754\u20135765). Singapore: Association for Computational Linguistics.","DOI":"10.18653\/v1\/2023.emnlp-main.351"},{"key":"2434_CR10","unstructured":"Draxler, F., Veschgini, K., Salmhofer, M., & Hamprecht, F.A. (2019). Essentially No Barriers in Neural Network Energy Landscape (No. arXiv:1803.00885). arXiv."},{"key":"2434_CR11","unstructured":"Entezari, R., Sedghi, H., Saukh, O., & Neyshabur, B. (2022). The Role of Permutation Invariance in Linear Mode Connectivity of Neural Networks (No. arXiv:2110.06296). arXiv."},{"key":"2434_CR12","unstructured":"Frankle, J., Dziugaite, G. K., Roy, D. M., & Carbin, M. (2020). Linear Mode Connectivity and the Lottery Ticket Hypothesis (No. arXiv:1912.05671). arXiv."},{"key":"2434_CR13","unstructured":"Garipov, T., Izmailov, P., Podoprikhin, D., Vetrov, D., & Wilson, A. G. (2018). Loss Surfaces, Mode Connectivity, and Fast Ensembling of DNNs (No. arXiv:1802.10026). arXiv."},{"key":"2434_CR14","doi-asserted-by":"crossref","unstructured":"Gulati, A., Qin, J., Chiu, C.- C., Parmar, N., Zhang, Y., Yu, J., & Pang, R. (2020). Conformer: Convolution-augmented Transformer for Speech Recognition (No. arXiv:2005.08100). arXiv.","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"2434_CR15","unstructured":"Ha, D., Dai, A. M., & Le, Q. V. (2022). Hypernetworks. International conference on learning representations."},{"key":"2434_CR16","doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., & Girshick, R. (2021). Masked Autoencoders Are Scalable Vision Learners (No. arXiv:2111.06377). arXiv.","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"2434_CR17","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition, 2016-Decem, 770\u2013778, 10.1109\/CVPR.2016.90 arxiv:1512.03385","DOI":"10.1109\/CVPR.2016.90"},{"key":"2434_CR18","doi-asserted-by":"crossref","unstructured":"Helber, P., Bischke, B., Dengel, A., & Borth, D. (2018). Introducing eurosat: A novel dataset and deep learning benchmark for land use and land cover classification. Igarss 2018-2018 IEEE international geoscience and remote sensing symposium (pp. 204\u2013207).","DOI":"10.1109\/IGARSS.2018.8519248"},{"key":"2434_CR19","unstructured":"Hendrycks, D., & Dietterich, T. (2019). Benchmarking neural network robustness to common corruptions and perturbations (No. arXiv:1903.12261). arXiv."},{"key":"2434_CR20","unstructured":"Hu, E.J., Shen, Y., Wallis, P., Allen-Zhu, Z., Li, Y., Wang, S.. & Chen, W. (2021). LoRA: Low-Rank Adaptation of Large Language Models (No. arXiv:2106.09685). arXiv."},{"key":"2434_CR21","unstructured":"Huang, C., Liu, Q., Lin, B.Y., Pang, T., Du, C., & Lin, M. (2023). LoraHub: Efficient Cross-Task Generalization via Dynamic LoRA Composition (No. arXiv:2307.13269). arXiv."},{"key":"2434_CR22","unstructured":"Ilharco, G., Ribeiro, M.T., Wortsman, M., Gururangan, S., Schmidt, L., Hajishirzi, H., & Farhadi, A. (2023). Editing models with task arithmetic (No. arXiv:2212.04089). arXiv."},{"key":"2434_CR23","unstructured":"Izmailov, P., Podoprikhin, D., Garipov, T., Vetrov, D., & Wilson, A.G. (2019). Averaging weights leads to wider optima and better generalization (No. arXiv:1803.05407). arXiv."},{"key":"2434_CR24","unstructured":"Jin, X., Ren, X., Preotiuc-Pietro, D., & Cheng, P. (2023). Dataless knowledge fusion by merging weights of language models (No. arXiv:2212.09849). arXiv."},{"key":"2434_CR25","unstructured":"Kaddour, J. (2022). Stop wasting my time! saving days of imagenet and BERT training with latest weight averaging (No. arXiv:2209.14981). arXiv."},{"key":"2434_CR26","doi-asserted-by":"crossref","unstructured":"Krause, J., Stark, M., Deng, J., & Fei-Fei, L. (2013). 3D Object Representations for Fine-Grained Categorization. 2013 IEEE international conference on computer vision workshops (pp. 554\u2013561).","DOI":"10.1109\/ICCVW.2013.77"},{"issue":"11","key":"2434_CR27","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y Lecun","year":"1998","unstructured":"Lecun, Y., Bottou, L., Bengio, Y., & Haffner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86(11), 2278\u20132324. https:\/\/doi.org\/10.1109\/5.726791","journal-title":"Proceedings of the IEEE"},{"key":"2434_CR28","unstructured":"Li, W., Peng, Y., Zhang, M., Ding, L., Hu, H., & Shen, L. (2023). Deep Model Fusion: A Survey (No. arXiv:2309.15698). arXiv."},{"key":"2434_CR29","unstructured":"Li, Y., Yosinski, J., Clune, J., Lipson, H., & Hopcroft, J. (2016). Convergent Learning: Do different neural networks learn the same representations? (No. arXiv:1511.07543). arXiv."},{"key":"2434_CR30","unstructured":"Liang, J., He, R., & Tan, T. (2023). A Comprehensive Survey on Test-Time Adaptation under Distribution Shifts (No. arXiv:2303.15361). arXiv."},{"key":"2434_CR31","unstructured":"Liu, C., Lou, C., Wang, R., Xi, A.Y., Shen, L., & Yan, J. (2022). Deep Neural Network Fusion via Graph Matching with Applications to Model Ensemble and Federated Learning. Proceedings of the 39th International conference on machine learning (pp. 13857\u201313869). PMLR."},{"key":"2434_CR32","unstructured":"Lu, Z., Fan, C., Wei, W., Qu, X., Chen, D., & Cheng, Y. (2024). Twin-merging: dynamic integration of modular expertise in Model Merging. arXiv."},{"key":"2434_CR33","unstructured":"Matena, M., & Raffel, C. (2022). Merging Models with Fisher-Weighted Averaging (No. arXiv:2111.09832). arXiv."},{"key":"2434_CR34","doi-asserted-by":"crossref","unstructured":"Mounsaveng, S., Chiaroni, F., Boudiaf, M., Pedersoli, M., & Ayed, I.B. (2023). Bag of Tricks for Fully Test-Time Adaptation (No. arXiv:2310.02416). arXiv.","DOI":"10.1109\/WACV57701.2024.00194"},{"key":"2434_CR35","unstructured":"Nagarajan, V., & Kolter, J.Z. (2019). Uniform convergence may be unable to explain generalization in deep learning. Advances in Neural Information Processing Systems (Vol.\u00a032). Curran Associates, Inc."},{"key":"2434_CR36","unstructured":"Navon, A., Shamsian, A., Chechik, G., & Fetaya, E. (2021). Learning the Pareto Front with Hypernetworks (No. arXiv:2010.04104). arXiv."},{"key":"2434_CR37","unstructured":"Netzer, Y., Wang, T., Coates, A., Bissacco, A., Wu, B., & Ng, A.Y. (2021). Reading Digits in Natural Images with Unsupervised Feature Learning."},{"key":"2434_CR38","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., & Sutskever, I. (2021). Learning Transferable Visual Models From Natural Language Supervision (No. arXiv:2103.00020). arXiv."},{"issue":"8","key":"2434_CR39","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., & Sutskever, I. (2019). Language models are unsupervised multitask learners. OpenAI Blog, 1(8), 9.","journal-title":"OpenAI Blog"},{"key":"2434_CR40","unstructured":"Shamsian, A., Navon, A., Fetaya, E., & Chechik, G. (2021). Personalized Federated Learning using Hypernetworks (No. arXiv:2103.04628). arXiv."},{"key":"2434_CR41","unstructured":"Shen, L., Tang, A., Yang, E., Guo, G., Luo, Y., Zhang, L., & Tao, D. (2024). Efficient and effective weight-ensembling mixture of experts for multi-task model merging. arXiv preprint arXiv:2410.21804 ,"},{"key":"2434_CR42","doi-asserted-by":"publisher","first-page":"323","DOI":"10.1016\/j.neunet.2012.02.016","volume":"32","author":"J Stallkamp","year":"2012","unstructured":"Stallkamp, J., Schlipsing, M., Salmen, J., & Igel, C. (2012). Man vs. computer: Benchmarking machine learning algorithms for traffic sign recognition. Neural Networks, 32, 323\u2013332. https:\/\/doi.org\/10.1016\/j.neunet.2012.02.016","journal-title":"Neural Networks"},{"key":"2434_CR43","unstructured":"Stoica, George, Bolya, Daniel, Bjorner, Jakob, Hearn, Taylor, & Hoffman, Judy. (2023). ZipIt! Merging Models from Different Tasks without Training (No. arXiv:2305.03053). arXiv."},{"key":"2434_CR44","unstructured":"Sun, Z., Ozay, M., & Okatani, T. (2017). HyperNetworks with statistical filtering for defending adversarial examples (No. arXiv:1711.01791). arXiv."},{"key":"2434_CR45","unstructured":"Tam, D., Bansal, M., & Raffel, C. (2023). Merging by Matching Models in Task Subspaces (No. arXiv:2312.04339). arXiv."},{"key":"2434_CR46","unstructured":"Tang, A., Shen, L., Luo, Y., Liu, S., Hu, H., & Du, B. (2024). Towards efficient pareto set approximation via mixture of experts based model fusion. arXiv preprint arXiv:2406.09770,"},{"key":"2434_CR47","unstructured":"Tang, A., Shen, L., Luo, Y., Xie, S., Hu, H., Zhang, L., & Tao, D. (2024). SMILE: Zero-shot sparse mixture of low-rank experts construction from pre-trained foundation models. arXiv."},{"key":"2434_CR48","unstructured":"Tatro, N., Chen, P.- Y., Das, P., Melnyk, I., Sattigeri, P., & Lai, R. (2020). Optimizing Mode Connectivity via Neuron Alignment. Advances in Neural Information Processing Systems (Vol.\u00a033, pp. 15300\u201315311). Curran Associates, Inc."},{"key":"2434_CR49","unstructured":"von Oswald, J., Henning, C., Grewe, B.F., & Sacramento, J. (2022). Continual learning with hypernetworks (No. arXiv:1906.00695). arXiv."},{"key":"2434_CR50","doi-asserted-by":"crossref","unstructured":"Wang, A., Singh, A., Michael, J., Hill, F., Levy, O., & Bowman, S. (2018). GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. Proceedings of the 2018 EMNLP Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP (pp. 353\u2013355). Brussels, Belgium: Association for Computational Linguistics.","DOI":"10.18653\/v1\/W18-5446"},{"key":"2434_CR51","unstructured":"Wortsman, M., Ilharco, G., Gadre, S.Y., Roelofs, R., Gontijo-Lopes, R., Morcos, A.S., & Schmidt, L. (2022). Model soups: averaging weights of multiple fine-tuned models improves accuracy without increasing inference time (No. arXiv:2203.05482). arXiv."},{"key":"2434_CR52","doi-asserted-by":"crossref","unstructured":"Xiao, J., Hays, J., Ehinger, K. A., Oliva, A., & Torralba, A. (2010). SUN database: Large-scale scene recognition from abbey to zoo. 2010 IEEE computer society conference on computer vision and pattern recognition (pp. 3485\u20133492). San Francisco, CA, USA: IEEE.","DOI":"10.1109\/CVPR.2010.5539970"},{"key":"2434_CR53","unstructured":"Yadav, P., Raffel, C., Muqeeth, M., Caccia, L., Liu, H., Chen, T., & Sordoni, A. (2024). A Survey on Model MoErging: recycling and routing among specialized experts for collaborative learning. arXiv."},{"key":"2434_CR54","unstructured":"Yadav, P., Tam, D., Choshen, L., Raffel, C., & Bansal, M. (2023). Resolving Interference When Merging Models (No. arXiv:2306.01708). arXiv."},{"key":"2434_CR55","unstructured":"Yang, E., Shen, L., Guo, G., Wang, X., Cao, X., Zhang, J., & Tao, D. (2024). Model merging in llms, mllms, and beyond: Methods, theories, applications and opportunities. arXiv preprint arXiv:2408.07666,"},{"key":"2434_CR56","unstructured":"Yang, G., Simon, J.B., & Bernstein, J. (2023). A Spectral Condition for Feature Learning (No. arXiv:2310.17813). arXiv."},{"key":"2434_CR57","unstructured":"Yang, E., Wang, Z., Shen, L., Liu, S., Guo, G., Wang, X., & Tao, D. (2023). AdaMerging: Adaptive Model Merging for Multi-Task Learning (No. arXiv:2310.02575). arXiv."},{"key":"2434_CR58","unstructured":"Yu, L., Yu, B., Yu, H., Huang, F., & Li, Y. (2023). Language Models are Super Mario: Absorbing Abilities from Homologous Models as a Free Lunch (No. arXiv:2311.03099). arXiv."},{"key":"2434_CR59","unstructured":"Yunis, D., Patel, K.K., Savarese, P., Vardi, G., Livescu, K., Walter, M., & Maire, M. (2022). On Convexity and Linear Mode Connectivity in Neural Networks. OPT2022: 14th annual workshop on optimization for machine learning"},{"key":"2434_CR60","unstructured":"Zheng, H., Shen, L., Tang, A., Luo, Y., Hu, H., Du, B., & Tao, D. (2023). Learn From Model Beyond Fine-Tuning: A Survey (No. arXiv:2310.08184). arXiv."}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02434-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-025-02434-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02434-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,24]],"date-time":"2025-07-24T05:21:49Z","timestamp":1753334509000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-025-02434-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,25]]},"references-count":60,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2025,8]]}},"alternative-id":["2434"],"URL":"https:\/\/doi.org\/10.1007\/s11263-025-02434-2","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,4,25]]},"assertion":[{"value":"1 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 March 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 April 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}