{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:28:57Z","timestamp":1775230137285,"version":"3.50.1"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,4,6]]},"DOI":"10.1109\/icassp49660.2025.10888133","type":"proceedings-article","created":{"date-parts":[[2025,3,12]],"date-time":"2025-03-12T13:52:43Z","timestamp":1741787563000},"page":"1-5","source":"Crossref","is-referenced-by-count":6,"title":["HDMoLE: Mixture of LoRA Experts with Hierarchical Routing and Dynamic Thresholds for Fine-Tuning LLM-based ASR Models"],"prefix":"10.1109","author":[{"given":"Bingshen","family":"Mu","sequence":"first","affiliation":[{"name":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science,Xi'an,China"}]},{"given":"Kun","family":"Wei","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science,Xi'an,China"}]},{"given":"Qijie","family":"Shao","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science,Xi'an,China"}]},{"given":"Yong","family":"Xu","sequence":"additional","affiliation":[{"name":"Tencent AI Lab,Shenzhen,China"}]},{"given":"Lei","family":"Xie","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science,Xi'an,China"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Training language models to follow instructions with human feedback","volume-title":"Proc. NeurIPS","author":"Ouyang"},{"key":"ref2","article-title":"GPT-4 Technical Report","author":"Achiam","year":"2023"},{"key":"ref3","article-title":"Llama 2: Open Foundation and Fine-Tuned Chat Models","author":"Touvron","year":"2023"},{"key":"ref4","article-title":"Qwen2 Technical Report","author":"Yang","year":"2024"},{"key":"ref5","article-title":"Qwen2-Audio Technical Report","author":"Chu","year":"2024"},{"key":"ref6","article-title":"Seed-ASR: Understanding Diverse Speech and Contexts with LLM-based Speech Recognition","author":"Bai","year":"2024"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ISCSLP63861.2024.10800077"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10447563"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2024.3432275"},{"key":"ref10","article-title":"Mixture of lora experts","volume-title":"Proc. ICLR","author":"Wu"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.106"},{"key":"ref12","article-title":"Few-Shot Parameter-Efficient Fine-Tuning is Better and Cheaper than In-Context Learning","volume-title":"Proc. NeurIPS","author":"Liu"},{"key":"ref13","first-page":"2790","article-title":"Parameter-Efficient Transfer Learning for NLP","volume-title":"Proc. ICML","author":"Houlsby"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-short.1"},{"key":"ref17","article-title":"LoRA: Low-Rank Adaptation of Large Language Models","volume-title":"Proc. ICLR","author":"Hu"},{"key":"ref18","article-title":"Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer","volume-title":"Proc. ICLR","author":"Shazeer"},{"key":"ref19","article-title":"A Review of Sparse Expert Models in Deep Learning","author":"Fedus","year":"2022"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ipdpsw55747.2022.00171"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i11.26617"},{"key":"ref22","article-title":"U2++ MoE: Scaling 4.7 x parameters with minimal impact on RTF","author":"Song","year":"2024"},{"key":"ref23","article-title":"MOELoRA: An MOE-based Parameter Efficient Fine-Tuning Method for Multi-task Medical Applications","author":"Liu","year":"2023"},{"key":"ref24","first-page":"11 371","article-title":"Mixture-of-LoRAs: An Efficient Multitask Tuning Method for Large Language Models","volume-title":"Proc. LREC-COLING","author":"Feng"},{"key":"ref25","article-title":"SiRA: Sparse Mixture of Low Rank Adaptation","author":"Zhu","year":"2023"},{"key":"ref26","article-title":"MixLoRA: Enhancing Large Language Models Fine-Tuning with LoRA based Mixture of Experts","author":"Li","year":"2024"},{"key":"ref27","article-title":"MoRAL: MoE Augmented LoRA for LLMs\u2019 Lifelong Learning","author":"Yang","year":"2024"},{"key":"ref28","article-title":"AdaMoLE: Fine-Tuning Large Language Models with Adaptive Mixture of Low-Rank Adaptation Experts","author":"Liu","year":"2024"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-1847"},{"key":"ref30","article-title":"KeSpeech: An Open Source Speech Dataset of Mandarin and Its Eight Subdialects","volume-title":"Proc. NeurIPS Datasets and Benchmarks Track","author":"Tang"},{"key":"ref31","article-title":"AISHELL-2: Transforming Mandarin ASR Research Into Industrial Scale","author":"Du","year":"2018"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746682"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICSDA.2017.8384449"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1397"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2020-3015"}],"event":{"name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Hyderabad, India","start":{"date-parts":[[2025,4,6]]},"end":{"date-parts":[[2025,4,11]]}},"container-title":["ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10887540\/10887541\/10888133.pdf?arnumber=10888133","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T05:21:05Z","timestamp":1774416065000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10888133\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,6]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/icassp49660.2025.10888133","relation":{},"subject":[],"published":{"date-parts":[[2025,4,6]]}}}