{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T07:17:58Z","timestamp":1763191078358,"version":"3.45.0"},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100017607","name":"Shenzhen Fundamental Research Program","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100017607","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1109\/ijcnn64981.2025.11228602","type":"proceedings-article","created":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T18:46:15Z","timestamp":1763145975000},"page":"1-8","source":"Crossref","is-referenced-by-count":0,"title":["Mamba-based Layer-wise Progressive Fusion Network with Depthwise Enhancement for Low-resource Speech Recognition"],"prefix":"10.1109","author":[{"given":"Xuanda","family":"Chen","sequence":"first","affiliation":[{"name":"Shenzhen Research Institute of Shandong University,Shenzhen,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dingxin","family":"Cheng","sequence":"additional","affiliation":[{"name":"Shenzhen Research Institute of Shandong University,Shenzhen,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fei","family":"Hou","sequence":"additional","affiliation":[{"name":"Wendeng Branch of Weihai Public Security Bureau,Weihai,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bin","family":"Jiang","sequence":"additional","affiliation":[{"name":"Shenzhen Research Institute of Shandong University,Shenzhen,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xuchen","family":"Li","sequence":"additional","affiliation":[{"name":"Shenzhen Research Institute of Shandong University,Shenzhen,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenyu","family":"Wang","sequence":"additional","affiliation":[{"name":"Shenzhen Research Institute of Shandong University,Shenzhen,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Meixia","family":"Qu","sequence":"additional","affiliation":[{"name":"Shenzhen Research Institute of Shandong University,Shenzhen,China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/PRML56267.2022.9882228"},{"key":"ref2","first-page":"113","article-title":"Dialect identification through adversarial learning and knowledge distillation on romanian BERT","volume-title":"Proceedings of the Eighth Workshop on NLP for Similar Languages","author":"Zaharia"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.2478\/sm-2024-0013"},{"key":"ref4","article-title":"Speech recognition utilizing deep learning: A systematic review of the latest developments","volume":"14","author":"Al-Fraihat","year":"2024","journal-title":"Human-centric Computing and Information Sciences"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2677"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2928"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-994"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/SLT61566.2024.10832137"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"ref10","first-page":"17627","article-title":"Branchformer: Parallel mlp-attention architectures to capture local and global context for speech recognition and understanding","volume-title":"International Conference on Machine Learning, ICML","volume":"162","author":"Peng"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.42"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-1613"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10447447"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3663548.3688536"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i09.7123"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-11"},{"article-title":"Common voice: A massively-multilingual speech corpus","year":"2019","author":"Ardila","key":"ref17"},{"article-title":"Automatic speech recognition datasets in cantonese: A survey and new dataset","year":"2022","author":"Yu","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-1386"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2275"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"article-title":"Mamba: Linear-time sequence modeling with selective state spaces","year":"2023","author":"Gu","key":"ref23"},{"article-title":"Vision mamba: Efficient visual representation learning with bidirectional state space model","volume-title":"Forty-first International Conference on Machine Learning, ICML 2024","author":"Zhu","key":"ref24"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/icassp49660.2025.10887599"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/icassp49660.2025.10889391"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-1446"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2059"},{"key":"ref29","article-title":"Mlp-based architecture with variable length input for automatic speech recognition","author":"Sakuma","year":"2022","journal-title":"OpenReview"},{"article-title":"Speechbrain: A general-purpose speech toolkit","year":"2021","author":"Ravanelli","key":"ref30"}],"event":{"name":"2025 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2025,6,30]]},"location":"Rome, Italy","end":{"date-parts":[[2025,7,5]]}},"container-title":["2025 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11227166\/11227148\/11228602.pdf?arnumber=11228602","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T07:13:42Z","timestamp":1763190822000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11228602\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/ijcnn64981.2025.11228602","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]}}}