{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,12]],"date-time":"2026-06-12T16:49:27Z","timestamp":1781282967870,"version":"3.54.1"},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T00:00:00Z","timestamp":1743120000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T00:00:00Z","timestamp":1743120000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SN COMPUT. SCI."],"DOI":"10.1007\/s42979-025-03794-9","type":"journal-article","created":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T01:21:25Z","timestamp":1743384085000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Robust Multi-Dialect End-to-End ASR Model Jointly with Beam Search Threshold Pruning and LLM"],"prefix":"10.1007","volume":"6","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5341-7684","authenticated-orcid":false,"given":"M. C.","family":"Shunmuga Priya","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"D.","family":"Karthika Renuka","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"L.","family":"Ashok Kumar","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,3,28]]},"reference":[{"key":"3794_CR1","unstructured":"Chorowski J, Bahdanau D, Serdyuk D, Cho K, Bengio Y. Attention-based models for speech recognition. In Proceedings of the 28th International Conference on Neural Information Processing Systems - Volume 1 (NIPS\u201915). MIT Press, Cambridge, MA, USA, 2015;577-585."},{"key":"3794_CR2","doi-asserted-by":"crossref","unstructured":"Prabhavalkar R, Rao K, Sainath T.N, Li B, Johnson L.M, Jaitly, N. A Comparison of Sequence-to-Sequence Models for Speech Recognition. Interspeech;2017","DOI":"10.21437\/Interspeech.2017-233"},{"key":"3794_CR3","doi-asserted-by":"crossref","unstructured":"Chan W, Jaitly N, Le Q, Vinyals O. Listen, attend and spell: A neural network for large vocabulary conversational speech recognition\u2019, International Conference on Acoustics, Speech and Signal Processing (ICASSP), Shanghai, China, 2016;pp. 4960-4964","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"3794_CR4","doi-asserted-by":"crossref","unstructured":"Sainath TN, Vinyals O, Senior A, Sak, H. Convolutional, long short-term memory, fully connected deep neural networks. International Conference on Acoustics, Speech and Signal Processing (ICASSP), South Brisbane, QLD, Australia, 2015;pp. 4580-4584.","DOI":"10.1109\/ICASSP.2015.7178838"},{"key":"3794_CR5","unstructured":"Collobert R, Puhrsch C, Synnaeve G. Wav2letter: an end-to-end convnet-based speech recognition system;2016 arxiv."},{"key":"3794_CR6","doi-asserted-by":"crossref","unstructured":"Karita S. A comparative study on transformer vs RNN in speech applications. Automatic Speech Recognition and Understanding Workshop (ASRU) 2019;pp.449-456.","DOI":"10.1109\/ASRU46091.2019.9003750"},{"key":"3794_CR7","doi-asserted-by":"crossref","unstructured":"Zeyer A, Irie K, Schl\u00fcter R, Ney H. Improved training of end-to-end attention models for speech recognition, Proc. Interspeech, 2018;pp. 7-11.","DOI":"10.21437\/Interspeech.2018-1616"},{"key":"3794_CR8","doi-asserted-by":"crossref","unstructured":"Guo J, Sainath T, Weiss R. A spelling correction model for end-to-end speech recognition, 2019 Arxiv.","DOI":"10.1109\/ICASSP.2019.8683745"},{"key":"3794_CR9","doi-asserted-by":"crossref","unstructured":"Tsunoo E, Kashiwagi Y, Kumakura T, Watanabe S. Transformer ASR with Contextual Block Processing, Automatic Speech Recognition and Understanding Workshop (ASRU), 2019;pp.427-433.","DOI":"10.1109\/ASRU46091.2019.9003749"},{"key":"3794_CR10","unstructured":"Zeghidour N. Fully convolutional speech recognition, CoRR;2018 vol. abs\/1812.06864."},{"key":"3794_CR11","doi-asserted-by":"crossref","unstructured":"Li J, Lavrukhin V, Ginsburg B, Leary R, Kuchaiev O, Cohen J, Nguyen H, Gadde R. Jasper: an end-to-end convolutional neural acoustic model, 2019;pp.71\u201375.","DOI":"10.21437\/Interspeech.2019-1819"},{"key":"3794_CR12","unstructured":"Amodei D, Ananthanarayanan S, Anubhai R, Bai J, Battenberg E, Case C, Casper J, et al. Deep speech 2: end-to-end speech recognition in english and Mandarin, Proceedings of The 33rd International Conference on Machine Learning. vol. 48, 2016;pp. 173\u2013182."},{"key":"3794_CR13","doi-asserted-by":"crossref","unstructured":"Ali A, Renals S. Word error rate estimation without ASR output: e-WER2. 2020;pp. 616\u2013620.","DOI":"10.21437\/Interspeech.2020-2357"},{"key":"3794_CR14","doi-asserted-by":"publisher","first-page":"3673","DOI":"10.1007\/s10462-019-09775-8","volume":"53","author":"A Singh","year":"2020","unstructured":"Singh A, Kadyan V, Kumar M, Bassan N. ASRoIL: a comprehensive survey for automatic speech recognition of Indian languages. Artif Intell Rev. 2020;53:3673\u2013704.","journal-title":"Artif Intell Rev"},{"key":"3794_CR15","doi-asserted-by":"crossref","unstructured":"Kumar LA, Renuka DK, Priya MCS. Towards robust speech recognition model using deep learning. 2023 International Conference on Intelligent Systems for Communication, IoT and Security (ICISCoIS), Coimbatore, India;2023,pp. 253-256.","DOI":"10.1109\/ICISCoIS56541.2023.10100390"},{"key":"3794_CR16","doi-asserted-by":"publisher","unstructured":"Priya MCS, Renuka DK, Kumar LA, et al. Multilingual low resource Indian language speech recognition and spell correction using Indic BERT. S\u0101dhan\u0101 47, 2022;227 . https:\/\/doi.org\/10.1007\/s12046-022-01973-5","DOI":"10.1007\/s12046-022-01973-5"},{"key":"3794_CR17","doi-asserted-by":"publisher","unstructured":"Prabha G, Jyothsna PV, Shahina KK, Premjith B, Soman KP. A deep learning approach for part-of-speech tagging in nepali language. 2018 International Conference on Advances in Computing, Communications and Informatics (ICACCI), Bangalore, India, 2018;pp. 1132-1136, https:\/\/doi.org\/10.1109\/ICACCI.2018.8554812.","DOI":"10.1109\/ICACCI.2018.8554812"},{"key":"3794_CR18","doi-asserted-by":"publisher","unstructured":"Kiran KS, Mandal A, Prasanna Kumar KR, Mitra P, Veni S. A comparative study of dictionary learning algorithms on speech recognition task. 2018 International Conference on Advances in Computing, Communications and Informatics (ICACCI), Bangalore, India, 2018;pp. 588-594, https:\/\/doi.org\/10.1109\/ICACCI.2018.8554758.","DOI":"10.1109\/ICACCI.2018.8554758"},{"key":"3794_CR19","doi-asserted-by":"publisher","unstructured":"Shraddha S, Jl G, SK S. Child speech recognition on end-to-end neural ASR models. 2022 2nd International Conference on Intelligent Technologies (CONIT), Hubli, India, 2022;pp. 1-6, https:\/\/doi.org\/10.1109\/CONIT55038.2022.9847929","DOI":"10.1109\/CONIT55038.2022.9847929"},{"key":"3794_CR20","doi-asserted-by":"publisher","unstructured":"Darshana S, Theivaprakasham H, Jyothish Lal G, Premjith B, Sowmya V, Soman K. MARS: a hybrid deep CNN-based multi-accent recognition system for english language. 2022 First International Conference on Artificial Intelligence Trends and Pattern Recognition (ICAITPR), Hyderabad, India, 2022;pp. 1-6, https:\/\/doi.org\/10.1109\/ICAITPR51569.2022.9844177.","DOI":"10.1109\/ICAITPR51569.2022.9844177"},{"key":"3794_CR21","unstructured":"Touvron H, Martin L, Stone K, Albert P, Almahairi A, et al. Llama 2: open foundation and fine-tuned chat models, 2023;Arxiv."},{"key":"3794_CR22","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown T, Mann B, Ryder N, Subbiah M, Kaplan JD, et al. Language models are few-shot learners. Adv Neural Inf Process Syst. 2020;33:1877\u2013901.","journal-title":"Adv Neural Inf Process Syst"},{"key":"3794_CR23","unstructured":"Le ST, la Fan A, Akiki C, Pavlick E, Suzana F, et al. Bloom: A 176b-parameter open-access multilingual language model. arXiv."},{"key":"3794_CR24","unstructured":"Zhang S, Roller S, Goyal N, Artetxe M, Chen M, et al. OPT: open pre-trained transformer language models, ArXiv"},{"key":"3794_CR25","unstructured":"Hoffmann J, Borgeaud S, Mensch A, Buchatskaya E, Cai T. Training compute-optimal large language models, 2022;Arxiv."},{"key":"3794_CR26","doi-asserted-by":"crossref","unstructured":"Gulati A, Qin J, Chiu C, Parmar N, Zhang Yet al. Conformer: Convolution-augmented Transformer for Speech Recognition. 2020 arXiv:2005.08100","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"3794_CR27","unstructured":"Radford A, Kim JW, Xu T, Brockman G, Mcleavey C, Sutskever I. Robust Speech Recognition via Large-Scale Weak Supervision. Proceedings of the 40th International Conference on Machine Learning, in Proceedings of Machine Learning Research, 2023;202:28492-28518"},{"key":"3794_CR28","doi-asserted-by":"crossref","unstructured":"Sim KC, et al. A comparison of parameter-efficient ASR domain adaptation methods for universal speech and language models. ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Seoul, Korea, Republic of, 2024;pp. 6900-6904.","DOI":"10.1109\/ICASSP48485.2024.10445894"},{"key":"3794_CR29","doi-asserted-by":"crossref","unstructured":"Prabhavalkar R, et al. Extreme encoder output frame rate reduction: improving computational latencies of large end-to-end models. ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Seoul, Korea, Republic of, 2024;pp. 11816\u201311820.","DOI":"10.1109\/ICASSP48485.2024.10446985"},{"key":"3794_CR30","doi-asserted-by":"crossref","unstructured":"Wang W, Prabhavalkar R, Shan H, Meng Z, Hwang D, Li Q, Sim KC, Li B, Qin J, Cai X, Stooke A, Zheng C, He Y, Sainath T, Mengibar PM. Massive end-to-end speech recognition models with time reduction. In Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), 2024;pages 6206\u20136217, Mexico City, Mexico. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2024.naacl-long.344"}],"container-title":["SN Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-025-03794-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42979-025-03794-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-025-03794-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T01:21:36Z","timestamp":1743384096000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42979-025-03794-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,28]]},"references-count":30,"journal-issue":{"issue":"4","published-online":{"date-parts":[[2025,4]]}},"alternative-id":["3794"],"URL":"https:\/\/doi.org\/10.1007\/s42979-025-03794-9","relation":{},"ISSN":["2661-8907"],"issn-type":[{"value":"2661-8907","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,3,28]]},"assertion":[{"value":"25 October 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 February 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 March 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that there is no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"323"}}