{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T05:36:24Z","timestamp":1774935384315,"version":"3.50.1"},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2025,6,2]],"date-time":"2025-06-02T00:00:00Z","timestamp":1748822400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,6,2]],"date-time":"2025-06-02T00:00:00Z","timestamp":1748822400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SN COMPUT. SCI."],"DOI":"10.1007\/s42979-025-04058-2","type":"journal-article","created":{"date-parts":[[2025,6,2]],"date-time":"2025-06-02T12:15:35Z","timestamp":1748866535000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["An End-to-End Continuous Speech Recognition System in Bengali for General and Elderly Domain"],"prefix":"10.1007","volume":"6","author":[{"given":"Shubhojeet","family":"Paul","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0680-2691","authenticated-orcid":false,"given":"Vandana","family":"Bhattacharjee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sujan Kumar","family":"Saha","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,6,2]]},"reference":[{"key":"4058_CR1","doi-asserted-by":"publisher","unstructured":"Porcheron M, Fischer JE, Reeves S, Sharples S. Voice interfaces in everyday life. In: Proceedings of the 2018 CHI Conference on Human Factors in Computing Systems, pp. 1\u201312. ACM, Montreal QC, Canada (2018). https:\/\/doi.org\/10.1145\/3173574.3174214","DOI":"10.1145\/3173574.3174214"},{"issue":"2","key":"4058_CR2","doi-asserted-by":"publisher","first-page":"1888","DOI":"10.11591\/ijai.v13.i2.pp1888-1899","volume":"13","author":"JK Adeniyi","year":"2024","unstructured":"Adeniyi JK, Ajagbe SA, Adeniyi AE, Aworinde HO, Falola PB, Adigun MO. Easesum: An online abstractive and extractive text summarizer using deep learning technique. IAES Int J Artif Intell (IJ-AI). 2024;13(2):1888\u201399. https:\/\/doi.org\/10.11591\/ijai.v13.i2.pp1888-1899.","journal-title":"IAES Int J Artif Intell (IJ-AI)"},{"key":"4058_CR3","unstructured":"Eberhard DM, Simons GF, Fennig CD. Ethnologue: Languages of the World, 25 edn. SIL International, Dallas, Texas (2022). https:\/\/www.ethnologue.com\/guides\/countries-most-languages"},{"key":"4058_CR4","unstructured":"General Registrar, Census\u00a0Commissioner I. Language: India, States and Union Territories. Census of India 2011. Ministry of Home Affairs, Government of India (2011). https:\/\/censusindia.gov.in\/2011Census\/C-16.html"},{"key":"4058_CR5","doi-asserted-by":"publisher","unstructured":"Amin MAA, Islam MT, Kibria S, Rahman MS. Continuous bengali speech recognition based on deep neural network. In: 2019 International Conference on Electrical, Computer and Communication Engineering (ECCE), pp. 1\u20136 (2019). https:\/\/doi.org\/10.1109\/ECACE.2019.8679341","DOI":"10.1109\/ECACE.2019.8679341"},{"key":"4058_CR6","doi-asserted-by":"publisher","unstructured":"Das B, Mandal S, Mitra P. Bengali speech corpus for continuous automatic speech recognition system. In: 2011 International Conference on Speech Database and Assessments (Oriental COCOSDA), pp. 51\u201355. IEEE, Hsinchu, Taiwan (2011). https:\/\/doi.org\/10.1109\/ICSDA.2011.6085979","DOI":"10.1109\/ICSDA.2011.6085979"},{"key":"4058_CR7","doi-asserted-by":"publisher","unstructured":"Bhowmik T, Choudhury A, Mandal SKD. Deep neural network based recognition and classification of bengali phonemes: A case study of bengali unconstrained speech. In: Bhattacharyya P, Sastry H, Marriboyina V, Sharma R (eds.) Smart and Innovative Trends in Next Generation Computing Technologies. NGCT 2017. Communications in Computer and Information Science, 827, 715\u2013725. Springer, Singapore (2018). https:\/\/doi.org\/10.1007\/978-981-10-8657-1_58","DOI":"10.1007\/978-981-10-8657-1_58"},{"issue":"1","key":"4058_CR8","first-page":"1","volume":"1","author":"B Purkaystha","year":"2019","unstructured":"Purkaystha B, Nahid M.M.H, Islam M.S. End-to-end bengali speech recognition using deepspeech. J Eng Res, Innov Edu JERIE. 2019;1(1):1\u201310.","journal-title":"J Eng Res, Innov Edu JERIE"},{"key":"4058_CR9","doi-asserted-by":"publisher","unstructured":"Sumit SH, Muntasir TA, Zaman M, Nandi RN, Sourov T. Noise robust end-to-end speech recognition for bangla language. In: Proceedings of the 2018 International Conference on Bangla Speech and Language Processing (ICBSLP), pp. 1\u20135. IEEE, Sylhet, Bangladesh (2018). https:\/\/doi.org\/10.1109\/ICBSLP.2018.8554871","DOI":"10.1109\/ICBSLP.2018.8554871"},{"key":"4058_CR10","unstructured":"Inc G. Large Bengali ASR Training Dataset. Accessed: 2024-01-16 (2018). https:\/\/www.openslr.org\/53"},{"key":"4058_CR11","first-page":"267","volume-title":"Recent findings in intelligent computing techniques advances in intelligent systems and computing","author":"N Bassan","year":"2019","unstructured":"Bassan N, Kadyan V. An experimental study of continuous automatic speech recognition system using mfcc with reference to punjabi language. In: Sa P, Bakshi S, Hatzilygeroudis I, Sahoo M, editors. Recent findings in intelligent computing techniques advances in intelligent systems and computing. Singapore: Springer; 2019. p. 267\u201375."},{"key":"4058_CR12","doi-asserted-by":"publisher","unstructured":"Bhardwaj V, Kukreja V, Kaur N, Modi N. Building an asr system for indian (punjabi) language and its evaluation for malwa and majha dialect: Preliminary results. In: Proceedings of the 12th International Conference on Computing Communication and Networking Technologies (ICCCNT), pp. 1\u20135. IEEE, Kharagpur, India (2021).https:\/\/doi.org\/10.1109\/ICCCNT51525.2021.9579471","DOI":"10.1109\/ICCCNT51525.2021.9579471"},{"key":"4058_CR13","doi-asserted-by":"publisher","unstructured":"Karunanayake Y, Thayasivam U, Ranathunga S. Sinhala and tamil speech intent identification from english phoneme based asr. In: Proceedings of the 2019 International Conference on Asian Language Processing (IALP), pp. 234\u2013239. IEEE, Shanghai, China (2019). https:\/\/doi.org\/10.1109\/IALP48816.2019.9037702","DOI":"10.1109\/IALP48816.2019.9037702"},{"key":"4058_CR14","unstructured":"Panayotov V, Chen G, Povey D, Khudanpur S. LibriSpeech ASR Corpus. OpenSLR (2015). https:\/\/www.openslr.org\/12"},{"issue":"10","key":"4058_CR15","doi-asserted-by":"publisher","first-page":"6747","DOI":"10.1007\/s00521-018-3499-9","volume":"31","author":"M Dua","year":"2019","unstructured":"Dua M, Aggarwal RK, Biswas M. Discriminatively trained continuous hindi speech recognition system using interpolated recurrent neural network language modeling. Neural Comput Appl. 2019;31(10):6747\u201355. https:\/\/doi.org\/10.1007\/s00521-018-3499-9.","journal-title":"Neural Comput Appl"},{"key":"4058_CR16","doi-asserted-by":"publisher","unstructured":"Kuamr A, Dua M, Choudhary A. Implementation and performance evaluation of continuous hindi speech recognition. In: 2014 International Conference on Electronics and Communication Systems (ICECS), pp. 1\u20135. IEEE, Coimbatore, India (2014). https:\/\/doi.org\/10.1109\/ECS.2014.6892777","DOI":"10.1109\/ECS.2014.6892777"},{"key":"4058_CR17","doi-asserted-by":"publisher","unstructured":"Moncy AM, Athira M, Hanna J, Rajan R. Automatic speech recognition in malayalam using dnn-based acoustic modelling. In: Proceedings of the 2020 IEEE Recent Advances in Intelligent Computational Systems (RAICS), pp. 170\u2013174. IEEE, Thiruvananthapuram, India (2020). https:\/\/doi.org\/10.1109\/RAICS51191.2020.9332493","DOI":"10.1109\/RAICS51191.2020.9332493"},{"key":"4058_CR18","doi-asserted-by":"publisher","unstructured":"Muhammad G, Alotaibi YA, Huda MN. Automatic speech recognition for bangla digits. In: Proceedings of the 2009 12th International Conference on Computers and Information Technology (ICCIT), pp. 379\u2013383. IEEE, Dhaka, Bangladesh (2009). https:\/\/doi.org\/10.1109\/ICCIT.2009.5407267","DOI":"10.1109\/ICCIT.2009.5407267"},{"key":"4058_CR19","doi-asserted-by":"publisher","unstructured":"Maruf MR, Faruque MO, Mahmood S, Nelima NN, Muhtasim MG, Pervez MJA. Effects of noise on rasta-plp and mfcc based bangla asr using cnn. In: Proceedings of the 2020 IEEE Region 10 Symposium (TENSYMP), pp. 1564\u20131567. IEEE, Dhaka, Bangladesh (2020). https:\/\/doi.org\/10.1109\/TENSYMP50017.2020.9231034","DOI":"10.1109\/TENSYMP50017.2020.9231034"},{"key":"4058_CR20","unstructured":"Garofolo JS, Lamel LF, Fisher WM, Fiscus JG, Pallett DS, Dahlgren NL, Zue V. TIMIT Acoustic-Phonetic Continuous Speech Corpus. Linguistic Data Consortium, Philadelphia. Accessed: 2024-01-19 (1993). https:\/\/doi.org\/10.35111\/17gk-bn40"},{"key":"4058_CR21","doi-asserted-by":"publisher","unstructured":"Nahid MMH, Purkaystha B, Islam MS. Bengali speech recognition: A double layered lstm-rnn approach. In: Proceedings of the 20th International Conference on Computer and Information Technology (ICCIT), pp. 1\u20136. IEEE, Dhaka, Bangladesh (2017). https:\/\/doi.org\/10.1109\/ICCITECHN.2017.8281848","DOI":"10.1109\/ICCITECHN.2017.8281848"},{"key":"4058_CR22","unstructured":"Bills A, Bishop J, David A, Dubinski E, Fiscus JG, Gillies B, Harper M, Jarrett A, Molina MEP, Ray J, Rytting A, Paget S, Shen W, Silber R, Tzoukermann E, Wong J. IARPA Babel Bengali Language Pack IARPA-babel103b-v0.4b. Intelligence Advanced Research Projects Activity (IARPA). Accessed: 2024-02-06 (2016). https:\/\/catalog.ldc.upenn.edu\/LDC2016S08"},{"key":"4058_CR23","unstructured":"Alam F, Habib SMM, Sultana DA, Khan M. Development of annotated bangla speech corpora. In: Proceedings of the 2nd Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU 2010), pp. 35\u201341. ISCA, Penang, Malaysia (2010)"},{"key":"4058_CR24","doi-asserted-by":"publisher","unstructured":"Pham N-Q, Nguyen T-S, Niehues J, M\u00fcller M, St\u00fcker S, Waibel A. Very deep self-attention networks for end-to-end speech recognition. arXiv preprint arXiv:1904.13377 (2019) https:\/\/doi.org\/10.48550\/arXiv.1904.13377","DOI":"10.48550\/arXiv.1904.13377"},{"key":"4058_CR25","doi-asserted-by":"publisher","unstructured":"Radford A, Kim JW, Xu T, Brockman G, McLeavey C, Sutskever I. Robust speech recognition via large-scale weak supervision. arXiv preprint arXiv:2212.04356 (2022) https:\/\/doi.org\/10.48550\/arXiv.2212.04356","DOI":"10.48550\/arXiv.2212.04356"},{"key":"4058_CR26","doi-asserted-by":"publisher","unstructured":"Baevski A, Zhou H, Mohamed A, Auli M. wav2vec 2.0: A framework for self-supervised learning of speech representations. arXiv preprint arXiv:2006.11477 (2020) https:\/\/doi.org\/10.48550\/arXiv.2006.11477","DOI":"10.48550\/arXiv.2006.11477"},{"key":"4058_CR27","doi-asserted-by":"publisher","unstructured":"Diwan A, Vaideeswaran R, Shah S, Singh A, Raghavan S, Khare S, Gulati P, Rasiwasia N, Abraham B. Multilingual and code-switching asr challenges for low resource indian languages. arXiv preprint arXiv:2104.00235 (2021) https:\/\/doi.org\/10.48550\/arXiv.2104.00235","DOI":"10.48550\/arXiv.2104.00235"},{"key":"4058_CR28","doi-asserted-by":"publisher","unstructured":"Kjartansson O, Sarin S, Pipatsrisawat K, Jansche M, Ha L. Crowd-sourced speech corpora for javanese, sundanese, sinhala, nepali, and bangladeshi bengali. In: Proceedings of the 6th International Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU 2018), pp. 52\u201355. International Speech Communication Association (ISCA), Gurugram, India (2018). https:\/\/doi.org\/10.21437\/SLTU.2018-11","DOI":"10.21437\/SLTU.2018-11"},{"key":"4058_CR29","doi-asserted-by":"publisher","unstructured":"Kehtarnavaz N. Chapter 7 - frequency domain processing. In: Kehtarnavaz, N. (ed.) Digital Signal Processing System Design (Second Edition), 2nd edn., pp. 175\u2013196. Academic Press, Burlington (2008). https:\/\/doi.org\/10.1016\/B978-0-12-374490-6.00007-6","DOI":"10.1016\/B978-0-12-374490-6.00007-6"},{"issue":"1","key":"4058_CR30","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1016\/0925-2312(95)00044-5","volume":"10","author":"R Cancelliere","year":"1996","unstructured":"Cancelliere R, Gemello R. Efficient training of time delay neural networks for sequential patterns. Neurocomputing. 1996;10(1):33\u201342. https:\/\/doi.org\/10.1016\/0925-2312(95)00044-5.","journal-title":"Neurocomputing"},{"issue":"1","key":"4058_CR31","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1007\/s10772-020-09757-0","volume":"25","author":"A Kumar","year":"2020","unstructured":"Kumar A, Aggarwal RK. Hindi speech recognition using time delay neural network acoustic modeling with i-vector adaptation. Int J Speech Technol. 2020;25(1):67\u201378. https:\/\/doi.org\/10.1007\/s10772-020-09757-0.","journal-title":"Int J Speech Technol"},{"issue":"10","key":"4058_CR32","doi-asserted-by":"publisher","first-page":"1123","DOI":"10.1007\/s10772-019-09652-3","volume":"22","author":"V Passricha","year":"2019","unstructured":"Passricha V, Aggarwal RK. Pso-based optimized cnn for hindi asr. Int J Speech Technol. 2019;22(10):1123\u201333. https:\/\/doi.org\/10.1007\/s10772-019-09652-3.","journal-title":"Int J Speech Technol"},{"key":"4058_CR33","doi-asserted-by":"publisher","unstructured":"Zhang Y, Pezeshki M, Brakel P, Zhang S, Laurent C, Bengio Y, Courville A. Towards end-to-end speech recognition with deep convolutional neural networks. arXiv preprint arXiv:1701.02720 (2017) https:\/\/doi.org\/10.48550\/arXiv.1701.02720","DOI":"10.48550\/arXiv.1701.02720"},{"key":"4058_CR34","doi-asserted-by":"publisher","unstructured":"Abdullah BM, Illina I, Fohr D. Dynamic extension of asr lexicon using wikipedia data. In: Proceedings of the 2018 IEEE Spoken Language Technology Workshop (SLT), pp. 196\u2013202. IEEE, Athens, Greece (2018). https:\/\/doi.org\/10.1109\/SLT.2018.8639592","DOI":"10.1109\/SLT.2018.8639592"}],"container-title":["SN Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-025-04058-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42979-025-04058-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-025-04058-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,3]],"date-time":"2025-06-03T00:02:13Z","timestamp":1748908933000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42979-025-04058-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,2]]},"references-count":34,"journal-issue":{"issue":"5","published-online":{"date-parts":[[2025,6]]}},"alternative-id":["4058"],"URL":"https:\/\/doi.org\/10.1007\/s42979-025-04058-2","relation":{},"ISSN":["2661-8907"],"issn-type":[{"value":"2661-8907","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6,2]]},"assertion":[{"value":"29 May 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 May 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 June 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed Consent"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Research Involving Human or Animals"}}],"article-number":"513"}}