{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T22:41:59Z","timestamp":1765233719788},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2024,3,6]],"date-time":"2024-03-06T00:00:00Z","timestamp":1709683200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,3,6]],"date-time":"2024-03-06T00:00:00Z","timestamp":1709683200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"DST-ICPS","award":["T88"],"award-info":[{"award-number":["T88"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SN COMPUT. SCI."],"DOI":"10.1007\/s42979-024-02608-8","type":"journal-article","created":{"date-parts":[[2024,3,6]],"date-time":"2024-03-06T13:02:32Z","timestamp":1709730152000},"update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["CRDNN-BiLSTM Knowledge Distillation Model Towards Enhancing the Automatic Speech Recognition"],"prefix":"10.1007","volume":"5","author":[{"given":"L.","family":"Ashok Kumar","sequence":"first","affiliation":[]},{"given":"D.","family":"Karthika Renuka","sequence":"additional","affiliation":[]},{"given":"K. S.","family":"Naveena","sequence":"additional","affiliation":[]},{"given":"S.","family":"Sree Resmi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,3,6]]},"reference":[{"key":"2608_CR1","doi-asserted-by":"crossref","unstructured":"Asami T, Masumura R, Yamaguchi Y, Masataki H, Aono Y. Domain adaptation of dnn acoustic models using knowledge distillation. In: 2017 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE; 2017, March. p. 5185\u20135189.","DOI":"10.1109\/ICASSP.2017.7953145"},{"key":"2608_CR2","unstructured":"Chung J, Gulcehre C, Cho K, Bengio Y. Empirical evaluation of gated recurrent neural networks on sequence modelling 2014. arXiv preprint arXiv:1412.3555."},{"key":"2608_CR3","unstructured":"Collobert R, Puhrsch C, Synnaeve G. Wav2letter: an end-to-end convnet-based speech recognition system, 2016."},{"key":"2608_CR4","doi-asserted-by":"crossref","unstructured":"Fukuda T, Suzuki M, Kurata G, Thomas S, Cui J, Ramabhadran B. Efficient knowledge distillation from an ensemble of teachers. In: Interspeech 2017, August. p. 3697\u20133701.","DOI":"10.21437\/Interspeech.2017-614"},{"key":"2608_CR5","doi-asserted-by":"crossref","unstructured":"Graves A, Fern\u00e1ndez S, Gomez F, Schmidhuber J. Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd international conference on Machine learning 2006, June. p. 369\u2013376.","DOI":"10.1145\/1143844.1143891"},{"key":"2608_CR6","doi-asserted-by":"crossref","unstructured":"Gudepu PR, Vadisetti GP, Niranjan A, Saranu K, Sarma R, Shaik MAB, Paramasivam P. Whisper augmented end-to-end\/hybrid speech recognition system-CycleGAN approach. In: INTERSPEECH; 2020. p. 2302\u20132306.","DOI":"10.21437\/Interspeech.2020-2639"},{"key":"2608_CR7","doi-asserted-by":"crossref","unstructured":"Guo J, Sainath T, RonWeiss. 
A spelling correction model for end-to-end speech recognition, 05 2019.","DOI":"10.1109\/ICASSP.2019.8683745"},{"key":"2608_CR8","unstructured":"Hinton G, Vinyals O, Dean J. Distilling the knowledge in a neural network 2015. arXiv preprint arXiv:1503.02531."},{"key":"2608_CR9","doi-asserted-by":"crossref","unstructured":"Huang M, You Y, Chen Z, Qian Y, Yu K. Knowledge distillation for sequence model. In: Interspeech 2018, September. p. 3703\u20133707.","DOI":"10.21437\/Interspeech.2018-1589"},{"key":"2608_CR10","unstructured":"Hui L, Belkin M. Evaluation of neural architectures trained with square loss vs cross-entropy in classification tasks 2020. arXiv preprint arXiv:2006.07322."},{"key":"2608_CR11","unstructured":"Ioffe S, Szegedy C. Batch normalization: Accelerating deep network training by reducing internal covariate shift. In: International conference on machine learning. PMLR. 2015, June. p. 448\u2013456."},{"key":"2608_CR12","doi-asserted-by":"crossref","unstructured":"Jiang Y, Sharma B, Madhavi M, Li H. Knowledge distillation from BERT transformer to speech transformer for intent classification 2021. arXiv preprint arXiv:2108.02598.","DOI":"10.21437\/Interspeech.2021-402"},{"key":"2608_CR13","doi-asserted-by":"crossref","unstructured":"Kumar LA, Renuka DK, Priya MS. Towards robust speech recognition model using deep learning. In: 2023 International conference on intelligent systems for communication, IoT and security (ICISCoIS) 2023, February. IEEE. p. 253\u2013256.","DOI":"10.1109\/ICISCoIS56541.2023.10100390"},{"key":"2608_CR14","doi-asserted-by":"crossref","unstructured":"Kurata G, Audhkhasi K. Improved knowledge distillation from bi-directional to uni-directional LSTM CTC for end-to-end speech recognition. In: 2018 IEEE spoken language technology workshop (SLT) 2018, December. IEEE. p. 411\u2013417.","DOI":"10.1109\/SLT.2018.8639629"},{"key":"2608_CR15","doi-asserted-by":"crossref","unstructured":"Lee MH, Chang JH. Knowledge distillation from language model to acoustic model: a hierarchical multi-task learning approach. In: ICASSP 2022-2022 IEEE international conference on acoustics, speech and signal processing (ICASSP) 2022, May. IEEE. p. 8392\u20138396.","DOI":"10.1109\/ICASSP43922.2022.9747082"},{"key":"2608_CR16","doi-asserted-by":"crossref","unstructured":"Li C, Zhu L, Xu S, Gao P, Xu B. Compression of the acoustic model via knowledge distillation and pruning. In: 2018 24th International conference on pattern recognition (ICPR) 2018, August. IEEE. p. 2785\u20132790.","DOI":"10.1109\/ICPR.2018.8545028"},{"key":"2608_CR17","doi-asserted-by":"crossref","unstructured":"Li J, Lavrukhin V, Ginsburg B, Leary R, Kuchaiev O, Cohen JM, Gadde RT. Jasper: an end-to-end convolutional neural acoustic model. 2019. arXiv preprint arXiv:1904.03288.","DOI":"10.21437\/Interspeech.2019-1819"},{"key":"2608_CR18","doi-asserted-by":"crossref","unstructured":"Liu Y, Xiong H, He Z, Zhang J, Wu H, Wang H, Zong C. End-to-end speech translation with knowledge distillation 2019. arXiv preprint arXiv:1904.08075.","DOI":"10.21437\/Interspeech.2019-2582"},{"key":"2608_CR19","unstructured":"Lu KH, Chen KY. A context-aware knowledge transferring strategy for CTC-based ASR 2022. arXiv preprint arXiv:2210.06244."},{"key":"2608_CR20","doi-asserted-by":"crossref","unstructured":"Masumura R, Makishima N, Ihori M, Takashima A, Tanaka T, Orihashi S. Hierarchical transformer-based large-context end-to-end asr with large-context knowledge distillation. 
In: ICASSP 2021-2021 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE. 2021, June. p. 5879\u20135883.","DOI":"10.1109\/ICASSP39728.2021.9414928"},{"issue":"4","key":"2608_CR21","doi-asserted-by":"publisher","first-page":"4873","DOI":"10.3233\/JIFS-213332","volume":"43","author":"S Priya","year":"2022","unstructured":"Priya S, Karthika Renuka D, Ashok Kumar L. Towards improving speech recognition model with post-processing spell correction using BERT. J Intell Fuzzy Syst. 2022;43(4):4873\u201382.","journal-title":"J Intell Fuzzy Syst"},{"issue":"4","key":"2608_CR22","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1007\/s12046-022-01973-5","volume":"47","author":"MS Priya","year":"2022","unstructured":"Priya MS, Renuka DK, Kumar LA, Rose SL. Multilingual low resource Indian language speech recognition and spell correction using Indic BERT. S\u0101dhan\u0101. 2022;47(4):227.","journal-title":"S\u0101dhan\u0101"},{"key":"2608_CR23","unstructured":"Ravanelli M, Parcollet T, Plantinga P, Rouhe A, Cornell S, Lugosch L, Bengio Y. SpeechBrain: a general-purpose speech toolkit 2021. arXiv preprint arXiv:2106.04624."},{"key":"2608_CR24","volume-title":"Deep learning using python","author":"LS Rose","year":"2019","unstructured":"Rose LS, Kumar LA, Renuka DK. Deep learning using python. Oxford: Wiley; 2019."},{"key":"2608_CR25","doi-asserted-by":"publisher","first-page":"2633","DOI":"10.21437\/Interspeech.2022-775","volume":"2022","author":"S Tian","year":"2022","unstructured":"Tian S, Deng K, Li Z, Ye L, Cheng G, Li T, Yan Y. Knowledge distillation for CTC-based speech recognition via consistent acoustic representation learning. Proc Interspeech. 2022;2022:2633\u20137.","journal-title":"Proc. Interspeech"},{"key":"2608_CR26","doi-asserted-by":"crossref","unstructured":"Wang Y, Zhao J. Continuous speech recognition model based on CTC technology. In: 2018 International conference on network, communication, computer engineering (NCCE 2018). Atlantis Press. 2018, May. p. 149\u2013152.","DOI":"10.2991\/ncce-18.2018.25"},{"key":"2608_CR27","unstructured":"Yang X, Li Q, Zhang C, Woodland PC. Knowledge distillation from multiple foundation models for end-to-end speech recognition 2023. arXiv preprint arXiv:2303.10917."},{"key":"2608_CR28","unstructured":"Yi J, Tao J, Wen Z, Liu B. Distilling knowledge using parallel data for far-field speech recognition 2018. arXiv preprint arXiv:1802.06941."},{"key":"2608_CR29","unstructured":"Yuan Z, Lyu Z, Li J, Zhou X. An improved hybrid CTC-attention model for speech recognition 2018. arXiv preprint arXiv:1810.12020."},{"key":"2608_CR30","doi-asserted-by":"publisher","first-page":"1385","DOI":"10.1109\/TASLP.2020.2988423","volume":"28","author":"W Zhang","year":"2020","unstructured":"Zhang W, Chang X, Qian Y, Watanabe S. Improving end-to-end single-channel multi-talker speech recognition. IEEE\/ACM Trans Audio, Speech Lang Process. 
2020;28:1385\u201394.","journal-title":"IEEE\/ACM Trans Audio, Speech Lang Process"}],"container-title":["SN Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-024-02608-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42979-024-02608-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-024-02608-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,6]],"date-time":"2024-03-06T13:03:18Z","timestamp":1709730198000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42979-024-02608-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,6]]},"references-count":30,"journal-issue":{"issue":"3","published-online":{"date-parts":[[2024,3]]}},"alternative-id":["2608"],"URL":"https:\/\/doi.org\/10.1007\/s42979-024-02608-8","relation":{},"ISSN":["2661-8907"],"issn-type":[{"value":"2661-8907","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,3,6]]},"assertion":[{"value":"22 December 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 January 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 March 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that there is no conflict of interest","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"304"}}
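The block above is the Crossref works-API record for this article ("message-type":"work"). As a hedged sketch of how such a record can be retrieved and inspected, the snippet below uses the public Crossref REST API route https://api.crossref.org/works/{DOI} together with the third-party requests package; the User-Agent string and mailto address are placeholders, not values taken from the record.

```python
# Minimal sketch: fetching and inspecting a Crossref work record like the one above.
# Assumptions: public Crossref REST API (api.crossref.org), `requests` installed,
# and a placeholder User-Agent / mailto that you should replace with your own
# contact details to use Crossref's "polite" pool.
import requests

DOI = "10.1007/s42979-024-02608-8"
url = f"https://api.crossref.org/works/{DOI}"

resp = requests.get(
    url,
    headers={"User-Agent": "metadata-check/0.1 (mailto:you@example.org)"},
    timeout=30,
)
resp.raise_for_status()

work = resp.json()["message"]                    # the payload shown above sits under "message"
print(work["title"][0])                          # article title
print(work["DOI"], "->", work["resource"]["primary"]["URL"])
print("references deposited:", len(work.get("reference", [])))

for ref in work.get("reference", []):
    # every entry has a "key"; DOI and unstructured citation text are optional
    print(ref["key"], ref.get("DOI", ref.get("unstructured", ""))[:60])
```

Fields such as "indexed", "deposited" and "score" are Crossref housekeeping metadata rather than article content; the bibliographic substance is carried by "title", "author", "reference", the license and funder entries, and the "assertion" block holding the article history and declarations.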