{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,9]],"date-time":"2025-04-09T13:12:42Z","timestamp":1744204362979,"version":"3.37.3"},"reference-count":20,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2021,1,27]],"date-time":"2021-01-27T00:00:00Z","timestamp":1611705600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,27]],"date-time":"2021-01-27T00:00:00Z","timestamp":1611705600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2021,6]]},"DOI":"10.1007\/s10772-021-09811-5","type":"journal-article","created":{"date-parts":[[2021,1,27]],"date-time":"2021-01-27T06:02:52Z","timestamp":1611727372000},"page":"419-424","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Exploring end-to-end framework towards Khasi speech recognition system"],"prefix":"10.1007","volume":"24","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6711-2494","authenticated-orcid":false,"given":"Bronson","family":"Syiem","sequence":"first","affiliation":[]},{"given":"L. Joyprakash","family":"Singh","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,1,27]]},"reference":[{"key":"9811_CR1","unstructured":"Amodei, D., et al. (2016). Deep speech 2: End-to-End speech recognition in English and Mandarin. In Proceedings of the 33rd international conference on machine learning (Vol. 48, pp. 173\u2013182)."},{"key":"9811_CR2","first-page":"585","volume":"8","author":"RP Bachate","year":"2019","unstructured":"Bachate, R. P., & Sharma, A. (2019). Automatic speech recognition systems for regional languages in India. International Journal of Recent Technology and Engineering, 8, 585\u2013592.","journal-title":"International Journal of Recent Technology and Engineering"},{"key":"9811_CR3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472621","author":"W Chan","year":"2016","unstructured":"Chan, W., Jaitly, N., Le, Q., & Vinyals, O. (2016). Listen, attend and spell: A neural network for large vocabulary conversational speech recognition. IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). https:\/\/doi.org\/10.1109\/ICASSP.2016.7472621.","journal-title":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"},{"key":"9811_CR4","unstructured":"Escur i Gelabert, J. (2017). Exploring automatic speech recognition with TensorFlow (pp. 1\u201336). Degree thesis."},{"key":"9811_CR5","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s10772-018-9497-6","volume":"21","author":"J Guglani","year":"2018","unstructured":"Guglani, J., & Mishra, A. N. (2018). Continuous Punjabi speech recognition model based on Kaldi ASR toolkit. International Journal of Speech Technology, 21, 211\u2013216.","journal-title":"International Journal of Speech Technology"},{"key":"9811_CR6","unstructured":"Hannun, A., et al. (2014). Deep speech: Scaling up End-to-End speech recognition (pp. 1\u201312). arxiv.org\/abs\/1412.5567."},{"key":"9811_CR7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1296","author":"T Hori","year":"2017","unstructured":"Hori, T., Watanabe, S., Zhang, Y., & Chan, W. (2017). Advances in joint CTC-attention based End-to-End speech recognition with a deep CNN encoder and RNN-LM. Interspeech. https:\/\/doi.org\/10.21437\/Interspeech.2017-1296.","journal-title":"Interspeech"},{"key":"9811_CR8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953075,4835-4839","author":"S Kim","year":"2017","unstructured":"Kim, S., Hori, T., & Watanabe, S. (2017). Joint CTC-attention based End-to-End speech recognition using multi-task learning. IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). https:\/\/doi.org\/10.1109\/ICASSP.2017.7953075,4835-4839.","journal-title":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"},{"key":"9811_CR9","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639629","author":"G Kurata","year":"2018","unstructured":"Kurata, G., & Audhkhasi, K. (2018). Improved knowledge distillation from bi-directional to uni-directional LSTM CTC for End-to-End speech recognition. IEEE Spoken Language Technology Workshop (SLT). https:\/\/doi.org\/10.1109\/SLT.2018.8639629.","journal-title":"IEEE Spoken Language Technology Workshop (SLT)"},{"key":"9811_CR10","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1819","author":"J Li","year":"2019","unstructured":"Li, J., et al. (2019). Jasper: An End-to-End convolutional neural acoustic model. Interspeech. https:\/\/doi.org\/10.21437\/Interspeech.2019-1819.","journal-title":"Interspeech"},{"key":"9811_CR11","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404790","author":"Y Miao","year":"2015","unstructured":"Miao, Y., Gowayyed, M., & Metze, F. (2015). EESEN: End-to-End speech recognition using deep RNN models and WFST-based decoding. IEEE Workshop on Automatic Speech Recognition and Understanding. https:\/\/doi.org\/10.1109\/ASRU.2015.7404790.","journal-title":"IEEE Workshop on Automatic Speech Recognition and Understanding"},{"key":"9811_CR12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680","author":"DS Park","year":"2019","unstructured":"Park, D. S., et al. (2019). SpecAugment: A simple data augmentation method for automatic speech recognition. Interspeech. https:\/\/doi.org\/10.21437\/Interspeech.2019-2680.","journal-title":"Interspeech"},{"key":"9811_CR13","unstructured":"Renkens, V. Retrieved November 21, 2019, from https:\/\/www.github.com\/vrenkens\/nabu."},{"key":"9811_CR14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682850","author":"C Shan","year":"2019","unstructured":"Shan, C., et al. (2019). Investigating End-to-End speech recognition for Mandarin-English code-switching. IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). https:\/\/doi.org\/10.1109\/ICASSP.2019.8682850.","journal-title":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"},{"key":"9811_CR15","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462492","author":"C Shan","year":"2018","unstructured":"Shan, C., Zhang, J., Wang, Y., & Xie, L. (2018). Attention-based End-to-End speech recognition on voice search. IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). https:\/\/doi.org\/10.1109\/ICASSP.2018.8462492.","journal-title":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"},{"key":"9811_CR16","doi-asserted-by":"publisher","DOI":"10.1109\/ICBSLP.2018.8554871","author":"SH Sumit","year":"2018","unstructured":"Sumit, S. H., Al Muntasir, T., Zaman, M. A., Nandi, R. N., & Sourov, T. (2018). Noise Robust End-to-End speech recognition for Bangla language. International Conference on Bangla Speech and Language Processing (ICBSLP). https:\/\/doi.org\/10.1109\/ICBSLP.2018.8554871.","journal-title":"International Conference on Bangla Speech and Language Processing (ICBSLP)"},{"issue":"8","key":"9811_CR17","doi-asserted-by":"publisher","first-page":"1240","DOI":"10.1109\/JSTSP.2017.2763455","volume":"11","author":"S Watanabe","year":"2017","unstructured":"Watanabe, S. (2017). Hybrid CTC\/attention architecture for End-to-End speech recognition. IEEE Journal of Selected Topics in Signal Processing, 11(8), 1240\u20131253.","journal-title":"IEEE Journal of Selected Topics in Signal Processing"},{"key":"9811_CR18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1616","author":"A Zeyer","year":"2018","unstructured":"Zeyer, A., Irie, K., Schluter, R., & Ney, H. (2018). Improved training of End-to-End attention models for speech recognition. Interspeech. https:\/\/doi.org\/10.21437\/Interspeech.2018-1616.","journal-title":"Interspeech"},{"key":"9811_CR19","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-1446","author":"Y Zhang","year":"2016","unstructured":"Zhang, Y., et al. (2016). Towards End-to-End speech recognition with deep convolutional neural networks. International Conference on Intelligent Robotics and Applications. https:\/\/doi.org\/10.21437\/Interspeech.2016-1446.","journal-title":"International Conference on Intelligent Robotics and Applications"},{"key":"9811_CR20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953077","author":"Y Zhang","year":"2017","unstructured":"Zhang, Y., Chan, W., & Jaitly, N. (2017). Very deep convolutional networks for End-to-End speech recognition. IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). https:\/\/doi.org\/10.1109\/ICASSP.2017.7953077.","journal-title":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-021-09811-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-021-09811-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-021-09811-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,5,17]],"date-time":"2021-05-17T13:14:19Z","timestamp":1621257259000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-021-09811-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,27]]},"references-count":20,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2021,6]]}},"alternative-id":["9811"],"URL":"https:\/\/doi.org\/10.1007\/s10772-021-09811-5","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"type":"print","value":"1381-2416"},{"type":"electronic","value":"1572-8110"}],"subject":[],"published":{"date-parts":[[2021,1,27]]},"assertion":[{"value":"16 March 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 November 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 January 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}