{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T04:20:02Z","timestamp":1743049202192,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":43,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819749843"},{"type":"electronic","value":"9789819749850"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-4985-0_29","type":"book-chapter","created":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T11:07:08Z","timestamp":1721041628000},"page":"372-383","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Vietnamese Automatic Speech Recognition for\u00a0Financial Conversation Data"],"prefix":"10.1007","author":[{"given":"Tung Tran","family":"Nguyen Doan","sequence":"first","affiliation":[]},{"given":"Son Thanh","family":"Huynh","sequence":"additional","affiliation":[]},{"given":"An Trong","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"An Tran-Hoai","family":"Le","sequence":"additional","affiliation":[]},{"given":"An","family":"Phan Thi Thuy","sequence":"additional","affiliation":[]},{"given":"Dang T.","family":"Huynh","sequence":"additional","affiliation":[]},{"given":"Binh T.","family":"Nguyen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,7,16]]},"reference":[{"key":"29_CR1","unstructured":"Amodei, D., et al.: Deep Speech 2: end-to-end speech recognition in English and Mandarin. In: Proceedings of the 33rd International Conference on International Conference on Machine Learning, vol. 48, pp. 173\u2013182. ICML\u201916, JMLR.org (2016)"},{"key":"29_CR2","unstructured":"Ardila, R., et al.: Common Voice: a massively-multilingual speech corpus. In: Proceedings of the Twelfth Language Resources and Evaluation Conference, pp. 4218\u20134222. European Language Resources Association, Marseille, France (2020). https:\/\/aclanthology.org\/2020.lrec-1.520"},{"key":"29_CR3","doi-asserted-by":"publisher","unstructured":"Babu, A., et al.: XLS-R: self-supervised cross-lingual speech representation learning at scale. In: Proceedings of Interspeech 2022, pp. 2278\u20132282 (2022). https:\/\/doi.org\/10.21437\/Interspeech.2022-143","DOI":"10.21437\/Interspeech.2022-143"},{"key":"29_CR4","unstructured":"Baevski, A., Zhou, H., Mohamed, A., Auli, M.: Wav2vec 2.0: a framework for self-supervised learning of speech representations. In: Proceedings of the 34th International Conference on Neural Information Processing Systems. NIPS\u201920, Curran Associates Inc., Red Hook, NY, USA (2020)"},{"key":"29_CR5","doi-asserted-by":"publisher","unstructured":"Bahdanau, D., Chorowski, J., Serdyuk, D., Brakel, P., Bengio, Y.: End-to-end attention-based large vocabulary speech recognition. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4945\u20134949 (2016). https:\/\/doi.org\/10.1109\/ICASSP.2016.7472618","DOI":"10.1109\/ICASSP.2016.7472618"},{"issue":"6","key":"29_CR6","doi-asserted-by":"publisher","first-page":"1554","DOI":"10.1214\/aoms\/1177699147","volume":"37","author":"LE Baum","year":"1966","unstructured":"Baum, L.E., Petrie, T.: Statistical inference for probabilistic functions of finite state Markov chains. Ann. Math. Stat. 37(6), 1554\u20131563 (1966)","journal-title":"Ann. Math. Stat."},{"issue":"10\u201311","key":"29_CR7","doi-asserted-by":"publisher","first-page":"763","DOI":"10.1016\/j.specom.2007.02.006","volume":"49","author":"M Benzeghiba","year":"2007","unstructured":"Benzeghiba, M., et al.: Automatic speech recognition and speech variability: a review. Speech Commun. 49(10\u201311), 763\u2013786 (2007)","journal-title":"Speech Commun."},{"key":"29_CR8","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1016\/j.specom.2013.07.008","volume":"56","author":"L Besacier","year":"2014","unstructured":"Besacier, L., Barnard, E., Karpov, A., Schultz, T.: Automatic speech recognition for under-resourced languages: a survey. Speech Commun. 56, 85\u2013100 (2014)","journal-title":"Speech Commun."},{"key":"29_CR9","doi-asserted-by":"publisher","unstructured":"Child, R., Gray, S., Radford, A., Sutskever, I.: Generating long sequences with sparse transformers (2019). https:\/\/doi.org\/10.48550\/ARXIV.1904.10509","DOI":"10.48550\/ARXIV.1904.10509"},{"key":"29_CR10","doi-asserted-by":"publisher","unstructured":"Conneau, A., Baevski, A., Collobert, R., Mohamed, A., Auli, M.: Unsupervised cross-lingual representation learning for speech recognition. In: Proceedings of Interspeech 2021, pp. 2426\u20132430 (2021). https:\/\/doi.org\/10.21437\/Interspeech.2021-329","DOI":"10.21437\/Interspeech.2021-329"},{"key":"29_CR11","doi-asserted-by":"publisher","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 369\u2013376. ICML \u201906, Association for Computing Machinery, New York, NY, USA (2006). https:\/\/doi.org\/10.1145\/1143844.1143891","DOI":"10.1145\/1143844.1143891"},{"key":"29_CR12","doi-asserted-by":"publisher","unstructured":"Graves, A., Mohamed, A.r., Hinton, G.: Speech recognition with deep recurrent neural networks. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 6645\u20136649 (2013). https:\/\/doi.org\/10.1109\/ICASSP.2013.6638947","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"29_CR13","doi-asserted-by":"crossref","unstructured":"Han, W., et a;: ContextNet: improving convolutional neural networks for automatic speech recognition with global context (2020)","DOI":"10.21437\/Interspeech.2020-2059"},{"key":"29_CR14","unstructured":"Hannun, A., et\u00a0al.: Deep speech: scaling up end-to-end speech recognition. arXiv preprint arXiv:1412.5567 (2014)"},{"key":"29_CR15","unstructured":"Hendrycks, D., Gimpel, K.: Gaussian error linear units (GELUs). arXiv preprint arXiv:1606.08415 (2016)"},{"issue":"8","key":"29_CR16","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997). https:\/\/doi.org\/10.1162\/neco.1997.9.8.1735","journal-title":"Neural Comput."},{"key":"29_CR17","doi-asserted-by":"publisher","unstructured":"Jordan, M.I.: Chapter 25 - serial order: a parallel distributed processing approach. In: Donahoe, J.W., Packard Dorsel, V. (eds.) Neural-Network Models of Cognition, Advances in Psychology, vol.\u00a0121, pp. 471\u2013495. North-Holland (1997). https:\/\/doi.org\/10.1016\/S0166-4115(97)80111-2, https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0166411597801112","DOI":"10.1016\/S0166-4115(97)80111-2"},{"key":"29_CR18","doi-asserted-by":"crossref","unstructured":"Karita, S., et\u00a0al.: A comparative study on transformer vs RNN in speech applications. In: 2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU), pp. 449\u2013456. IEEE (2019)","DOI":"10.1109\/ASRU46091.2019.9003750"},{"key":"29_CR19","unstructured":"Kim, S., et al.: Squeezeformer: an efficient transformer for automatic speech recognition. In: Oh, A.H., Agarwal, A., Belgrave, D., Cho, K. (eds.) Advances in Neural Information Processing Systems (2022). https:\/\/openreview.net\/forum?id=gE_vt-w4LhL"},{"issue":"10","key":"29_CR20","first-page":"1995","volume":"3361","author":"Y LeCun","year":"1995","unstructured":"LeCun, Y., Bengio, Y., et al.: Convolutional networks for images, speech, and time series. The Handbook of Brain Theory and Neural Networks 3361(10), 1995 (1995)","journal-title":"The Handbook of Brain Theory and Neural Networks"},{"key":"29_CR21","unstructured":"Logan, B., et\u00a0al.: Mel frequency cepstral coefficients for music modeling. In: ISMIR, vol.\u00a0270, p.\u00a011. Plymouth, MA (2000)"},{"key":"29_CR22","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: International Conference on Learning Representations (2019). https:\/\/openreview.net\/forum?id=Bkg6RiCqY7"},{"key":"29_CR23","unstructured":"Luong, H.T., Vu, H.Q.: A non-expert Kaldi recipe for Vietnamese speech recognition system. In: Proceedings of the Third International Workshop on Worldwide Language Service Infrastructure and Second Workshop on Open Infrastructures and Analysis Frameworks for Human Language Technologies (WLSI\/OIAF4HLT2016), pp. 51\u201355. The COLING 2016 Organizing Committee, Osaka, Japan (2016). https:\/\/aclanthology.org\/W16-5207"},{"key":"29_CR24","doi-asserted-by":"publisher","first-page":"9411","DOI":"10.1007\/s11042-020-10073-7","volume":"80","author":"M Malik","year":"2021","unstructured":"Malik, M., Malik, M.K., Mehmood, K., Makhdoom, I.: Automatic speech recognition: a survey. Multimedia Tools Appl. 80, 9411\u20139457 (2021)","journal-title":"Multimedia Tools Appl."},{"key":"29_CR25","doi-asserted-by":"crossref","unstructured":"Moritz, N., Hori, T., Le, J.: Streaming automatic speech recognition with the transformer model. In: ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6074\u20136078. IEEE (2020)","DOI":"10.1109\/ICASSP40776.2020.9054476"},{"key":"29_CR26","doi-asserted-by":"publisher","first-page":"30069","DOI":"10.1109\/ACCESS.2022.3159339","volume":"10","author":"J Oruh","year":"2022","unstructured":"Oruh, J., Viriri, S., Adegun, A.: Long short-term memory recurrent neural network for automatic speech recognition. IEEE Access 10, 30069\u201330079 (2022)","journal-title":"IEEE Access"},{"key":"29_CR27","unstructured":"Pascanu, R., Mikolov, T., Bengio, Y.: On the difficulty of training recurrent neural networks. In: Proceedings of the 30th International Conference on International Conference on Machine Learning, vol. 28, p. III-1310\u2013III-1318. ICML\u201913, JMLR.org (2013)"},{"key":"29_CR28","unstructured":"Radford, A., Kim, J.W., Xu, T., Brockman, G., Mcleavey, C., Sutskever, I.: Robust speech recognition via large-scale weak supervision. In: Krause, A., Brunskill, E., Cho, K., Engelhardt, B., Sabato, S., Scarlett, J. (eds.) Proceedings of the 40th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a0202, pp. 28492\u201328518. PMLR (2023). https:\/\/proceedings.mlr.press\/v202\/radford23a.html"},{"key":"29_CR29","doi-asserted-by":"publisher","unstructured":"Rao, K., Sak, H., Prabhavalkar, R.: Exploring architectures, data and units for streaming end-to-end speech recognition with RNN-transducer. In: 2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU), pp. 193\u2013199 (2017). https:\/\/doi.org\/10.1109\/ASRU.2017.8268935","DOI":"10.1109\/ASRU.2017.8268935"},{"key":"29_CR30","doi-asserted-by":"crossref","unstructured":"Reitmaier, T., et al.: Opportunities and challenges of automatic speech recognition systems for low-resource language speakers. In: Proceedings of the 2022 CHI Conference on Human Factors in Computing Systems, pp. 1\u201317 (2022)","DOI":"10.1145\/3491102.3517639"},{"key":"29_CR31","doi-asserted-by":"publisher","unstructured":"Reynolds, D.: Gaussian Mixture Models, pp. 659\u2013663. Springer US, Boston, MA (2009). https:\/\/doi.org\/10.1007\/978-0-387-73003-5_196","DOI":"10.1007\/978-0-387-73003-5_196"},{"key":"29_CR32","doi-asserted-by":"crossref","unstructured":"Rumelhart, D.E., Hinton, G.E., Williams, R.J.: Learning Internal Representations by Error Propagation, pp. 318\u2013362. MIT Press, Cambridge, MA, USA (1986)","DOI":"10.21236\/ADA164453"},{"issue":"5","key":"29_CR33","doi-asserted-by":"publisher","first-page":"965","DOI":"10.1109\/TASLP.2017.2672401","volume":"25","author":"TN Sainath","year":"2017","unstructured":"Sainath, T.N., et al.: Multichannel signal processing with deep neural networks for automatic speech recognition. IEEE\/ACM Trans. Audio, Speech Lang. Process. 25(5), 965\u2013979 (2017)","journal-title":"IEEE\/ACM Trans. Audio, Speech Lang. Process."},{"key":"29_CR34","doi-asserted-by":"publisher","unstructured":"Schmidhuber, J.: Deep learning in neural networks: an overview. Neural Netw. 61, 85\u2013117 (2015). https:\/\/doi.org\/10.1016\/j.neunet.2014.09.003, https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0893608014002135","DOI":"10.1016\/j.neunet.2014.09.003"},{"key":"29_CR35","unstructured":"Sigurdsson, S., Petersen, K.B., Lehn-Schi\u00f8ler, T.: Mel frequency cepstral coefficients: an evaluation of robustness of MP3 encoded music. In: ISMIR, pp. 286\u2013289 (2006)"},{"key":"29_CR36","doi-asserted-by":"publisher","first-page":"326","DOI":"10.1016\/j.neucom.2014.03.005","volume":"140","author":"SM Siniscalchi","year":"2014","unstructured":"Siniscalchi, S.M., Svendsen, T., Lee, C.H.: An artificial neural network approach to automatic speech processing. Neurocomputing 140, 326\u2013338 (2014)","journal-title":"Neurocomputing"},{"key":"29_CR37","unstructured":"Song, W., Cai, J.: End-to-end deep neural network for automatic speech recognition. Standford CS224D Reports, pp.\u00a01\u20138 (2015)"},{"key":"29_CR38","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1016\/j.csl.2018.04.003","volume":"52","author":"C Spille","year":"2018","unstructured":"Spille, C., Kollmeier, B., Meyer, B.T.: Comparing human and automatic speech recognition in simple and complex acoustic scenes. Comput. Speech Lang. 52, 123\u2013140 (2018)","journal-title":"Comput. Speech Lang."},{"key":"29_CR39","doi-asserted-by":"crossref","unstructured":"Srivastava, B.M.L., et al.: Interspeech 2018 low resource automatic speech recognition challenge for Indian languages. In: SLTU, pp. 11\u201314 (2018)","DOI":"10.21437\/SLTU.2018-3"},{"key":"29_CR40","doi-asserted-by":"crossref","unstructured":"Sudoh, K., Tsukada, H., Isozaki, H.: Incorporating speech recognition confidence into discriminative named entity recognition of speech data. In: Proceedings of the 21st International Conference on Computational Linguistics and 44th Annual Meeting of the Association for Computational Linguistics, pp. 617\u2013624 (2006)","DOI":"10.3115\/1220175.1220253"},{"key":"29_CR41","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Proceedings of the 31st International Conference on Neural Information Processing Systems, pp. 6000\u20136010. NIPS\u201917, Curran Associates Inc., Red Hook, NY, USA (2017)"},{"issue":"8","key":"29_CR42","doi-asserted-by":"publisher","first-page":"1240","DOI":"10.1109\/JSTSP.2017.2763455","volume":"11","author":"S Watanabe","year":"2017","unstructured":"Watanabe, S., Hori, T., Kim, S., Hershey, J.R., Hayashi, T.: Hybrid CTC\/attention architecture for end-to-end speech recognition. IEEE J. Sel. Top. Sig. Process. 11(8), 1240\u20131253 (2017). https:\/\/doi.org\/10.1109\/JSTSP.2017.2763455","journal-title":"IEEE J. Sel. Top. Sig. Process."},{"key":"29_CR43","doi-asserted-by":"publisher","first-page":"332","DOI":"10.1007\/978-3-030-27529-7_29","volume-title":"Intelligent Robotics and Applications","author":"W Zhang","year":"2019","unstructured":"Zhang, W., Zhai, M., Huang, Z., Liu, C., Li, W., Cao, Y.: Towards end-to-end speech recognition with deep multipath convolutional neural networks. In: Yu, H., Liu, J., Liu, L., Ju, Z., Liu, Y., Zhou, D. (eds.) Intelligent Robotics and Applications, pp. 332\u2013341. Springer International Publishing, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-27529-7_29"}],"container-title":["Lecture Notes in Computer Science","Intelligent Information and Database Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-4985-0_29","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T11:15:34Z","timestamp":1721042134000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-4985-0_29"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819749843","9789819749850"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-4985-0_29","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"16 July 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACIIDS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Intelligent Information and Database Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ras Al Khaimah","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Arab Emirates","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 April 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 April 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aciids2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/aciids.pwr.edu.pl\/2024\/index.php#about","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}