{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T00:07:28Z","timestamp":1755907648297,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":17,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,12,8]],"date-time":"2023-12-08T00:00:00Z","timestamp":1701993600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,12,8]]},"DOI":"10.1145\/3638584.3638634","type":"proceedings-article","created":{"date-parts":[[2024,3,14]],"date-time":"2024-03-14T11:15:19Z","timestamp":1710414919000},"page":"312-318","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Vietnamese Voice2Text: A Web Application for Whisper Implementation in Vietnamese Automatic Speech Recognition Tasks: Vietnamese Voice2Text"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-1333-1358","authenticated-orcid":false,"given":"Quangphuoc","family":"Nguyen","sequence":"first","affiliation":[{"name":"FPT University, Vietnam"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3015-4904","authenticated-orcid":false,"given":"Ngocminh","family":"Nguyen","sequence":"additional","affiliation":[{"name":"FPT University, Vietnam"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-7775-5072","authenticated-orcid":false,"given":"Thanhluan","family":"Dang","sequence":"additional","affiliation":[{"name":"FPT University, Vietnam"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2714-6707","authenticated-orcid":false,"given":"Vanha","family":"Tran","sequence":"additional","affiliation":[{"name":"FPT University, Vietnam"}]}],"member":"320","published-online":{"date-parts":[[2024,3,14]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations. Advances in neural information processing systems 33","author":"Baevski Alexei","year":"2020","unstructured":"Alexei Baevski, Yuhao Zhou, Abdelrahman Mohamed, and Michael Auli. 2020. wav2vec 2.0: A framework for self-supervised learning of speech representations. Advances in neural information processing systems 33 (2020), 12449\u201312460."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1975.1162650"},{"key":"e_1_3_2_1_3_1","unstructured":"William\u00a0C Dersch. [n. d.]. IBM Archives: IBM Shoebox. URL http:\/\/www-03. ibm. com\/ibm\/history\/exhibits\/specialprod1\/specialprod1 { _} 7 ([n. d.])."},{"key":"e_1_3_2_1_4_1","volume-title":"Conformer: Convolution-augmented transformer for speech recognition. arXiv preprint arXiv:2005.08100","author":"Gulati Anmol","year":"2020","unstructured":"Anmol Gulati, James Qin, Chung-Cheng Chiu, and Niki Parmar. 2020. Conformer: Convolution-augmented transformer for speech recognition. arXiv preprint arXiv:2005.08100 (2020)."},{"key":"e_1_3_2_1_5_1","volume-title":"Conformer: Convolution-augmented transformer for speech recognition. arXiv preprint arXiv:2005.08100","author":"Gulati Anmol","year":"2020","unstructured":"Anmol Gulati, James Qin, Chung-Cheng Chiu, Niki Parmar, and Zhang. 2020. Conformer: Convolution-augmented transformer for speech recognition. arXiv preprint arXiv:2005.08100 (2020)."},{"key":"e_1_3_2_1_6_1","unstructured":"DV Hai and ASR Challenge. 2021. Vietnamese Automatic Speech Recognition."},{"key":"e_1_3_2_1_7_1","volume-title":"Deep speech: Scaling up end-to-end speech recognition. arXiv preprint arXiv:1412.5567","author":"Hannun Awni","year":"2014","unstructured":"Awni Hannun, Carl Case, Jared Casper, Bryan Catanzaro, Greg Diamos, Erich Elsen, Ryan Prenger, Sanjeev Satheesh, Shubho Sengupta, Adam Coates, 2014. Deep speech: Scaling up end-to-end speech recognition. arXiv preprint arXiv:1412.5567 (2014)."},{"key":"e_1_3_2_1_8_1","volume-title":"Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups","author":"Hinton Geoffrey","year":"2012","unstructured":"Geoffrey Hinton, Li Deng, Dong Yu, George\u00a0E Dahl, and Abdel-rahman Mohamed. 2012. Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups. IEEE Signal processing magazine 29, 6 (2012), 82\u201397."},{"key":"e_1_3_2_1_9_1","volume-title":"Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups","author":"Hinton Geoffrey","year":"2012","unstructured":"Geoffrey Hinton, Li Deng, Dong Yu, George\u00a0E Dahl, Abdel-rahman Mohamed, Navdeep Jaitly, Andrew Senior, Vincent Vanhoucke, Patrick Nguyen, Tara\u00a0N Sainath, 2012. Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups. IEEE Signal processing magazine 29, 6 (2012), 82\u201397."},{"key":"e_1_3_2_1_10_1","volume-title":"Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups","author":"Hinton Geoffrey","year":"2012","unstructured":"Geoffrey Hinton, Li Deng, Dong Yu, George\u00a0E Dahl, Abdel-rahman Mohamed, Navdeep Jaitly, Andrew Senior, Vincent Vanhoucke, Patrick Nguyen, Tara\u00a0N Sainath, 2012. Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups. IEEE Signal processing magazine 29, 6 (2012), 82\u201397."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4842-7449-1"},{"key":"e_1_3_2_1_12_1","first-page":"64","article-title":"Recurrent neural networks","volume":"5","author":"Medsker R","year":"2001","unstructured":"Larry\u00a0R Medsker and LC Jain. 2001. Recurrent neural networks. Design and Applications 5, 64-67 (2001), 2.","journal-title":"Design and Applications"},{"key":"e_1_3_2_1_13_1","volume-title":"International Conference on Machine Learning. PMLR, 28492\u201328518","author":"Radford Alec","year":"2023","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever. 2023. Robust speech recognition via large-scale weak supervision. In International Conference on Machine Learning. PMLR, 28492\u201328518."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2021.3060483"},{"key":"e_1_3_2_1_15_1","volume":"38","author":"Son Dang\u00a0Dinh","year":"2022","unstructured":"Dang\u00a0Dinh Son, Dang\u00a0Xuan Vuong, Duong\u00a0Quang Tien, Ta\u00a0Bao Thang, 2022. ASR-VLSP 2021: Conformer with Gradient Mask and Stochastic Weight Averaging for Vietnamese Automatic Speech Recognition. VNU Journal of Science: Computer Science and Communication Engineering 38, 1 (2022).","journal-title":"VNU Journal of Science: Computer Science and Communication Engineering"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.25073\/2588-1086\/vnucsce.332"},{"key":"e_1_3_2_1_17_1","volume-title":"Text-to-speech shared task in VLSP campaign 2019: evaluating Vietnamese speech synthesis on common datasets. Vietnamese Language Signal Processing. VLSP","author":"Thi\u00a0Thu Trang NGUYEN","year":"2019","unstructured":"NGUYEN Thi\u00a0Thu Trang and NGUYEN\u00a0Xuan Tung. 2019. Text-to-speech shared task in VLSP campaign 2019: evaluating Vietnamese speech synthesis on common datasets. Vietnamese Language Signal Processing. VLSP (2019)."}],"event":{"name":"CSAI 2023: 2023 7th International Conference on Computer Science and Artificial Intelligence","acronym":"CSAI 2023","location":"Beijing China"},"container-title":["Proceedings of the 2023 7th International Conference on Computer Science and Artificial Intelligence"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3638584.3638634","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3638584.3638634","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T14:57:54Z","timestamp":1755874674000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3638584.3638634"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,8]]},"references-count":17,"alternative-id":["10.1145\/3638584.3638634","10.1145\/3638584"],"URL":"https:\/\/doi.org\/10.1145\/3638584.3638634","relation":{},"subject":[],"published":{"date-parts":[[2023,12,8]]},"assertion":[{"value":"2024-03-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}