{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T19:05:46Z","timestamp":1775329546355,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":29,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789811980688","type":"print"},{"value":"9789811980695","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-981-19-8069-5_48","type":"book-chapter","created":{"date-parts":[[2022,11,19]],"date-time":"2022-11-19T10:07:42Z","timestamp":1668852462000},"page":"682-689","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Deep Models for\u00a0Mispronounce Prediction for\u00a0Vietnamese Learners of\u00a0English"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6801-1123","authenticated-orcid":false,"given":"Trang","family":"Phung","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5458-3713","authenticated-orcid":false,"given":"Duc-Quang","family":"Vu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4429-2343","authenticated-orcid":false,"given":"Ha","family":"Mai-Tan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4800-6666","authenticated-orcid":false,"given":"Le Thi","family":"Nhung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,11,20]]},"reference":[{"key":"48_CR1","doi-asserted-by":"crossref","unstructured":"Chen, X., Girshick, R., He, K., Doll\u00e1r, P.: Tensormask: a foundation for dense object segmentation. In: ICCV, pp. 2061\u20132069 (2019)","DOI":"10.1109\/ICCV.2019.00215"},{"key":"48_CR2","doi-asserted-by":"crossref","unstructured":"Cheng, S., Liu, Z., Li, L., Tang, Z., Wang, D., Zheng, T.F.: Asr-free pronunciation assessment. arXiv preprint arXiv:2005.11902 (2020)","DOI":"10.21437\/Interspeech.2020-2623"},{"key":"48_CR3","doi-asserted-by":"publisher","first-page":"425","DOI":"10.1558\/cj.v16i3.425-445","volume":"16","author":"J Dalby","year":"1999","unstructured":"Dalby, J., Kewley-Port, D.: Explicit pronunciation training using automatic speech recognition technology. CALICO J. 16, 425\u2013445 (1999)","journal-title":"CALICO J."},{"issue":"10","key":"48_CR4","doi-asserted-by":"publisher","first-page":"832","DOI":"10.1016\/j.specom.2009.04.005","volume":"51","author":"M Eskenazi","year":"2009","unstructured":"Eskenazi, M.: An overview of spoken language technology for education. Speech Commun. 51(10), 832\u2013844 (2009)","journal-title":"Speech Commun."},{"key":"48_CR5","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast R-CNN. In: ICCV, pp. 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"48_CR6","doi-asserted-by":"crossref","unstructured":"Graham, C., Nolan, F.: Articulation rate as a metric in spoken language assessment. In: INTERSPEECH (2019)","DOI":"10.21437\/Interspeech.2019-2098"},{"key":"48_CR7","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"8","key":"48_CR8","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"48_CR9","doi-asserted-by":"crossref","unstructured":"Huang, H., et al.: Unet 3+: a full-scale connected unet for medical image segmentation. In: ICASSP, pp. 1055\u20131059. IEEE (2020)","DOI":"10.1109\/ICASSP40776.2020.9053405"},{"key":"48_CR10","unstructured":"Knill, K., Gales, M., et al.: Automatically grading learners\u2019 English using a gaussian process. In: ISCA (2015)"},{"key":"48_CR11","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1558\/cj.v16i3.295-310","volume":"16","author":"CSA LaRocca","year":"1999","unstructured":"LaRocca, C.S.A., et al.: On the path to 2x learning: exploring the possibilities of advanced speech recognition. CALICO J. 16, 295\u2013310 (1999)","journal-title":"CALICO J."},{"key":"48_CR12","doi-asserted-by":"publisher","first-page":"407","DOI":"10.1558\/cj.v16i3.407-424","volume":"16","author":"J Mostow","year":"1999","unstructured":"Mostow, J., Aist, G.: Giving help and praise in a reading tutor with imperfect listening-because automated speech recognition means never being able to say you\u2019re certain. CALICO J. 16, 407\u2013424 (1999)","journal-title":"CALICO J."},{"issue":"5","key":"48_CR13","doi-asserted-by":"publisher","first-page":"393","DOI":"10.1080\/09588220802447651","volume":"21","author":"A Neri","year":"2008","unstructured":"Neri, A., Mich, O., Gerosa, M., Giuliani, D.: The effectiveness of computer assisted pronunciation training for foreign language learning by children. Comput. Assist. Lang. Learn. 21(5), 393\u2013408 (2008)","journal-title":"Comput. Assist. Lang. Learn."},{"key":"48_CR14","doi-asserted-by":"crossref","unstructured":"Neumeyer, L., et al.: Automatic text-independent pronunciation scoring of foreign language student speech. In: ICSLP 1996, vol. 3, pp. 1457\u20131460. IEEE (1996)","DOI":"10.21437\/ICSLP.1996-372"},{"key":"48_CR15","doi-asserted-by":"publisher","unstructured":"Phung, T., Nguyen, V.T., Ma, T.H.T., Duc, Q.V.: A (2+1)D attention convolutional neural network for video prediction. In: Dang, N.H.T., Zhang, Y.D., Tavares, J.M.R.S., Chen, B.H. (eds.) Artificial Intelligence in Data and Big Data Processing. ICABDE 2021. Lecture Notes on Data Engineering and Communications Technologies, vol. 124, pp. 395\u2013406. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-030-97610-1_31","DOI":"10.1007\/978-3-030-97610-1_31"},{"key":"48_CR16","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., Farhadi, A.: You only look once: unified, real-time object detection. In: CVPR, pp. 779\u2013788 (2016)","DOI":"10.1109\/CVPR.2016.91"},{"issue":"1","key":"48_CR17","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1080\/07434610012331278904","volume":"16","author":"K Rosen","year":"2000","unstructured":"Rosen, K., Yampolsky, S.: Automatic speech recognition and a review of its functioning with dysarthric speech. Augment. Altern. Commun. 16(1), 48\u201360 (2000)","journal-title":"Augment. Altern. Commun."},{"issue":"10","key":"48_CR18","doi-asserted-by":"publisher","first-page":"845","DOI":"10.1016\/j.specom.2009.05.007","volume":"51","author":"H Strik","year":"2009","unstructured":"Strik, H., et al.: Comparing different approaches for automatic pronunciation error detection. Speech Commun. 51(10), 845\u2013852 (2009)","journal-title":"Speech Commun."},{"key":"48_CR19","doi-asserted-by":"crossref","unstructured":"Sudhakara, S., et al.: An improved goodness of pronunciation (gop) measure for pronunciation evaluation with DNN-hmm system considering hmm transition probabilities. In: INTERSPEECH, pp. 954\u2013958 (2019)","DOI":"10.21437\/Interspeech.2019-2363"},{"key":"48_CR20","doi-asserted-by":"crossref","unstructured":"Tan, H.M., et al.: Selective mutual learning: an efficient approach for single channel speech separation. In: ICASSP, pp. 3678\u20133682. IEEE (2022)","DOI":"10.1109\/ICASSP43922.2022.9746022"},{"key":"48_CR21","unstructured":"Tan, M., Le, Q.: Efficientnet: rethinking model scaling for convolutional neural networks. In: ICML, pp. 6105\u20136114. PMLR (2019)"},{"key":"48_CR22","doi-asserted-by":"crossref","unstructured":"Vieira, J.P.A., Moura, R.S.: An analysis of convolutional neural networks for sentence classification. In: CLEI, pp. 1\u20135. IEEE (2017)","DOI":"10.1109\/CLEI.2017.8226381"},{"key":"48_CR23","doi-asserted-by":"publisher","first-page":"105711","DOI":"10.1109\/ACCESS.2021.3099856","volume":"9","author":"DQ Vu","year":"2021","unstructured":"Vu, D.Q., Le, N., Wang, J.C.: Teaching yourself: a self-knowledge distillation approach to action recognition. IEEE Access 9, 105711\u2013105723 (2021)","journal-title":"IEEE Access"},{"key":"48_CR24","unstructured":"Vu, D.Q., Le, N.T., Wang, J.C.: Self-supervised learning via multi-transformation classification for action recognition. arXiv preprint arXiv:2102.10378 (2021)"},{"key":"48_CR25","doi-asserted-by":"crossref","unstructured":"Vu, D.Q., Le, N.T., Wang, J.C.: (2+1)d distilled shufflenet: a lightweight unsupervised distillation network for human action recognition. In: ICPR. IEEE (2022)","DOI":"10.1109\/ICPR56361.2022.9956634"},{"key":"48_CR26","doi-asserted-by":"crossref","unstructured":"Vu, D.Q., et al.: A novel self-knowledge distillation approach with SIAMESE representation learning for action recognition. In: VCIP, pp. 1\u20135. IEEE (2021)","DOI":"10.1109\/VCIP53242.2021.9675335"},{"key":"48_CR27","unstructured":"Witt, S.M.: Automatic error detection in pronunciation training: where we are and where we need to go. In: International Symposium on Automatic Detection on Errors in Pronunciation Training, pp. 1\u20138 (2012)"},{"issue":"2","key":"48_CR28","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1080\/10400435.2010.483646","volume":"22","author":"V Young","year":"2010","unstructured":"Young, V., Mihailidis, A.: Difficulties in automatic speech recognition of dysarthric speakers and implications for speech-based applications used by the elderly: a literature review. Assist. Technol. 22(2), 99\u2013112 (2010)","journal-title":"Assist. Technol."},{"key":"48_CR29","doi-asserted-by":"crossref","unstructured":"Zhao, G., et al.: L2-arctic: a non-native English speech corpus. In: INTERSPEECH, pp. 2783\u20132787 (2018)","DOI":"10.21437\/Interspeech.2018-1110"}],"container-title":["Communications in Computer and Information Science","Future Data and Security Engineering. Big Data, Security and Privacy, Smart City and Industry 4.0 Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-19-8069-5_48","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,3,13]],"date-time":"2023-03-13T03:17:58Z","timestamp":1678677478000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-19-8069-5_48"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9789811980688","9789811980695"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-981-19-8069-5_48","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"value":"1865-0929","type":"print"},{"value":"1865-0937","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"20 November 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"FDSE","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Future Data and Security Engineering","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ho Chi Minh City","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vietnam","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 November 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 November 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"fdse2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/thefdse.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"170","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"41","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"12","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"6","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4 full papers from invited keynote speakers","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}