{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:26:26Z","timestamp":1740122786947,"version":"3.37.3"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2021,1,2]],"date-time":"2021-01-02T00:00:00Z","timestamp":1609545600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,2]],"date-time":"2021-01-02T00:00:00Z","timestamp":1609545600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2021,3]]},"DOI":"10.1007\/s10772-020-09788-7","type":"journal-article","created":{"date-parts":[[2021,1,2]],"date-time":"2021-01-02T05:02:41Z","timestamp":1609563761000},"page":"183-192","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Determining the adaptation data saturation of ASR systems for dysarthric speakers"],"prefix":"10.1007","volume":"24","author":[{"given":"Bassam Ali","family":"Al-Qatab","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2835-4084","authenticated-orcid":false,"given":"Mumtaz Begum","family":"Mustafa","sequence":"additional","affiliation":[]},{"given":"Siti Salwah","family":"Salim","sequence":"additional","affiliation":[]},{"given":"Asmiza Abdul","family":"Sani","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,1,2]]},"reference":[{"key":"9788_CR1","unstructured":"Allison, B., Guthrie, D., & Guthrie, L. (2006). Another look at the data sparsity problem. In: P. Sojka & I. Kope\u010dek (Eds.), Text, speech and dialogue: 9th international conference, TSD 2006, Brno, Czech Republic, September 11\u201315, 2006 Proceedings (pp. 327\u2013334). Berlin: Springer."},{"key":"9788_CR2","doi-asserted-by":"crossref","unstructured":"Al-Qatab, B. A., Mustafa, M. B., & Salim, S. S. (2014). Severity based adaptation for ASR to Aid dysarthric speakers. The 8th Asia Modelling Symposium (pp. 165\u2013169).","DOI":"10.1109\/AMS.2014.40"},{"key":"9788_CR3","doi-asserted-by":"crossref","unstructured":"Choi, D. L., Kim, B. W., Lee, Y. J., Um, Y., & Chung, M. (2011, October). Design and creation of dysarthric speech database for development of QoLT software technology. In IEEE 2011 International Conference on Speech Database and Assessments (Oriental COCOSDA), on (pp. 47\u201350).","DOI":"10.1109\/ICSDA.2011.6085978"},{"key":"9788_CR4","doi-asserted-by":"publisher","first-page":"254","DOI":"10.1109\/SLT.2014.7078583","volume":"2014","author":"H Christensen","year":"2014","unstructured":"Christensen, H., Casanueva, I., Cunningham, S., Green, P., & Hain, T. (2014). Automatic selection of speakers for improved acoustic modelling: Recognition of disordered speech with sparse data. IEEE Spoken Language Technology Workshop (SLT), 2014, 254\u2013259.","journal-title":"IEEE Spoken Language Technology Workshop (SLT)"},{"key":"9788_CR5","doi-asserted-by":"publisher","first-page":"246","DOI":"10.1044\/jshr.1202.246","volume":"12","author":"FL Darley","year":"1969","unstructured":"Darley, F. L., Aronson, A. E., & Brown, J. R. (1969). Differential diagnostic patterns of dysarthria. Journal of Speech, Language, and Hearing Research, 12, 246\u2013269. https:\/\/doi.org\/10.1044\/jshr.1202.246.","journal-title":"Journal of Speech, Language, and Hearing Research"},{"issue":"1\u20132","key":"9788_CR6","first-page":"1","volume":"113","author":"F De Wet","year":"2017","unstructured":"De Wet, F., Kleynhans, N., Van Compernolle, D., & Sahraeian, R. (2017). Speech recognition for under-resourced languages: Data sharing in hidden Markov model systems. South African Journal of Science, 113(1\u20132), 1\u20139.","journal-title":"South African Journal of Science"},{"key":"9788_CR7","doi-asserted-by":"publisher","first-page":"294","DOI":"10.1109\/89.506933","volume":"4","author":"VV Digalakis","year":"1996","unstructured":"Digalakis, V. V., & Neumeyer, L. G. (1996). Speaker adaptation using combined transformation and Bayesian methods. Speech and Audio Processing, IEEE Transactions on, 4, 294\u2013300. https:\/\/doi.org\/10.1109\/89.506933.","journal-title":"Speech and Audio Processing, IEEE Transactions on"},{"key":"9788_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s00034-020-01419-5","volume":"39","author":"G Diwakar","year":"2020","unstructured":"Diwakar, G., & Karjigi, V. (2020). Improving speech to text alignment based on repetition detection for dysarthric speech. Circuits, Systems, and Signal Processing, 39, 1\u201325.","journal-title":"Circuits, Systems, and Signal Processing"},{"key":"9788_CR9","volume-title":"Motor speech disorders: Substrates, differential diagnosis, and management","author":"JR Duffy","year":"2012","unstructured":"Duffy, J. R. (2012). Motor speech disorders: Substrates, differential diagnosis, and management. Amsterdam: Elsevier Health Sciences."},{"key":"9788_CR10","doi-asserted-by":"publisher","first-page":"165","DOI":"10.3109\/13682828009112541","volume":"15","author":"P Enderby","year":"1980","unstructured":"Enderby, P. (1980). Frenchay dysarthria assessment. British Journal of Disorders of Communication, 15, 165\u2013173.","journal-title":"British Journal of Disorders of Communication"},{"key":"9788_CR11","doi-asserted-by":"publisher","first-page":"1366","DOI":"10.1109\/TASL.2009.2034187","volume":"18","author":"M Ferras","year":"2010","unstructured":"Ferras, M., Cheung-Chi, L., Barras, C., & Gauvain, J. (2010). Comparison of speaker adaptation methods as feature extraction for SVM-based speaker recognition. IEEE Transactions on Audio, Speech, and Language Processing, 18, 1366\u20131378. https:\/\/doi.org\/10.1109\/TASL.2009.2034187.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9788_CR12","doi-asserted-by":"publisher","unstructured":"Gales, M. J. F. (2001). Adaptive training for robust ASR. In IEEE Workshop on Automatic Speech Recognition and Understanding, ASRU '01 (pp. 15\u201320). https:\/\/doi.org\/10.1006\/csla.1996.0013","DOI":"10.1006\/csla.1996.0013"},{"key":"9788_CR13","volume-title":"TIMIT: Acoustic-phonetic continuous speech corpus","author":"JS Garofolo","year":"1993","unstructured":"Garofolo, J. S., & Consortium, L. D. (1993). TIMIT: Acoustic-phonetic continuous speech corpus. Philadelphia: Linguistic Data Consortium."},{"key":"9788_CR14","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1109\/89.279278","volume":"2","author":"J Gauvain","year":"1994","unstructured":"Gauvain, J., & Chin-Hui, L. (1994). Maximum a posteriori estimation for multivariate Gaussian mixture observations of Markov chains. IEEE Transactions on Speech and Audio Processing, 2, 291\u2013298. https:\/\/doi.org\/10.1109\/89.279278.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"9788_CR15","first-page":"1","volume":"99","author":"S Goronzy","year":"1999","unstructured":"Goronzy, S., & Kompe, R. (1999). A combined MAP + MLLR approach for speaker adaptation. Proceedings of the Sony Research Forum, 99, 1.","journal-title":"Proceedings of the Sony Research Forum"},{"key":"9788_CR16","first-page":"1189","volume-title":"Automatic speech recognition with sparse training data for dysarthric speakers","author":"P Green","year":"2003","unstructured":"Green, P., Carmichael, J., Hatzis, A., Enderby, P., Hawley, M. S., & Parker, M. (2003). Automatic speech recognition with sparse training data for dysarthric speakers (pp. 1189\u20131192). Geneva: INTERSPEECH."},{"key":"9788_CR17","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1016\/j.specom.2013.01.007","volume":"56","author":"D Imseng","year":"2014","unstructured":"Imseng, D., Motlicek, P., Bourlard, H., & Garner, P. N. (2014). Using out-of-language data to improve an under-resourced speech recognizer. Speech Communication, 56, 142\u2013151.","journal-title":"Speech Communication"},{"key":"9788_CR18","doi-asserted-by":"publisher","first-page":"637","DOI":"10.1109\/TNSRE.2018.2802914","volume":"26","author":"NM Joy","year":"2018","unstructured":"Joy, N. M., & Umesh, S. (2018). Improving acoustic models in TORGO dysarthric speech database. IEEE Transactions on Neural Systems and Rehabilitation Engineering, 26, 637\u2013645. https:\/\/doi.org\/10.1109\/TNSRE.2018.2802914.","journal-title":"IEEE Transactions on Neural Systems and Rehabilitation Engineering"},{"key":"9788_CR19","doi-asserted-by":"publisher","first-page":"1534","DOI":"10.1016\/j.camwa.2009.06.051","volume":"58","author":"P Kayasith","year":"2009","unstructured":"Kayasith, P., & Theeramunkong, T. (2009). Speech confusion index: A confusion-based speech quality indicator and recognition rate prediction for dysarthria. Computers & Mathematics with Applications, 58, 1534\u20131549. https:\/\/doi.org\/10.1016\/j.camwa.2009.06.051.","journal-title":"Computers & Mathematics with Applications"},{"key":"9788_CR20","doi-asserted-by":"publisher","first-page":"482","DOI":"10.1044\/jshd.5404.482","volume":"54","author":"RD Kent","year":"1989","unstructured":"Kent, R. D., Weismer, G., Kent, J. F., & Rosenbek, J. C. (1989). Toward phonetic intelligibility testing in dysarthria. Journal of Speech and Hearing Disorders, 54, 482\u2013499.","journal-title":"Journal of Speech and Hearing Disorders"},{"key":"9788_CR21","doi-asserted-by":"crossref","unstructured":"Kim, H., Hasegawa-Johnson, M., Perlman, A., Gunderson, J., Huang, T. S., Watkin, K., & Frame, S. (2008). Dysarthric speech database for universal access research. In\u00a0Ninth Annual Conference of the International Speech Communication Association.","DOI":"10.21437\/Interspeech.2008-480"},{"issue":"9","key":"9788_CR22","doi-asserted-by":"publisher","first-page":"1581","DOI":"10.1109\/TNSRE.2017.2681691","volume":"25","author":"M Kim","year":"2017","unstructured":"Kim, M., Kim, Y., Yoo, J., Wang, J., & Kim, H. (2017). Regularized speaker adaptation of KL-HMM for dysarthric speech recognition. IEEE Transactions on Neural Systems and Rehabilitation Engineering, 25(9), 1581\u20131591.","journal-title":"IEEE Transactions on Neural Systems and Rehabilitation Engineering"},{"key":"9788_CR23","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1080\/07434619712331277858","volume":"13","author":"AL Kotler","year":"1997","unstructured":"Kotler, A. L., & Thomas-Stonell, N. (1997). Effects of speech training on the accuracy of speech recognition for an individual with a speech impairment. Augmentative and Alternative Communication, 13, 71\u201380. https:\/\/doi.org\/10.1080\/07434619712331277858.","journal-title":"Augmentative and Alternative Communication"},{"key":"9788_CR24","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1006\/csla.1995.0010","volume":"9","author":"CJ Leggetter","year":"1995","unstructured":"Leggetter, C. J., & Woodland, P. (1995). Maximum likelihood linear regression for speaker adaptation of continuous density hidden Markov models. Computer Speech & Language, 9, 171\u2013185.","journal-title":"Computer Speech & Language"},{"key":"9788_CR25","unstructured":"Leino, K., & Kurimo, M. (2017). Acoustic Model Compression with MAP adaptation. Paper presented at the Proceedings of the 21st Nordic Conference on Computational Linguistics, NoDaLiDa, 22\u201324 May 2017, Gothenburg, Sweden."},{"key":"9788_CR26","volume-title":"CSR-II (WSJ1) Complete LDC94S13A","author":"Linguistic Data Consortium","year":"1994","unstructured":"Linguistic Data Consortium. (1994). CSR-II (WSJ1) Complete LDC94S13A. Philadelphia: DVD."},{"key":"9788_CR27","doi-asserted-by":"crossref","unstructured":"Mak, B., Tsz-Chung, L., & Hsiao, R. (2006). Improving reference speaker weighting adaptation by the use of maximum-likelihood reference speakers. IEEE International Conference on Acoustics Speech and Signal Processing Proceedings, I-I.","DOI":"10.1109\/ICASSP.2006.1659999"},{"key":"9788_CR28","doi-asserted-by":"crossref","unstructured":"Menendez-Pidal, X., Polikoff, J.B., Peters, S.M., Leonzio, J.E., & Bunnell, H.T. (1996). The Nemours database of dysarthric speech. Fourth International Conference on Spoken Language, ICSLP 96 Proceedings, (Vol. 1963, pp. 1962\u20131965).","DOI":"10.1109\/ICSLP.1996.608020"},{"key":"9788_CR29","doi-asserted-by":"crossref","unstructured":"Mengistu, K.T., & Rudzicz, F. (2011). Adapting acoustic and lexical models to dysarthric speech. IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), (pp. 4924\u20134927).","DOI":"10.1109\/ICASSP.2011.5947460"},{"key":"9788_CR30","doi-asserted-by":"publisher","first-page":"e86285","DOI":"10.1371\/journal.pone.0086285","volume":"9","author":"MB Mustafa","year":"2014","unstructured":"Mustafa, M. B., Salim, S. S., Mohamed, N., Al-Qatab, B., & Siong, C. E. (2014). Severity-based adaptation with limited data for ASR to aid dysarthric speakers. PLoS ONE, 9, e86285. https:\/\/doi.org\/10.1371\/journal.pone.0086285.","journal-title":"PLoS ONE"},{"key":"9788_CR31","unstructured":"Paul, D. B., & Baker, J. M. (1992). The design for the wall street journal-based CSR corpus. Proceedings of the workshop on Speech and Natural Language (pp. 357\u2013362). Harriman, New York: Association for Computational Linguistics."},{"key":"9788_CR32","doi-asserted-by":"publisher","first-page":"265","DOI":"10.1080\/aac.17.4.265.275","volume":"17","author":"P Raghavendra","year":"2001","unstructured":"Raghavendra, P., Rosengren, E., & Hunnicutt, S. (2001). An investigation of different degrees of dysarthric speech as input to speaker-adaptive and speaker-dependent recognition systems. Augmentative and Alternative Communication, 17, 265\u2013275. https:\/\/doi.org\/10.1080\/aac.17.4.265.275.","journal-title":"Augmentative and Alternative Communication"},{"key":"9788_CR33","unstructured":"Rudzicz, F. (2007). Comparing speaker-dependent and speaker-adaptive acoustic models for recognizing dysarthric speech. Proceedings of the 9th international ACM SIGACCESS conference on Computers and accessibility (pp. 255\u2013256). Tempe, Arizona, USA: ACM."},{"key":"9788_CR34","doi-asserted-by":"publisher","first-page":"523","DOI":"10.1007\/s10579-011-9145-0","volume":"46","author":"F Rudzicz","year":"2012","unstructured":"Rudzicz, F., Namasivayam, A. K., & Wolff, T. (2012). The TORGO database of acoustic and articulatory speech from speakers with dysarthria. Language Resources and Evaluation, 46, 523\u2013541.","journal-title":"Language Resources and Evaluation"},{"key":"9788_CR35","unstructured":"Sehgal, S., & Cunningham, S. (2015). Model adaptation and adaptive training for the recognition of dysarthric speech. In\u00a0Proceedings of SLPAT 2015: 6th Workshop on Speech and Language Processing for Assistive Technologies, (pp. 65\u201371)."},{"key":"9788_CR36","doi-asserted-by":"publisher","first-page":"1147","DOI":"10.1016\/j.csl.2012.10.002","volume":"27","author":"HV Sharma","year":"2013","unstructured":"Sharma, H. V., & Hasegawa-Johnson, M. (2013). Acoustic model adaptation using in-domain background models for dysarthric speech recognition. Computer Speech & Language, 27, 1147\u20131162. https:\/\/doi.org\/10.1016\/j.csl.2012.10.002.","journal-title":"Computer Speech & Language"},{"key":"9788_CR37","unstructured":"Shinoda, K. (2011). Speaker adaptation techniques for automatic speech recognition. Proc APSIPA ASC 2011 Xi'an."},{"key":"9788_CR38","first-page":"1","volume":"2015","author":"R Sriranjani","year":"2015","unstructured":"Sriranjani, R., Ramasubba Reddy, M., & Umesh, S. (2015). Improved acoustic modeling for automatic dysarthric speech recognition. IEEE Twenty First National Conference on Communications (NCC), 2015, 1\u20136.","journal-title":"IEEE Twenty First National Conference on Communications (NCC)"},{"key":"9788_CR39","doi-asserted-by":"crossref","unstructured":"Stadermann, J., & Rigoll, G. (2005, March). Two-stage speaker adaptation of hybrid tied-posterior acoustic models. In\u00a0Proceedings.(ICASSP'05). IEEE International Conference on Acoustics, Speech, and Signal Processing, 2005.\u00a0(Vol. 1, pp. I\u2013977). IEEE.","DOI":"10.1109\/ICASSP.2005.1415279"},{"key":"9788_CR40","doi-asserted-by":"publisher","first-page":"751","DOI":"10.1109\/TASSP.1987.1165203","volume":"35","author":"R Stern","year":"1987","unstructured":"Stern, R., & Lasry, M. (1987). Dynamic speaker adaptation for feature-based isolated word recognition. IEEE Transactions on Acoustics, Speech, and Signal Processing, 35, 751\u2013763. https:\/\/doi.org\/10.1109\/TASSP.1987.1165203.","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"key":"9788_CR41","doi-asserted-by":"crossref","unstructured":"Takashima, R., Takiguchi, T., & Ariki, Y. (2020). Two-Step Acoustic Model Adaptation for Dysarthric Speech Recognition. Paper presented at the ICASSP 2020\u20132020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP).","DOI":"10.1109\/ICASSP40776.2020.9053725"},{"key":"9788_CR42","doi-asserted-by":"crossref","unstructured":"Von Agris, U., Blomer, C., & Kraiss, K. F. (2008). Rapid signer adaptation for continuous sign language recognition using a combined approach of eigenvoices, MLLR, and MAP. In\u00a02008 19th International Conference on Pattern Recognition\u00a0(pp. 1\u20134). IEEE.","DOI":"10.1109\/ICPR.2008.4761363"},{"key":"9788_CR43","unstructured":"Xiong, F., Barker, J., & Christensen, H. (2018). Deep learning of articulatory-based representations and applications for improving dysarthric speech recognition. Paper presented at the Speech Communication; 13th ITG-Symposium."},{"key":"9788_CR44","doi-asserted-by":"crossref","unstructured":"Xiong, F., Barker, J., & Christensen, H. (2019). Phonetic analysis of dysarthric speech tempo and applications to robust personalised dysarthric speech recognition. Paper presented at the ICASSP 2019\u20132019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP).","DOI":"10.1109\/ICASSP.2019.8683091"},{"key":"9788_CR45","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1016\/j.csl.2019.05.002","volume":"58","author":"E Y\u0131lmaz","year":"2019","unstructured":"Y\u0131lmaz, E., Mitra, V., Sivaraman, G., & Franco, H. (2019). Articulatory and bottleneck features for speaker-independent ASR of dysarthric speech. Computer Speech & Language, 58, 319\u2013334.","journal-title":"Computer Speech & Language"},{"key":"9788_CR46","volume-title":"The HTK book (for HTK version 3.4)","author":"S Young","year":"2009","unstructured":"Young, S., Evermann, G., Gales, M., Hain, T., Kershaw, D., Liu, X., et al. (2009). The HTK book (for HTK version 3.4). Cambridge: Microsoft Corporation and Cambridge University Engineering Department."},{"key":"9788_CR47","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1080\/10400435.2010.483646","volume":"22","author":"V Young","year":"2010","unstructured":"Young, V., & Mihailidis, A. (2010). Difficulties in automatic speech recognition of dysarthric speakers and implications for speech-based applications used by the elderly: A literature review. Assistive Technology, 22, 99\u2013112. https:\/\/doi.org\/10.1080\/10400435.2010.483646.","journal-title":"Assistive Technology"},{"key":"9788_CR48","doi-asserted-by":"publisher","first-page":"380","DOI":"10.1109\/89.294352","volume":"2","author":"Z Yunxin","year":"1994","unstructured":"Yunxin, Z. (1994). An acoustic-phonetic-based speaker adaptation technique for improving speaker-independent continuous speech recognition. IEEE Transactions on Speech and Audio Processing, 2, 380\u2013394. https:\/\/doi.org\/10.1109\/89.294352.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"issue":"3","key":"9788_CR49","doi-asserted-by":"publisher","first-page":"380","DOI":"10.1109\/89.294352","volume":"2","author":"Y Zhao","year":"1994","unstructured":"Zhao, Y. (1994). An acoustic-phonetic-based speaker adaptation technique for improving speaker-independent continuous speech recognition. IEEE Transactions on Speech and Audio Processing, 2(3), 380\u2013394.","journal-title":"IEEE Transactions on Speech and Audio Processing"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-020-09788-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10772-020-09788-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-020-09788-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,20]],"date-time":"2024-08-20T15:05:19Z","timestamp":1724166319000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10772-020-09788-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,2]]},"references-count":49,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2021,3]]}},"alternative-id":["9788"],"URL":"https:\/\/doi.org\/10.1007\/s10772-020-09788-7","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"type":"print","value":"1381-2416"},{"type":"electronic","value":"1572-8110"}],"subject":[],"published":{"date-parts":[[2021,1,2]]},"assertion":[{"value":"24 February 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 November 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 January 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}