{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,26]],"date-time":"2026-06-26T22:27:53Z","timestamp":1782512873886,"version":"3.54.5"},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2021,7,19]],"date-time":"2021-07-19T00:00:00Z","timestamp":1626652800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,7,19]],"date-time":"2021-07-19T00:00:00Z","timestamp":1626652800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1007\/s00530-021-00822-5","type":"journal-article","created":{"date-parts":[[2021,7,19]],"date-time":"2021-07-19T03:13:32Z","timestamp":1626664412000},"page":"1699-1715","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["A computer-aided speech analytics approach for pronunciation feedback using deep feature clustering"],"prefix":"10.1007","volume":"29","author":[{"given":"Faria","family":"Nazir","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Muhammad Nadeem","family":"Majeed","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mustansar Ali","family":"Ghazanfar","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2709-0849","authenticated-orcid":false,"given":"Muazzam","family":"Maqsood","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2021,7,19]]},"reference":[{"key":"822_CR1","first-page":"102","volume":"2000","author":"K Precoda","year":"2000","unstructured":"Precoda, K., Halverson, C.A., Franco, H.: Effects of speech recognition-based pronunciation feedback on second-language pronunciation ability. Proc. InSTILL 2000, 102\u2013105 (2000)","journal-title":"Proc. InSTILL"},{"issue":"3","key":"822_CR2","doi-asserted-by":"publisher","first-page":"305","DOI":"10.1007\/s10772-015-9271-y","volume":"18","author":"SP Panda","year":"2015","unstructured":"Panda, S.P., Nayak, A.K.: An efficient model for text-to-speech synthesis in Indian languages. Int. J. Speech Technol. 18(3), 305\u2013315 (2015)","journal-title":"Int. J. Speech Technol."},{"key":"822_CR3","doi-asserted-by":"crossref","unstructured":"Franco, H., Neumeyer, L., Kim, Y., Ronen, O.: Automatic pronunciation scoring for language instruction. In: IEEE International Conference on Acoustics, Speech, and Signal Processing, IEEE, pp. 1471\u20131474 (1997)","DOI":"10.1109\/ICASSP.1997.596227"},{"key":"822_CR4","doi-asserted-by":"crossref","unstructured":"Neumeyer, L., Franco, H., Weintraub, M., Price, P.: Automatic text-independent pronunciation scoring of foreign language student speech. In: Proceeding of Fourth International Conference on Spoken Language Processing. ICSLP'96, IEEE, pp. 1457\u20131460 (1996)","DOI":"10.21437\/ICSLP.1996-372"},{"key":"822_CR5","unstructured":"Witt, S.M.: Automatic error detection in pronunciation training: Where we are and where we need to go. In: International Symposium on Automatic Detection of Errors in Pronunciation Training, Stockholm, Sweden (2012)"},{"issue":"6","key":"822_CR6","doi-asserted-by":"publisher","first-page":"499","DOI":"10.1007\/s00530-012-0266-0","volume":"18","author":"RP Hafen","year":"2012","unstructured":"Hafen, R.P., Henry, M.J.: Speech information retrieval: a review. Multimed. Syst. 18(6), 499\u2013518 (2012)","journal-title":"Multimed. Syst."},{"key":"822_CR7","doi-asserted-by":"crossref","unstructured":"Franco, H., Neumeyer, L., Ramos, M., Bratt, H.: Automatic detection of phone-level mispronunciation for language learning. In: Sixth European Conference on Speech Communication and Technology (1999)","DOI":"10.21437\/Eurospeech.1999-207"},{"issue":"2\u20133","key":"822_CR8","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1016\/S0167-6393(99)00044-8","volume":"30","author":"SM Witt","year":"2000","unstructured":"Witt, S.M., Young, S.J.: Phone-level pronunciation scoring and assessment for interactive language learning. Speech Commun. 30(2\u20133), 95\u2013108 (2000)","journal-title":"Speech Commun."},{"key":"822_CR9","unstructured":"Zhang, F., Huang, C., Soong, F.K., Chu, M., Wang, R.: Automatic mispronunciation detection for Mandarin. In: Acoustics, Speech and Signal Processing, 2008. ICASSP 2008. IEEE International Conference on, IEEE, pp. 5077\u20135080 (2008)"},{"key":"822_CR10","unstructured":"Young S., Kershaw, S., Odell, J., Ollason, D., Valtchev, V., Woodland, P.: The HTK Book (for HTK Version 3.0) (2000)"},{"key":"822_CR11","doi-asserted-by":"crossref","unstructured":"Ito, A., Lim, Y.-L., Suzuki, M., Makino, S.: Pronunciation error detection method based on error rule clustering using a decision tree. In: Ninth European Conference on Speech Communication and Technology (2005)","DOI":"10.21437\/Interspeech.2005-96"},{"issue":"4","key":"822_CR12","doi-asserted-by":"publisher","first-page":"455","DOI":"10.1016\/j.specom.2004.12.004","volume":"45","author":"H Jiang","year":"2005","unstructured":"Jiang, H.: Confidence measures for speech recognition: A survey. Speech Commun. 45(4), 455\u2013470 (2005)","journal-title":"Speech Commun."},{"key":"822_CR13","doi-asserted-by":"crossref","unstructured":"Rose, R.C., Juang, B.-H., Lee, C.-H.: A training procedure for verifying string hypotheses in continuous speech recognition. In: International Conference on Acoustics, Speech, and Signal Processing, IEEE, pp. 281\u2013284 (1995)","DOI":"10.1109\/ICASSP.1995.479528"},{"issue":"3","key":"822_CR14","doi-asserted-by":"publisher","first-page":"288","DOI":"10.1109\/89.906002","volume":"9","author":"F Wessel","year":"2001","unstructured":"Wessel, F., Schluter, R., Macherey, K., Ney, H.: Confidence measures for large vocabulary continuous speech recognition. IEEE Trans. Speech Audio Process. 9(3), 288\u2013298 (2001)","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"822_CR15","doi-asserted-by":"crossref","unstructured":"Zhang, R., Rudnicky, A.I.: Word level confidence annotation using combinations of features. In: Seventh European Conference on Speech Communication and Technology (2001)","DOI":"10.21437\/Eurospeech.2001-496"},{"issue":"4","key":"822_CR16","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1016\/S0885-2308(03)00008-1","volume":"17","author":"Y Liu","year":"2003","unstructured":"Liu, Y., Fung, P.: Modeling partial pronunciation variations for spontaneous Mandarin speech recognition. Comput. Speech Lang. 17(4), 357\u2013379 (2003)","journal-title":"Comput. Speech Lang."},{"key":"822_CR17","doi-asserted-by":"crossref","unstructured":"Riley, M., Byrne, W., Finke, M., Khudanpur, S., Ljolje, A., McDonough, J., Nock, H., Saraclar, M., Wooters, C. and Zavaliagkos, G. (1999)Stochastic pronunciation modelling from hand-labelled phonetic corpora. Speech Communication, 29(2-4), pp.209\u2013224","DOI":"10.1016\/S0167-6393(99)00037-0"},{"issue":"3","key":"822_CR18","doi-asserted-by":"publisher","first-page":"483","DOI":"10.3390\/app9030483","volume":"9","author":"RA Minhas","year":"2019","unstructured":"Minhas, R.A., Javed, A., Irtaza, A., Mahmood, M.T., Joo, Y.B.: Shot classification of field sports videos using alexnet convolutional neural network. Appl. Sci. 9(3), 483 (2019)","journal-title":"Appl. Sci."},{"issue":"10","key":"822_CR19","doi-asserted-by":"publisher","first-page":"896","DOI":"10.1016\/j.specom.2009.03.004","volume":"51","author":"S Wei","year":"2009","unstructured":"Wei, S., Hu, G., Hu, Y., Wang, R.-H.: A new method for mispronunciation detection using support vector machine based on pronunciation space models. Speech Commun. 51(10), 896\u2013905 (2009)","journal-title":"Speech Commun."},{"issue":"4","key":"822_CR20","doi-asserted-by":"publisher","first-page":"332","DOI":"10.1007\/s00530-004-0160-5","volume":"10","author":"L Lu","year":"2005","unstructured":"Lu, L., Zhang, H.-J.: Unsupervised speaker segmentation and tracking in real-time audio content analysis. Multimed. Syst. 10(4), 332\u2013343 (2005)","journal-title":"Multimed. Syst."},{"key":"822_CR21","doi-asserted-by":"crossref","unstructured":"Lu, L., Jiang, H., Zhang, H.: A robust audio classification and segmentation method. In: Proceedings of the ninth ACM international conference on Multimedia, pp. 203\u2013211 (2001)","DOI":"10.1145\/500141.500173"},{"key":"822_CR22","unstructured":"Lu, L., Li, S.Z., Zhang, H.-J.: Content-based audio segmentation using support vector machines. In: IEEE International Conference on Multimedia and Expo, 2001. ICME 2001, IEEE, pp. 749\u2013752 (2001)"},{"issue":"5","key":"822_CR23","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1016\/S0167-8655(00)00119-7","volume":"22","author":"D Li","year":"2001","unstructured":"Li, D., Sethi, I.K., Dimitrova, N., McGee, T.: Classification of general audio data for content-based retrieval. Pattern Recogn. Lett. 22(5), 533\u2013544 (2001)","journal-title":"Pattern Recogn. Lett."},{"issue":"1\u20133","key":"822_CR24","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1006\/dspr.1999.0361","volume":"10","author":"DA Reynolds","year":"2000","unstructured":"Reynolds, D.A., Quatieri, T.F., Dunn, R.B.: Speaker verification using adapted Gaussian mixture models. Dig. Signal Process. 10(1\u20133), 19\u201341 (2000)","journal-title":"Dig. Signal Process."},{"issue":"1","key":"822_CR25","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1007\/s00530-006-0034-0","volume":"12","author":"MKS Khan","year":"2006","unstructured":"Khan, M.K.S., Al-Khatib, W.G.: Machine-learning based classification of speech and music. Multimed. Syst. 12(1), 55\u201367 (2006)","journal-title":"Multimed. Syst."},{"key":"822_CR26","doi-asserted-by":"publisher","first-page":"52589","DOI":"10.1109\/ACCESS.2019.2912648","volume":"7","author":"F Nazir","year":"2019","unstructured":"Nazir, F., Majeed, M.N., Ghazanfar, M.A., Maqsood, M.: Mispronunciation detection using deep convolutional neural network features and transfer learning-based model for arabic phonemes. IEEE Access 7, 52589\u201352608 (2019)","journal-title":"IEEE Access"},{"key":"822_CR27","doi-asserted-by":"crossref","unstructured":"Georgoulas, G., Georgopoulos, V.C., Stylios, C.D.: Speech sound classification and detection of articulation disorders with support vector machines and wavelets. In: Engineering in Medicine and Biology Society, 2006. EMBS'06. 28th Annual International Conference of the IEEE, IEEE, pp. 2199\u20132202 (2006)","DOI":"10.1109\/IEMBS.2006.259499"},{"key":"822_CR28","doi-asserted-by":"crossref","unstructured":"Abdou, S.M., Hamid, S.E., Rashwan, M., Samir, A., Abdel-Hamid, O., Shahin, M., Nazih, W.: Computer aided pronunciation learning system using speech recognition techniques. In: Ninth International Conference on Spoken Language Processing (2006)","DOI":"10.21437\/Interspeech.2006-287"},{"key":"822_CR29","doi-asserted-by":"crossref","unstructured":"Li, K., Qian, X., Kang, S., Meng, H.: Lexical stress detection for L2 English speech using deep belief networks. In: Interspeech, pp 1811\u20131815 (2013)","DOI":"10.21437\/Interspeech.2013-447"},{"key":"822_CR30","doi-asserted-by":"crossref","unstructured":"Al Hindi, A., Alsulaiman, M., Muhammad, G., Al-Kahtani, S.: Automatic pronunciation error detection of nonnative Arabic Speech. In: Computer Systems and Applications (AICCSA), 2014 IEEE\/ACS 11th International Conference on, 2014. IEEE, pp. 190\u2013197 (2014)","DOI":"10.1109\/AICCSA.2014.7073198"},{"issue":"1","key":"822_CR31","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1109\/TASLP.2016.2621675","volume":"25","author":"K Li","year":"2017","unstructured":"Li, K., Qian, X., Meng, H.: Mispronunciation detection and diagnosis in l2 english speech using multidistribution deep neural networks. IEEE\/ACM Trans. Audio Speech Lang. Process. 25(1), 193\u2013207 (2017)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"issue":"2","key":"822_CR32","first-page":"242","volume":"16","author":"M Maqsood","year":"2019","unstructured":"Maqsood, M., Habib, H.A., Nawaz, T.: An efficientmis pronunciation detection system using discriminative acoustic phonetic features for arabic consonants. Int. Arab. J. Inf. Technol. 16(2), 242\u2013250 (2019)","journal-title":"Int. Arab. J. Inf. Technol."},{"issue":"2","key":"822_CR33","first-page":"114","volume":"54","author":"M Maqsood","year":"2017","unstructured":"Maqsood, M., Habib, H., Anwar, S., Ghazanfar, M., Nawaz, T.: A comparative study of classifier based mispronunciation detection system for confusing arabic phoneme pairs. Nucleus 54(2), 114\u2013120 (2017)","journal-title":"Nucleus"},{"issue":"3","key":"822_CR34","first-page":"30","volume":"16","author":"M Maqsood","year":"2016","unstructured":"Maqsood, M., Habib, H.A., Nawaz, T., Haider, K.Z.: A complete mispronunciation detection system for Arabic phonemes using SVM. Int. J. Comput. Sci. Netw. Sec. (IJCSNS) 16(3), 30 (2016)","journal-title":"Int. J. Comput. Sci. Netw. Sec. (IJCSNS)"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-021-00822-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-021-00822-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-021-00822-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,4]],"date-time":"2024-09-04T12:41:55Z","timestamp":1725453715000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-021-00822-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,19]]},"references-count":34,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2023,6]]}},"alternative-id":["822"],"URL":"https:\/\/doi.org\/10.1007\/s00530-021-00822-5","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,7,19]]},"assertion":[{"value":"22 June 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 June 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 July 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}