{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,29]],"date-time":"2025-08-29T10:15:19Z","timestamp":1756462519743,"version":"3.40.3"},"publisher-location":"Cham","reference-count":26,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030895785"},{"type":"electronic","value":"9783030895792"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-89579-2_6","type":"book-chapter","created":{"date-parts":[[2021,10,16]],"date-time":"2021-10-16T21:08:32Z","timestamp":1634418512000},"page":"61-72","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Use of Speaker Metadata for Improving Automatic Pronunciation Assessment"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8779-5947","authenticated-orcid":false,"given":"Jose Antonio Lopez","family":"Saenz","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0939-3464","authenticated-orcid":false,"given":"Thomas","family":"Hain","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,10,17]]},"reference":[{"key":"6_CR1","unstructured":"Ba, J.L., Kiros, J.R., Hinton, G.E.: Layer normalization (2016)"},{"key":"6_CR2","unstructured":"Bahdanau, D., Cho, K.H., Bengio, Y.: Neural machine translation by jointly learning to align and translate. In: Proceedings of the 3rd International Conference on Learning Representations, ICLR 2015 - Conference Track Proceedings, pp. 1\u201315 (2015)"},{"key":"6_CR3","doi-asserted-by":"crossref","unstructured":"Chen, L., Gao, Q., Liang, Q., Yuan, J., Liu, Y., China, L.I.S.: Automatic scoring minimal-pair pronunciation drills by using recognition likelihood scores and phonological features. In: SLaTE, pp. 25\u201329 (2019)","DOI":"10.21437\/SLaTE.2019-6"},{"key":"6_CR4","doi-asserted-by":"crossref","unstructured":"Chen, L., Tao, J., Ghaffarzadegan, S., Qian, Y.: End-to-end neural network based automated speech scoring. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6234\u20136238. IEEE (2018)","DOI":"10.1109\/ICASSP.2018.8462562"},{"key":"6_CR5","doi-asserted-by":"crossref","unstructured":"Chen, L., et al.: End-to-end neural network based automated speech scoring Midea America Corporation, 250 W Tasman Dr, San Jose, CA 95134, USA Robert Bosch Corporation, 4005 Miranda Ave, Palo Alto, CA 94304, USA Educational Testing Service (ETS), 90 New Montgomer. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6234\u20136238 (2018)","DOI":"10.1109\/ICASSP.2018.8462562"},{"key":"6_CR6","doi-asserted-by":"crossref","unstructured":"Cheng, S., Liu, Z., Li, L., Tang, Z., Wang, D., Zheng, T.F.: ASR-free pronunciation assessment. arXiv pp. 3047\u20133051 (2020)","DOI":"10.21437\/Interspeech.2020-2623"},{"key":"6_CR7","doi-asserted-by":"crossref","unstructured":"Chu, W., Liu, Y., Zhou, J.: Recognize mispronunciations to improve non-native acoustic modeling through a phone decoder built from one edit distance finite state automaton. In: INTERSPEECH, pp. 3062\u20133066 (2020)","DOI":"10.21437\/Interspeech.2020-3109"},{"key":"6_CR8","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1016\/j.csl.2017.12.006","volume":"50","author":"S Dudy","year":"2018","unstructured":"Dudy, S., Bedrick, S., Asgari, M., Kain, A.: Automatic analysis of pronunciations for children with speech sound disorders. Comput. Speech Lang. 50, 62\u201384 (2018)","journal-title":"Comput. Speech Lang."},{"key":"6_CR9","unstructured":"Fu, K., Lin, J., Ke, D., Xie, Y., Zhang, J., Lin, B.: A full text-dependent end to end mispronunciation detection and diagnosis with easy data augmentation techniques (2021)"},{"key":"6_CR10","unstructured":"Harding, L.: What do raters need in a pronunciation scale? The user\u2019s view. In: Isaacs, T., Trofimovich, P. (eds.) Second Language Pronunciation Assessment: Interdisciplinary Perspectives, chap. 2, pp. 12\u201334. Multilingual Matters\/Channel View Publications (2017)"},{"key":"6_CR11","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"6_CR12","doi-asserted-by":"crossref","unstructured":"Huang, G., Ye, J., Shen, Y., Zhou, Y.: A evaluating model of English pronunciation for Chinese students. In: 2017 IEEE 9th International Conference on Communication Software and Networks (ICCSN), pp. 1062\u20131065. IEEE (2017)","DOI":"10.1109\/ICCSN.2017.8230273"},{"key":"6_CR13","unstructured":"Lindemann, S.: Variation or \u2018error\u2019? perception of pronunciation variation and implications for assessment. Second language pronunciation assessment, p. 193 (2017)"},{"key":"6_CR14","doi-asserted-by":"crossref","unstructured":"Milner, R., Jalal, M.A., Ng, R.W., Hain, T.: A cross-corpus study on speech emotion recognition. In: 2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU), pp. 304\u2013311. IEEE (2019)","DOI":"10.1109\/ASRU46091.2019.9003838"},{"key":"6_CR15","doi-asserted-by":"crossref","unstructured":"Moore, R.K., Skidmore, L.: On the use\/misuse of the term\u2019phoneme\u2019. arXiv preprint arXiv:1907.11640 (2019)","DOI":"10.21437\/Interspeech.2019-2711"},{"key":"6_CR16","doi-asserted-by":"crossref","unstructured":"Nicolao, M., Beeston, A.V., Hain, T.: Automatic assessment of English learner pronunciation using discriminative classifiers. In: 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5351\u20135355. IEEE (2015)","DOI":"10.1109\/ICASSP.2015.7178993"},{"key":"6_CR17","doi-asserted-by":"crossref","unstructured":"Robinson, T., Fransen, J., Pye, D., Foote, J., Renals, S.: WSJCAMO: a British English speech corpus for large vocabulary continuous speech recognition. In: 1995 International Conference on Acoustics, Speech, and Signal Processing, vol. 1, pp. 81\u201384. IEEE (1995)","DOI":"10.1109\/ICASSP.1995.479278"},{"key":"6_CR18","doi-asserted-by":"crossref","unstructured":"Sak, H., Senior, A., Beaufays, F.: Long short-term memory based recurrent neural network architectures for large vocabulary speech recognition (2014)","DOI":"10.21437\/Interspeech.2014-80"},{"key":"6_CR19","doi-asserted-by":"crossref","unstructured":"Sudhakara, S., Ramanathi, M.K., Yarra, C., Ghosh, P.K.: An improved goodness of pronunciation (GoP) measure for pronunciation evaluation with DNN-HMM system considering hmm transition probabilities. In: INTERSPEECH, pp. 954\u2013958 (2019)","DOI":"10.21437\/Interspeech.2019-2363"},{"key":"6_CR20","unstructured":"Trofimovich, P., Isaacs, T.: Second language pronunciation assessment: a look at the present and the future. Second Language Pronunciation Assessment, p. 259 (2017)"},{"key":"6_CR21","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, pp. 5998\u20136008 (2017)"},{"issue":"3","key":"6_CR22","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1080\/15434303.2015.1037446","volume":"12","author":"J Wei","year":"2015","unstructured":"Wei, J., Llosa, L.: Investigating differences between American and Indian raters in assessing TOEFL iBT speaking tasks. Lang. Assess. Q. 12(3), 283\u2013304 (2015)","journal-title":"Lang. Assess. Q."},{"issue":"2\u20133","key":"6_CR23","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1016\/S0167-6393(99)00044-8","volume":"30","author":"SM Witt","year":"2000","unstructured":"Witt, S.M., Young, S.J.: Phone-level pronunciation scoring and assessment for interactive language learning. Speech Commun. 30(2\u20133), 95\u2013108 (2000)","journal-title":"Speech Commun."},{"key":"6_CR24","doi-asserted-by":"publisher","unstructured":"Witteman, M.J., Weber, A., McQueen, J.M.: Tolerance for inconsistency in foreign-accented speech. Psychon. Bull. Rev. 21(2), 512\u2013519 (2014). https:\/\/doi.org\/10.3758\/s13423-013-0519-8, http:\/\/link.springer.com\/10.3758\/s13423-013-0519-8","DOI":"10.3758\/s13423-013-0519-8"},{"key":"6_CR25","doi-asserted-by":"publisher","unstructured":"Zeyer, A., Doetsch, P., Voigtlaender, P., Schluter, R., Ney, H.: A comprehensive study of deep bidirectional LSTM RNNS for acoustic modeling in speech recognition. ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings pp. 2462\u20132466 (2017). doi: https:\/\/doi.org\/10.1109\/ICASSP.2017.7952599","DOI":"10.1109\/ICASSP.2017.7952599"},{"issue":"7","key":"6_CR26","doi-asserted-by":"publisher","first-page":"1809","DOI":"10.3390\/s20071809","volume":"20","author":"L Zhang","year":"2020","unstructured":"Zhang, L., et al.: End-to-end automatic pronunciation error detection based on improved hybrid ctc\/attention architecture. Sensors 20(7), 1809 (2020)","journal-title":"Sensors"}],"container-title":["Lecture Notes in Computer Science","Statistical Language and Speech Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-89579-2_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T01:52:48Z","timestamp":1725933168000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-89579-2_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030895785","9783030895792"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-89579-2_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"17 October 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SLSP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Statistical Language and Speech Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Cardiff","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 November 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 November 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"slsp2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/irdta.eu\/slsp2020-2021\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"21","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"9","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"43% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}