{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T19:32:17Z","timestamp":1757619137477,"version":"3.44.0"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031984648"},{"type":"electronic","value":"9783031984655"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-98465-5_34","type":"book-chapter","created":{"date-parts":[[2025,7,19]],"date-time":"2025-07-19T01:46:40Z","timestamp":1752889600000},"page":"266-274","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Semi-supervised Speech Confidence Detection Using Pseudo-labelling and\u00a0Whisper Embeddings"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1631-2151","authenticated-orcid":false,"given":"Adam","family":"Wynn","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9325-1789","authenticated-orcid":false,"given":"Jingyun","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9171-1788","authenticated-orcid":false,"given":"Xiangyu","family":"Tan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,20]]},"reference":[{"key":"34_CR1","unstructured":"Ameer, H., 
Latif, S., Latif, R., Mukhtar, S.: Whisper in focus: enhancing stuttered speech classification with encoder layer optimization (2023)"},{"issue":"4","key":"34_CR2","first-page":"4453","volume":"19","author":"N Astuti","year":"2024","unstructured":"Astuti, N., Padmadewi, N.N., Putra, I.: Speech disfluency and gestures production in undergraduate students\u2019 confidence level of speaking. Media Bina Ilmiah 19(4), 4453\u20134462 (2024)","journal-title":"Media Bina Ilmiah"},{"key":"34_CR3","doi-asserted-by":"publisher","unstructured":"Boughariou, E., Bahou, Y., Belguith, L.H.: Detecting speech disorders using a machine-learning guided method in spontaneous Tunisian dialect speech. SN Comput. Sci. 5(5) (2024). https:\/\/doi.org\/10.1007\/s42979-024-02775-8","DOI":"10.1007\/s42979-024-02775-8"},{"key":"34_CR4","doi-asserted-by":"publisher","DOI":"10.1016\/j.caeai.2021.100027","volume":"2","author":"AP Cavalcanti","year":"2021","unstructured":"Cavalcanti, A.P., et al.: Automatic feedback in online learning environments: a systematic literature review. Comput. Educ. Artif. Intell. 2, 100027 (2021). https:\/\/doi.org\/10.1016\/j.caeai.2021.100027","journal-title":"Comput. Educ. Artif. Intell."},{"key":"34_CR5","doi-asserted-by":"publisher","unstructured":"Gfeller, B., Frank, C., Roblek, D., Sharifi, M., Tagliasacchi, M., Velimirovi\u0107, M.: Spice: self-supervised pitch estimation. IEEE\/ACM Trans. Audio, Speech and Lang. Proc. 28, 1118\u20131128 (2020). https:\/\/doi.org\/10.1109\/TASLP.2020.2982285","DOI":"10.1109\/TASLP.2020.2982285"},{"key":"34_CR6","doi-asserted-by":"crossref","unstructured":"Goel, A., Hira, M., Gupta, A.: Exploring multilingual unseen speaker emotion recognition: leveraging co-attention cues in multitask learning (2024). 
arxiv:2406.08931","DOI":"10.21437\/Interspeech.2024-1820"},{"issue":"3","key":"34_CR7","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1177\/0146167218787805","volume":"45","author":"JJ Guyer","year":"2019","unstructured":"Guyer, J.J., Fabrigar, L.R., Vaughan-Johnston, T.I.: Speech rate, intonation, and pitch: investigating the bias and cue effects of vocal confidence on persuasion. Pers. Soc. Psychol. Bull. 45(3), 389\u2013405 (2019)","journal-title":"Pers. Soc. Psychol. Bull."},{"key":"34_CR8","doi-asserted-by":"publisher","unstructured":"Hernandez, F., Nguyen, V., Ghannay, S., Tomashenko, N., Est\u00e8ve, Y.: TED-LIUM 3: Twice as Much Data and Corpus Repartition for Experiments on Speaker Adaptation. In: Karpov, A., Jokisch, O., Potapova, R. (eds.) SPECOM 2018. LNCS (LNAI), vol. 11096, pp. 198\u2013208. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-99579-3_21","DOI":"10.1007\/978-3-319-99579-3_21"},{"key":"34_CR9","unstructured":"Jackson, P., Haq, S.: Surrey Audio-Visual Expressed Emotion (SAVEE) Database \u2014 kahlan.eps.surrey.ac.uk. http:\/\/kahlan.eps.surrey.ac.uk\/savee\/Database.html. Accessed 17 Feb 2025"},{"key":"34_CR10","doi-asserted-by":"publisher","unstructured":"Jiang, X., Pell, M.: Encoding and decoding confidence information in speech. In: Proceedings of Speech Prosody 2014, pp. 573\u2013576 (2014). https:\/\/doi.org\/10.21437\/SpeechProsody.2014-103","DOI":"10.21437\/SpeechProsody.2014-103"},{"key":"34_CR11","doi-asserted-by":"publisher","first-page":"106","DOI":"10.1016\/j.specom.2017.01.011","volume":"88","author":"X Jiang","year":"2017","unstructured":"Jiang, X., Pell, M.D.: The sound of confidence and doubt. Speech Commun. 88, 106\u2013126 (2017). https:\/\/doi.org\/10.1016\/j.specom.2017.01.011","journal-title":"Speech Commun."},{"key":"34_CR12","doi-asserted-by":"crossref","unstructured":"Kasemsap, K.: Digital storytelling and digital literacy. 
In: Advances in Educational Marketing, Administration, and Leadership, pp. 151\u2013171. IGI Global (2017)","DOI":"10.4018\/978-1-5225-2101-3.ch009"},{"key":"34_CR13","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization (2017). arxiv:1412.6980"},{"key":"34_CR14","doi-asserted-by":"publisher","unstructured":"Kourkounakis, T., Hajavi, A., Etemad, A.: Detecting multiple speech disfluencies using a deep residual network with bidirectional long short-term memory. In: ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6089\u20136093 (2020). https:\/\/doi.org\/10.1109\/ICASSP40776.2020.9053893","DOI":"10.1109\/ICASSP40776.2020.9053893"},{"key":"34_CR15","doi-asserted-by":"crossref","unstructured":"Lea, C., Mitra, V., Joshi, A., Kajarekar, S., Bigham, J.: Sep-28K: a dataset for stuttering event detection from podcasts with people who stutter. In: ICASSP (2021). arxiv:2102.12394","DOI":"10.1109\/ICASSP39728.2021.9413520"},{"key":"34_CR16","unstructured":"Lee, D.H.: Pseudo-Label: the simple and efficient semi-supervised learning method for deep neural networks. In: ICML 2013 Workshop : Challenges in Representation Learning (WREPL) (2013)"},{"key":"34_CR17","doi-asserted-by":"publisher","unstructured":"Liu, J., Wumaier, A., Wei, D., Guo, S.: Automatic speech disfluency detection using wav2vec2.0 for different languages with variable lengths. Appl. Sci. 13(13) (2023). https:\/\/doi.org\/10.3390\/app13137579, https:\/\/www.mdpi.com\/2076-3417\/13\/13\/7579","DOI":"10.3390\/app13137579"},{"key":"34_CR18","doi-asserted-by":"publisher","unstructured":"Livingstone, S.R., Russo, F.A.: The Ryerson audio-visual database of emotional speech and song (RAVDESS): a dynamic, multimodal set of facial and vocal expressions in North American English. PLOS ONE 13(5) (2018). 
https:\/\/doi.org\/10.1371\/journal.pone.0196391","DOI":"10.1371\/journal.pone.0196391"},{"key":"34_CR19","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization (2019). arxiv:1711.05101"},{"issue":"4","key":"34_CR20","doi-asserted-by":"publisher","first-page":"1333","DOI":"10.54373\/ifijeb.v4i4.1629","volume":"4","author":"M Mardiana","year":"2024","unstructured":"Mardiana, M., Laksmana, B., Sukardi, S.: Effects of self-confidence and diction on speaking skills in junior high school students. Indo-Fintech Intellectuals J. Econ. Bus. 4(4), 1333\u20131344 (2024)","journal-title":"Indo-Fintech Intellectuals J. Econ. Bus."},{"key":"34_CR21","doi-asserted-by":"publisher","unstructured":"Mohapatra, P., Pandey, A., Islam, B., Zhu, Q.: Speech disfluency detection with contextual representation and data distillation. In: Proceedings of the 1st ACM International Workshop on Intelligent Acoustic Systems and Applications, pp. 19\u201324. IASA \u201922, Association for Computing Machinery, New York, NY, USA (2022). https:\/\/doi.org\/10.1145\/3539490.3539601","DOI":"10.1145\/3539490.3539601"},{"key":"34_CR22","doi-asserted-by":"publisher","unstructured":"Nair, S., Mohan, M., Rajesh, J., Chandran, P.: On finding the best learning model for assessing confidence in speech. In: 2020 The 3rd International Conference on Machine Learning and Machine Intelligence, pp. 58\u201364. MLMI \u201920, Association for Computing Machinery, New York, NY, USA (2020). https:\/\/doi.org\/10.1145\/3426826.3426838","DOI":"10.1145\/3426826.3426838"},{"key":"34_CR23","doi-asserted-by":"publisher","unstructured":"Ningrum, N., Listyani, L.: Academic speaking students\u2019 efforts in minimizing their lack of self-confidence. Prominent 5, 141\u2013167 (2022). 
https:\/\/doi.org\/10.24176\/pro.v5i2.7874","DOI":"10.24176\/pro.v5i2.7874"},{"key":"34_CR24","doi-asserted-by":"publisher","unstructured":"Pepino, L., Riera, P., Ferrer, L.: Emotion recognition from speech using wav2vec 2.0 embeddings. In: Proceedings of Interspeech 2021, pp. 3400\u20133404 (2021). https:\/\/doi.org\/10.21437\/Interspeech.2021-703","DOI":"10.21437\/Interspeech.2021-703"},{"key":"34_CR25","unstructured":"Pichora-Fuller, M.K., Dupuis, K.: Toronto emotional speech set (TESS) (2020)"},{"key":"34_CR26","unstructured":"Radford, A., Kim, J.W., Xu, T., Brockman, G., McLeavey, C., Sutskever, I.: Robust speech recognition via large-scale weak supervision (2022). arxiv:2212.04356"},{"key":"34_CR27","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1016\/j.jfludis.2018.03.002","volume":"56","author":"NB Ratner","year":"2018","unstructured":"Ratner, N.B., MacWhinney, B.: Fluency Bank: a new resource for fluency research and practice. J. Fluency Disord. 56, 69\u201380 (2018)","journal-title":"J. Fluency Disord."},{"key":"34_CR28","unstructured":"Shahabks: Shahabks\/myprosody: a Python library for measuring the acoustic features of speech (simultaneous speech, high entropy) compared to ones of native speech. https:\/\/github.com\/Shahabks\/myprosody (2021). Accessed 03 Jun 2024"},{"key":"34_CR29","doi-asserted-by":"crossref","unstructured":"Sta\u0161, J., Hl\u00e1dek, D., Sokolov\u00e1, Z., \u010cech, M., \u0160kotkov\u00e1, K., Poremba, P.: Analysis and detection of speech under emotional stress. In: 2023 21st International Conference on Emerging eLearning Technologies and Applications (ICETA), pp. 493\u2013498. IEEE (2023)","DOI":"10.1109\/ICETA61311.2023.10343755"},{"key":"34_CR30","doi-asserted-by":"publisher","unstructured":"Trinh, H., Asadi, R., Edge, D., Bickmore, T.: RoboCOP: a robotic coach for oral presentations. Proc. ACM Interact. Mob. Wearable Ubiquitous Technol. 1(2) (2017). 
https:\/\/doi.org\/10.1145\/3090092","DOI":"10.1145\/3090092"},{"key":"34_CR31","unstructured":"Williams, G., McLellan, B., Sivesind, G.: Identifying Confidence in Speech, p. 6 (2017)"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence in Education"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-98465-5_34","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,7]],"date-time":"2025-09-07T14:40:31Z","timestamp":1757256031000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-98465-5_34"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031984648","9783031984655"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-98465-5_34","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"20 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"AIED","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Intelligence in Education","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Palermo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference 
Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aied2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/aied2025.itd.cnr.it\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}