{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T10:08:02Z","timestamp":1776679682802,"version":"3.51.2"},"publisher-location":"Singapore","reference-count":29,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819570775","type":"print"},{"value":"9789819570782","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-7078-2_25","type":"book-chapter","created":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T09:26:54Z","timestamp":1776677214000},"page":"386-401","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["FluLLM: Speech Fluency Classification Based on\u00a0Multi-modal Large Language Models"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-0973-4505","authenticated-orcid":false,"given":"Mulati","family":"Kahaer","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1681-1089","authenticated-orcid":false,"given":"Aishan","family":"Wumaier","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7714-5330","authenticated-orcid":false,"given":"Zhengping","family":"Song","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-3987-8141","authenticated-orcid":false,"given":"Licheng","family":"Ren","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-2131-2094","authenticated-orcid":false,"given":"Xueliang","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,4,21]]},"reference":[{"key":"25_CR1","unstructured":"Bhat, S., Hasegawa-Johnson, M., Sproat, R.: Automatic fluency assessment by signal-level measurement of spontaneous speech. In: Second Language Studies: Acquisition, Learning, Education and Technology (L2WS 2010), pp. paper O2\u20131 (2010)"},{"issue":"4","key":"25_CR2","doi-asserted-by":"publisher","first-page":"1015","DOI":"10.1109\/TASL.2010.2076389","volume":"19","author":"MP Black","year":"2011","unstructured":"Black, M.P., Tepperman, J., Narayanan, S.S.: Automatic prediction of children\u2019s reading ability for high-level literacy assessment. IEEE Trans. Audio Speech Lang. Process. 19(4), 1015\u20131028 (2011). https:\/\/doi.org\/10.1109\/TASL.2010.2076389","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"25_CR3","doi-asserted-by":"publisher","unstructured":"Chen, G., Parsa, V.: Bayesian model based non-intrusive speech quality evaluation. In: Proceedings. (ICASSP \u201905). IEEE International Conference on Acoustics, Speech, and Signal Processing, 2005, vol.\u00a01, pp. I\/385\u2013I\/388 (2005). https:\/\/doi.org\/10.1109\/ICASSP.2005.1415131","DOI":"10.1109\/ICASSP.2005.1415131"},{"key":"25_CR4","doi-asserted-by":"publisher","unstructured":"Chen, N.F., Li, H.: Computer-assisted pronunciation training: From pronunciation scoring towards spoken language learning. In: 2016 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA), pp.\u00a01\u20137 (2016). https:\/\/doi.org\/10.1109\/APSIPA.2016.7820782","DOI":"10.1109\/APSIPA.2016.7820782"},{"key":"25_CR5","doi-asserted-by":"publisher","unstructured":"Chi, P.H., et al.: Audio albert: a lite bert for self-supervised learning of audio representation. In: 2021 IEEE Spoken Language Technology Workshop (SLT), pp. 344\u2013350 (2021). https:\/\/doi.org\/10.1109\/SLT48900.2021.9383575","DOI":"10.1109\/SLT48900.2021.9383575"},{"key":"25_CR6","unstructured":"Chu, Y., et al.: Qwen2-audio technical report (2024). https:\/\/arxiv.org\/abs\/2407.10759"},{"key":"25_CR7","doi-asserted-by":"publisher","unstructured":"Chung, H., Lee, Y.K., Lee, S.J., Park, J.G.: Spoken English fluency scoring using convolutional neural networks. In: 2017 20th Conference of the Oriental Chapter of the International Coordinating Committee on Speech Databases and Speech I\/O Systems and Assessment (O-COCOSDA), pp.\u00a01\u20136 (2017). https:\/\/doi.org\/10.1109\/ICSDA.2017.8384444","DOI":"10.1109\/ICSDA.2017.8384444"},{"key":"25_CR8","unstructured":"Cui, W., et al.: Recent advances in speech language models: a survey (2025). https:\/\/arxiv.org\/abs\/2410.03751"},{"key":"25_CR9","doi-asserted-by":"publisher","unstructured":"Evanini, K., Wang, X.: Automated speech scoring for non-native middle school students with multiple task types. In: Interspeech 2013, pp. 2435\u20132439 (2013). https:\/\/doi.org\/10.21437\/Interspeech.2013-566","DOI":"10.21437\/Interspeech.2013-566"},{"key":"25_CR10","unstructured":"Fu, K., Peng, L., Yang, N., Zhou, S.: Pronunciation assessment with multi-modal large language models (2024). https:\/\/arxiv.org\/abs\/2407.09209"},{"key":"25_CR11","unstructured":"Huang, R., et al.: Audiogpt: understanding and generating speech, music, sound, and talking head (2023). https:\/\/arxiv.org\/abs\/2304.12995"},{"key":"25_CR12","doi-asserted-by":"publisher","unstructured":"Kim, S., Jo, M.: Is gpt-4 alone sufficient for automated essay scoring?: a comparative judgment approach based on rater cognition. In: Proceedings of the Eleventh ACM Conference on Learning @ Scale, pp. 315\u2013319. L@S \u201924, Association for Computing Machinery, New York, NY, USA (2024). https:\/\/doi.org\/10.1145\/3657604.3664703","DOI":"10.1145\/3657604.3664703"},{"key":"25_CR13","unstructured":"Li, J., Li, D., Savarese, S., Hoi, S.: Blip-2: bootstrapping language-image pre-training with frozen image encoders and large language models. In: Proceedings of the 40th International Conference on Machine Learning. ICML\u201923, JMLR.org (2023)"},{"key":"25_CR14","doi-asserted-by":"publisher","unstructured":"Liu, J., Wumaier, A., Fan, C., Guo, S.: Automatic fluency assessment method for spontaneous speech without reference text. Electronics 12(8) (2023). https:\/\/doi.org\/10.3390\/electronics12081775, https:\/\/www.mdpi.com\/2079-9292\/12\/8\/1775","DOI":"10.3390\/electronics12081775"},{"key":"25_CR15","doi-asserted-by":"publisher","unstructured":"Ma, Z., et al.: An embarrassingly simple approach for LLM with strong ASR capacity. CoRR abs\/2402.08846 (2024). https:\/\/doi.org\/10.48550\/ARXIV.2402.08846","DOI":"10.48550\/ARXIV.2402.08846"},{"key":"25_CR16","doi-asserted-by":"crossref","unstructured":"Metallinou, A., Cheng, J.: Using deep neural networks to improve proficiency assessment for children English language learners. In: Interspeech (2014). https:\/\/api.semanticscholar.org\/CorpusID:3208894","DOI":"10.21437\/Interspeech.2014-358"},{"key":"25_CR17","doi-asserted-by":"publisher","unstructured":"Panda, A., Acharya, R., Kopparapu, S.K.: Oral fluency classification for speech assessment. In: 2023 31st European Signal Processing Conference (EUSIPCO), pp. 231\u2013235 (2023). https:\/\/doi.org\/10.23919\/EUSIPCO58844.2023.10289791","DOI":"10.23919\/EUSIPCO58844.2023.10289791"},{"key":"25_CR18","doi-asserted-by":"publisher","unstructured":"Pepino, L., Riera, P., Ferrer, L.: Emotion recognition from speech using wav2vec 2.0 embeddings. In: Interspeech 2021, pp. 3400\u20133404 (2021). https:\/\/doi.org\/10.21437\/Interspeech.2021-703","DOI":"10.21437\/Interspeech.2021-703"},{"key":"25_CR19","unstructured":"Preciado-Grijalva, A., Brena, R.F.: Speaker fluency level classification using machine learning techniques (2018). https:\/\/arxiv.org\/abs\/1808.10556"},{"key":"25_CR20","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision (2021). https:\/\/arxiv.org\/abs\/2103.00020"},{"key":"25_CR21","unstructured":"Tang, C., et al.: Salmonn: towards generic hearing abilities for large language models (2024). https:\/\/arxiv.org\/abs\/2310.13289"},{"key":"25_CR22","doi-asserted-by":"publisher","unstructured":"Tejedor-Garc\u00eda, C., Carde\u00f1oso-Payo, V., Machuca, M.J., Escudero-Mancebo, D., R\u00edos, A., Kimura, T.: Improving pronunciation of Spanish as a foreign language for l1 Japanese speakers with Japa\u00f1ol capt tool. In: IberSPEECH 2018, pp. 97\u2013101 (2018). https:\/\/doi.org\/10.21437\/IberSPEECH.2018-21","DOI":"10.21437\/IberSPEECH.2018-21"},{"key":"25_CR23","unstructured":"Touvron, H., et al.: Llama 2: open foundation and fine-tuned chat models (2023). https:\/\/arxiv.org\/abs\/2307.09288"},{"key":"25_CR24","unstructured":"Wade, P.S., Andries, M., Kanellos, I., Moudenc, T.: Acoustic-based fluency classification using LSTM-Attention with computationally-cheap data augmentation for an adaptive voicebot (2023). https:\/\/imt-atlantique.hal.science\/hal-04105008, working paper or preprint"},{"key":"25_CR25","doi-asserted-by":"publisher","unstructured":"Xiao, C., et al.: Human-ai collaborative essay scoring: a dual-process framework with llms. In: Proceedings of the 15th International Learning Analytics and Knowledge Conference, pp. 293\u2013305. LAK \u201925, Association for Computing Machinery, New York, NY, USA (2025). https:\/\/doi.org\/10.1145\/3706468.3706507","DOI":"10.1145\/3706468.3706507"},{"key":"25_CR26","doi-asserted-by":"publisher","unstructured":"Yang, G., Ma, Z., Yu, F., Gao, Z., Zhang, S., Chen, X.: Mala-asr: multimedia-assisted llm-based asr. In: Interspeech 2024, pp. 2405\u20132409 (2024). https:\/\/doi.org\/10.21437\/Interspeech.2024-488","DOI":"10.21437\/Interspeech.2024-488"},{"key":"25_CR27","doi-asserted-by":"publisher","unstructured":"Yu, Z., et al.: Using bidirectional lstm recurrent neural networks to learn high-level abstractions of sequential features for automated scoring of non-native spontaneous speech. In: 2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), pp. 338\u2013345 (2015). https:\/\/doi.org\/10.1109\/ASRU.2015.7404814","DOI":"10.1109\/ASRU.2015.7404814"},{"key":"25_CR28","doi-asserted-by":"publisher","unstructured":"Zhang, J., et al.: Speechocean762: an open-source non-native English speech corpus for pronunciation assessment. In: Interspeech 2021, pp. 3710\u20133714 (2021). https:\/\/doi.org\/10.21437\/Interspeech.2021-1259","DOI":"10.21437\/Interspeech.2021-1259"},{"key":"25_CR29","doi-asserted-by":"publisher","unstructured":"Zhang, M., Tan, C., Lin, B.: Exploring the potential of multimodal large language models as augmentative and alternative communication systems: optimization, challenges, and ethical considerations. In: 2025 14th International Conference on Educational and Information Technology (ICEIT), pp. 27\u201331 (2025). https:\/\/doi.org\/10.1109\/ICEIT64364.2025.10976080","DOI":"10.1109\/ICEIT64364.2025.10976080"}],"container-title":["Lecture Notes in Computer Science","PRICAI 2025: Trends in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-7078-2_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T09:27:06Z","timestamp":1776677226000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-7078-2_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819570775","9789819570782"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-7078-2_25","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"21 April 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRICAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pacific Rim International Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Wellington","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"New Zealand","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 November 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 November 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pricai2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.pricai.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}