{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,11]],"date-time":"2026-01-11T09:08:40Z","timestamp":1768122520372,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":31,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819557608","type":"print"},{"value":"9789819557615","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5761-5_39","type":"book-chapter","created":{"date-parts":[[2026,1,11]],"date-time":"2026-01-11T05:52:30Z","timestamp":1768110750000},"page":"561-575","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["EmoDial-Reason: Unveiling Affective Reasoning in\u00a0Speech-Emotion 
Dialogue"],"prefix":"10.1007","author":[{"given":"Shubei","family":"Tang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lu","family":"Xiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yaping","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chengqing","family":"Zong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,1,12]]},"reference":[{"key":"39_CR1","unstructured":"Achiam, J., et\u00a0al.: GPT-4 technical report. arXiv preprint arXiv:2303.08774 (2023)"},{"key":"39_CR2","unstructured":"Bai, S., et\u00a0al.: Qwen2.5-vl technical report. arXiv preprint arXiv:2502.13923 (2025)"},{"key":"39_CR3","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1007\/s10579-008-9076-6","volume":"42","author":"C Busso","year":"2008","unstructured":"Busso, C., et al.: IEMOCAP: interactive emotional dyadic motion capture database. Lang. Resour. Eval. 42, 335\u2013359 (2008)","journal-title":"Lang. Resour. Eval."},{"key":"39_CR4","doi-asserted-by":"crossref","unstructured":"Chae, H., et al.: Dialogue chain-of-thought distillation for commonsense-aware conversational agents. In: Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pp. 5606\u20135632 (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.342"},{"key":"39_CR5","unstructured":"Chu, Y., et\u00a0al.: Qwen2-audio technical report. arXiv preprint arXiv:2407.10759 (2024)"},{"key":"39_CR6","doi-asserted-by":"crossref","unstructured":"Chu, Z., et al.: Navigate through enigmatic labyrinth a survey of chain of thought reasoning: advances, frontiers and future. In: Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 
1173\u20131203 (2024)","DOI":"10.18653\/v1\/2024.acl-long.65"},{"key":"39_CR7","unstructured":"D\u00e9fossez, A., et al.: Moshi: a speech-text foundation model for real-time dialogue. arXiv preprint arXiv:2410.00037 (2024)"},{"key":"39_CR8","doi-asserted-by":"publisher","unstructured":"Demszky, D., Movshovitz-Attias, D., Ko, J., Cowen, A., Nemade, G., Ravi, S.: GoEmotions: a dataset of fine-grained emotions. In: Jurafsky, D., Chai, J., Schluter, N., Tetreault, J. (eds.) Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 4040\u20134054. Association for Computational Linguistics (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.372, https:\/\/aclanthology.org\/2020.acl-main.372\/","DOI":"10.18653\/v1\/2020.acl-main.372"},{"key":"39_CR9","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Burstein, J., Doran, C., Solorio, T. (eds.) Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pp. 4171\u20134186. Association for Computational Linguistics, Minneapolis, Minnesota (2019). https:\/\/doi.org\/10.18653\/v1\/N19-1423, https:\/\/aclanthology.org\/N19-1423\/","DOI":"10.18653\/v1\/N19-1423"},{"key":"39_CR10","unstructured":"Fang, Q., Guo, S., Zhou, Y., Ma, Z., Zhang, S., Feng, Y.: Llama-omni: seamless speech interaction with large language models. arXiv preprint arXiv:2409.06666 (2024)"},{"key":"39_CR11","unstructured":"Guo, D., et\u00a0al.: Deepseek-r1: Incentivizing reasoning capability in LLMs via reinforcement learning. arXiv preprint arXiv:2501.12948 (2025)"},{"key":"39_CR12","doi-asserted-by":"crossref","unstructured":"Lee, K., Park, K., Kim, D.: Dailytalk: spoken dialogue dataset for conversational text-to-speech. 
ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.\u00a01\u20135 (2022). https:\/\/api.semanticscholar.org\/CorpusID:250265010","DOI":"10.1109\/ICASSP49357.2023.10095751"},{"key":"39_CR13","unstructured":"Li, G., Liu, J., Dinkel, H., Niu, Y., Zhang, J., Luan, J.: Reinforcement learning outperforms supervised fine-tuning: a case study on audio question answering. ArXiv abs\/2503.11197 https:\/\/api.semanticscholar.org\/CorpusID:277043724 (2025)"},{"key":"39_CR14","doi-asserted-by":"crossref","unstructured":"Li, J., Galley, M., Brockett, C., Gao, J., Dolan, W.B.: A diversity-promoting objective function for neural conversation models. In: Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 110\u2013119 (2016)","DOI":"10.18653\/v1\/N16-1014"},{"key":"39_CR15","unstructured":"Li, Z.Z., et\u00a0al.: From system 1 to system 2: a survey of reasoning large language models. arXiv preprint arXiv:2502.17419 (2025)"},{"key":"39_CR16","doi-asserted-by":"crossref","unstructured":"Liang, Z., et al.: A survey of multimodel large language models. In: Proceedings of the 3rd International Conference on Computer, Artificial Intelligence and Control Engineering, pp. 405\u2013409 (2024)","DOI":"10.1145\/3672758.3672824"},{"key":"39_CR17","doi-asserted-by":"crossref","unstructured":"Lin, G.T., Chiang, C.H., Lee, H.Y.: Advancing large language models to capture varied speaking styles and respond properly in spoken conversations. In: Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 6626\u20136642 (2024)","DOI":"10.18653\/v1\/2024.acl-long.358"},{"key":"39_CR18","doi-asserted-by":"crossref","unstructured":"Lin, G.T., et al.: Paralinguistics-enhanced large language modeling of spoken dialogue. 
In: ICASSP 2024-2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 10316\u201310320. IEEE (2024)","DOI":"10.1109\/ICASSP48485.2024.10446933"},{"key":"39_CR19","unstructured":"Minaee, S., et al.: Large language models: a survey. arXiv preprint arXiv:2402.06196 (2024)"},{"key":"39_CR20","unstructured":"Rashkin, H., Smith, E.M., Li, M., Boureau, Y.L.: I know the feeling: learning to converse with empathy. arXiv preprint arXiv:1811.00207 (2018)"},{"key":"39_CR21","unstructured":"Tang, C., et al.: Salmonn: towards generic hearing abilities for large language models. arXiv preprint arXiv:2310.13289 (2023)"},{"key":"39_CR22","unstructured":"Team, G., et\u00a0al.: Gemini 1.5: unlocking multimodal understanding across millions of tokens of context. arXiv preprint arXiv:2403.05530 (2024)"},{"key":"39_CR23","unstructured":"Wang, C., et al.: BLSP: Bootstrapping language-speech pre-training via behavior alignment of continuation writing. arXiv preprint arXiv:2309.00916 (2023)"},{"key":"39_CR24","doi-asserted-by":"crossref","unstructured":"Wang, C., Liao, M., Huang, Z., Wu, J., Zong, C., Zhang, J.: BLSP-emo: towards empathetic large speech-language models. In: Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pp. 19186\u201319199 (2024)","DOI":"10.18653\/v1\/2024.emnlp-main.1070"},{"key":"39_CR25","first-page":"24824","volume":"35","author":"J Wei","year":"2022","unstructured":"Wei, J., et al.: Chain-of-thought prompting elicits reasoning in large language models. Adv. Neural. Inf. Process. Syst. 35, 24824\u201324837 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"39_CR26","doi-asserted-by":"crossref","unstructured":"Xie, J., et al.: Leveraging chain of thought towards empathetic spoken dialogue without corresponding question-answering data. 
arXiv preprint arXiv:2501.10937 (2025)","DOI":"10.1109\/ICASSP49660.2025.10889870"},{"key":"39_CR27","unstructured":"Xu, J., et\u00a0al.: Qwen2.5-omni technical report. arXiv preprint arXiv:2503.20215 (2025)"},{"key":"39_CR28","doi-asserted-by":"publisher","unstructured":"Xue, H., et al.: E-chat: emotion-sensitive spoken dialogue system with large language models. In: 2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP), pp. 586\u2013590 (2024). https:\/\/doi.org\/10.1109\/ISCSLP63861.2024.10800447","DOI":"10.1109\/ISCSLP63861.2024.10800447"},{"key":"39_CR29","unstructured":"Ye, J., Xiang, L., Zhang, Y., Zong, C.: Sweetiechat: a strategy-enhanced role-playing framework for diverse scenarios handling emotional support agent. In: Proceedings of the 31st International Conference on Computational Linguistics, pp. 4646\u20134669 (2025)"},{"key":"39_CR30","unstructured":"Zhang, T., Kishore, V., Wu, F., Weinberger, K.Q., Artzi, Y.: Bertscore: evaluating text generation with bert. In: International Conference on Learning Representations (2020)"},{"key":"39_CR31","unstructured":"Zhao, J., Wei, X., Bo, L.: R1-omni: explainable omni-multimodal emotion recognition with reinforcement learning. 
arXiv preprint arXiv:2503.05379 (2025)"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5761-5_39","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,11]],"date-time":"2026-01-11T05:52:33Z","timestamp":1768110753000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5761-5_39"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819557608","9789819557615"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5761-5_39","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"12 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start 
Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}