{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T07:25:10Z","timestamp":1761895510325,"version":"build-2065373602"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1109\/icme59968.2025.11210157","type":"proceedings-article","created":{"date-parts":[[2025,10,30]],"date-time":"2025-10-30T17:57:42Z","timestamp":1761847062000},"page":"1-6","source":"Crossref","is-referenced-by-count":0,"title":["Dual Information Speech Language Models for Emotional Conversations"],"prefix":"10.1109","author":[{"given":"Chun","family":"Wang","sequence":"first","affiliation":[{"name":"Mashang Consumer Finance Co., Ltd.,Chongqing,China"}]},{"given":"Chenyang","family":"Liu","sequence":"additional","affiliation":[{"name":"Mashang Consumer Finance Co., Ltd.,Chongqing,China"}]},{"given":"Wenze","family":"Xu","sequence":"additional","affiliation":[{"name":"Mashang Consumer Finance Co., Ltd.,Chongqing,China"}]},{"given":"Weihong","family":"Deng","sequence":"additional","affiliation":[{"name":"Mashang Consumer Finance Co., 
Ltd.,Chongqing,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.269"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.358"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10446933"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ISCSLP63861.2024.10800447"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.1070"},{"article-title":"Wavchat: A survey of spoken dialogue models","year":"2024","author":"Ji","key":"ref6"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10445874"},{"article-title":"An embarrassingly simple approach for llm with strong asr capacity","year":"2024","author":"Ma","key":"ref8"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2025-166"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.309"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-short.8"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i17.29902"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10445977"},{"article-title":"Qwen2-audio technical report","year":"2024","author":"Chu","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"article-title":"Multitask prompt tuning enables parameter-efficient transfer learning","volume-title":"The Eleventh International Conference on Learning 
Representations","author":"Wang","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-633"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10889444"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10445879"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1050"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095751"},{"key":"ref23","first-page":"986","article-title":"DailyDialog: A manually labelled multi-turn dialogue dataset","volume-title":"Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)","author":"Li"},{"article-title":"Llama-omni: Seamless speech interaction with large language models","year":"2024","author":"Fang","key":"ref24"},{"article-title":"Enhancing emotional generation capability of large language models via emotional chain-of-thought","year":"2024","author":"Li","key":"ref25"},{"key":"ref26","first-page":"28492","article-title":"Robust speech recognition via large-scale weak supervision","volume-title":"Proceedings of the 40th International Conference on Machine Learning, 23\u201329 Jul 2023, vol. 
202 of Proceedings of Machine Learning Research","author":"Radford"},{"year":"2024","key":"ref27","article-title":"Qwen2.5: A party of foundation models"},{"article-title":"The llama 3 herd of models","year":"2024","author":"Dubey","key":"ref28"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.931"},{"article-title":"Qwen-audio: Advancing universal audio understanding via unified large-scale audio-language models","year":"2023","author":"Chu","key":"ref30"},{"article-title":"SALMONN: Towards generic hearing abilities for large language models","volume-title":"The Twelfth International Conference on Learning Representations","author":"Tang","key":"ref31"}],"event":{"name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","start":{"date-parts":[[2025,6,30]]},"location":"Nantes, France","end":{"date-parts":[[2025,7,4]]}},"container-title":["2025 IEEE International Conference on Multimedia and Expo (ICME)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11208895\/11208897\/11210157.pdf?arnumber=11210157","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T05:52:00Z","timestamp":1761889920000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11210157\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/icme59968.2025.11210157","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]}}}