{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,16]],"date-time":"2026-05-16T06:11:10Z","timestamp":1778911870422,"version":"3.51.4"},"reference-count":60,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T00:00:00Z","timestamp":1759881600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T00:00:00Z","timestamp":1759881600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"the Science and Technology Innovation Key R&D Program of Chongqing","award":["CSTB2024TIAD-STX0027"],"award-info":[{"award-number":["CSTB2024TIAD-STX0027"]}]},{"DOI":"10.13039\/501100001809","name":"the National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62472059"],"award-info":[{"award-number":["62472059"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"the Chongqing Talent Plan Project, China","award":["CSTC2024YCJH-BGZXM0022"],"award-info":[{"award-number":["CSTC2024YCJH-BGZXM0022"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Intell Inf Syst"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1007\/s10844-025-00991-z","type":"journal-article","created":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T03:13:05Z","timestamp":1759893185000},"page":"1151-1173","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["DTDiff: adaptive decoupled transformer with language-conditioned denoising learning for multimodal emotion recognition in conversation"],"prefix":"10.1007","volume":"64","author":[{"given":"Tingting","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaofei","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,10,8]]},"reference":[{"issue":"4","key":"991_CR1","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1007\/S10579-008-9076-6","volume":"42","author":"C Busso","year":"2008","unstructured":"Busso, C., Bulut, M., Lee, C., et al. (2008). Iemocap: interactive emotional dyadic motion capture database. Lang Resour Evaluation, 42(4), 335\u2013359. https:\/\/doi.org\/10.1007\/S10579-008-9076-6","journal-title":"Lang Resour Evaluation"},{"key":"991_CR2","doi-asserted-by":"publisher","unstructured":"Chen, F., Shao, J., Zhu, S., et\u00a0al. (2023). Multivariate, multi-frequency and multimodal: Rethinking graph neural networks for emotion recognition in conversation. In: IEEE\/CVF conference on computer vision and pattern recognition, pp 10761\u201310770. https:\/\/doi.org\/10.18653\/V1\/D18-1280","DOI":"10.18653\/V1\/D18-1280"},{"key":"991_CR3","doi-asserted-by":"publisher","unstructured":"Chen, B., Zhang, Z., Li ,W., et\u00a0al. (2024). Invertible diffusion models for compressed sensing. CoRR arXiv:2403.17006. https:\/\/doi.org\/10.1109\/TPAMI.2025.3538896","DOI":"10.1109\/TPAMI.2025.3538896"},{"issue":"3","key":"991_CR4","doi-asserted-by":"publisher","first-page":"989","DOI":"10.1007\/S10844-025-00925-9","volume":"63","author":"X Gan","year":"2025","unstructured":"Gan, X., Huang, X., & Zou, S. (2025). Intentional tendency-based dynamic heterogeneous graph network for emotion recognition in conversations. Journal of Intelligent Information Systems, 63(3), 989\u20131010. https:\/\/doi.org\/10.1007\/S10844-025-00925-9","journal-title":"Journal of Intelligent Information Systems"},{"key":"991_CR5","doi-asserted-by":"crossref","unstructured":"Ghosal, D., Majumder, N., Poria, S., et al. (2019). Dialoguegcn: A graph convolutional neural network for emotion recognition in conversation. In: Proceedings of the 2019 conference on empirical methods in natural language processing, pp. 154\u2013164, 10.18653\/V1\/D19-1015","DOI":"10.18653\/v1\/D19-1015"},{"key":"991_CR6","doi-asserted-by":"publisher","unstructured":"Ho, J., Jain, A., Abbeel, P. (2020). Denoising diffusion probabilistic models. In: Advances in neural information processing systems 33: annual conference on neural information processing systems 2020. https:\/\/doi.org\/10.48550\/arXiv.2006.11239","DOI":"10.48550\/arXiv.2006.11239"},{"key":"991_CR7","doi-asserted-by":"publisher","unstructured":"Hu, D., Hou, X., Wei, L., et al. (2022). MM-DFN: multimodal dynamic fusion network for emotion recognition in conversations. In: IEEE international conference on acoustics, speech and signal processing, pp. 7037\u20137041. https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9747397","DOI":"10.1109\/ICASSP43922.2022.9747397"},{"key":"991_CR8","doi-asserted-by":"publisher","unstructured":"Hu, J., Liu, Y., Zhao, J., et al. (2021). MMGCN: multimodal fusion via deep graph convolution network for emotion recognition in conversation. In: Proceedings of the 59th annual meeting of the association for computational linguistics, pp. 5666\u20135675. https:\/\/doi.org\/10.18653\/V1\/2021.ACL-LONG.440","DOI":"10.18653\/V1\/2021.ACL-LONG.440"},{"key":"991_CR9","doi-asserted-by":"publisher","unstructured":"Ishiwatari, T., Yasuda, Y., Miyazaki, T., et al. (2020). Relation-aware graph attention networks with relational position encodings for emotion recognition in conversations. In: Proceedings of the 2020 conference on empirical methods in natural language processing, pp. 7360\u20137370. https:\/\/doi.org\/10.18653\/V1\/2020.EMNLP-MAIN.597","DOI":"10.18653\/V1\/2020.EMNLP-MAIN.597"},{"key":"991_CR10","doi-asserted-by":"publisher","unstructured":"Jannu, C., Vanambathina, S.D. (2023a). An attention based densely connected u-net with convolutional gru for speech enhancement. In: 2023 3rd International conference on artificial intelligence and signal processing (AISP), pp. 1\u20135. https:\/\/doi.org\/10.1109\/AISP57993.2023.10134933","DOI":"10.1109\/AISP57993.2023.10134933"},{"key":"991_CR11","doi-asserted-by":"crossref","unstructured":"Jannu, C., Vanambathina, S.D. (2023b). Convolutional transformer based local and global feature learning for speech enhancement. International Journal of Advanced Computer Science and Applications 14(1). 10.14569\/IJACSA.2023.0140181","DOI":"10.14569\/IJACSA.2023.0140181"},{"issue":"1","key":"991_CR12","doi-asserted-by":"publisher","first-page":"1195","DOI":"10.3233\/JIFS-223951","volume":"45","author":"C Jannu","year":"2023","unstructured":"Jannu, C., & Vanambathina, S. D. (2023). Dct based densely connected convolutional gru for real-time speech enhancement. Journal of Intelligent & Fuzzy Systems, 45(1), 1195\u20131208. https:\/\/doi.org\/10.3233\/JIFS-223951","journal-title":"Journal of Intelligent & Fuzzy Systems"},{"issue":"01","key":"991_CR13","doi-asserted-by":"publisher","first-page":"2550001","DOI":"10.1142\/S0219467825500019","volume":"25","author":"C Jannu","year":"2025","unstructured":"Jannu, C., & Vanambathina, S. D. (2025). An overview of speech enhancement based on deep learning techniques. International Journal of Image and Graphics, 25(01), 2550001. https:\/\/doi.org\/10.1142\/S0219467825500019","journal-title":"International Journal of Image and Graphics"},{"key":"991_CR14","doi-asserted-by":"publisher","unstructured":"Jiang, Y., Xia, L., Wei, W., et al. (2024) Diffmm: Multi-modal diffusion model for recommendation. In: Proceedings of the 32nd ACM international conference on multimedia, pp. 7591\u20137599. https:\/\/doi.org\/10.1145\/3664647.3681498","DOI":"10.1145\/3664647.3681498"},{"key":"991_CR15","doi-asserted-by":"publisher","unstructured":"Jiao, W., Yang, H., King, I., et al. (2019). Higru: Hierarchical gated recurrent units for utterance-level emotion recognition. In: Proceedings of the 2019 conference of the north american chapter of the association for computational linguistics: human language technologies, pp. 397\u2013406. https:\/\/doi.org\/10.18653\/V1\/N19-1037","DOI":"10.18653\/V1\/N19-1037"},{"key":"991_CR16","doi-asserted-by":"publisher","unstructured":"Jing, Y., Zhao, X. (2024). Dq-former: Querying transformer with dynamic modality priority for cognitive-aligned multimodal emotion recognition in conversation. In: Proceedings of the 32nd ACM international conference on multimedia, pp 4795\u20134804. https:\/\/doi.org\/10.1145\/3664647.3681599","DOI":"10.1145\/3664647.3681599"},{"key":"991_CR17","doi-asserted-by":"publisher","unstructured":"Job, S., Tao, X., Cai, T., et al. (2025). Exploring causal learning through graph neural networks: an in-depth review. Wiley Interdisciplinary Reviews: Data Mining and Knowledge Discovery,15. https:\/\/doi.org\/10.1002\/widm.70024","DOI":"10.1002\/widm.70024"},{"key":"991_CR18","doi-asserted-by":"publisher","unstructured":"Joshi, A., Bhat, A., Jain, A., et al. (2022). COGMEN: contextualized GNN based multimodal emotion recognition. In: Proceedings of the 2022 conference of the north american chapter of the association for computational linguistics: human language technologies, pp. 4148\u20134164. https:\/\/doi.org\/10.18653\/v1\/2022.naacl-main.306","DOI":"10.18653\/v1\/2022.naacl-main.306"},{"key":"991_CR19","doi-asserted-by":"publisher","unstructured":"Le, Y., Li, H., Ou, B., et al. (2025). Diffusion model for interest refinement in multi-interest recommendation. https:\/\/doi.org\/10.48550\/ARXIV.2502.05561","DOI":"10.48550\/ARXIV.2502.05561"},{"key":"991_CR20","doi-asserted-by":"publisher","unstructured":"Li, B., Fei, H., Liao, L., et al. (2023a) Revisiting disentanglement and fusion on modality and context in conversational multimodal emotion recognition. In: Proceedings of the 31st ACM international conference on multimedia, pp. 5923\u20135934. https:\/\/doi.org\/10.1145\/3581783.3612053","DOI":"10.1145\/3581783.3612053"},{"key":"991_CR21","doi-asserted-by":"publisher","unstructured":"Li, J., Ji, D., Li, F., et al. (2020). Hitrans: A transformer-based context- and speaker-sensitive model for emotion detection in conversations. In: Proceedings of the 28th international conference on computational linguistics, pp. 4190\u20134200. https:\/\/doi.org\/10.18653\/V1\/2020.COLING-MAIN.370","DOI":"10.18653\/V1\/2020.COLING-MAIN.370"},{"key":"991_CR22","doi-asserted-by":"publisher","unstructured":"Li, X.L., Thickstun, J., Gulrajani, I., et al. (2022). Diffusion-lm improves controllable text generation. In: Advances in neural information processing systems 35: annual conference on neural information processing systems 2022. https:\/\/doi.org\/10.48550\/arXiv.2205.14217","DOI":"10.48550\/arXiv.2205.14217"},{"key":"991_CR23","doi-asserted-by":"publisher","unstructured":"Li, Y., Wang, Y., Cui, Z. (2023c). Decoupled multimodal distilling for emotion recognition. In: Proceedings of the 2023 IEEE\/CVF conference on computer vision and pattern recognition, pp. 6631\u20136640. https:\/\/doi.org\/10.1109\/CVPR52729.2023.00641","DOI":"10.1109\/CVPR52729.2023.00641"},{"key":"991_CR24","doi-asserted-by":"publisher","unstructured":"Li, D., Wang, Y., Funakoshi, K., et al. (2023b) Joyful: Joint modality fusion and graph contrastive learning for multimodal emotion recognition. https:\/\/doi.org\/10.48550\/ARXIV.2311.11009","DOI":"10.48550\/ARXIV.2311.11009"},{"key":"991_CR25","doi-asserted-by":"publisher","unstructured":"Li, Z., Xia, L., Huang, C. (2024b) Recdiff: Diffusion model for social recommendation. In: Proceedings of the 33rd ACM international conference on information and knowledge management, pp. 1346\u20131355. https:\/\/doi.org\/10.1145\/3627673.3679630","DOI":"10.1145\/3627673.3679630"},{"key":"991_CR26","doi-asserted-by":"publisher","first-page":"985","DOI":"10.1109\/TASLP.2021.3049898","volume":"29","author":"Z Lian","year":"2021","unstructured":"Lian, Z., Liu, B., & Tao, J. (2021). Ctnet: conversational transformer network for emotion recognition. IEEE ACM Trans Audio Speech Lang Process, 29, 985\u20131000. https:\/\/doi.org\/10.1109\/TASLP.2021.3049898","journal-title":"IEEE ACM Trans Audio Speech Lang Process"},{"key":"991_CR27","doi-asserted-by":"publisher","DOI":"10.1016\/J.NEUNET.2024.106096","volume":"172","author":"G Li","year":"2024","unstructured":"Li, G., Jin, D., Zheng, Y., et al. (2024). A generic plug & play diffusion-based denosing module for medical image segmentation. Neural Networks, 172, Article 106096. https:\/\/doi.org\/10.1016\/J.NEUNET.2024.106096","journal-title":"Neural Networks"},{"key":"991_CR28","doi-asserted-by":"publisher","unstructured":"Lin, Y., Cheng, H., Huang, C., et al. (2025). Impact of glyph information on latent space diffusion models for accurate handwritten text generation. In: 2025 IEEE international conference on acoustics, speech and signal processing, pp. 1\u20135. https:\/\/doi.org\/10.1109\/ICASSP49660.2025.10890644","DOI":"10.1109\/ICASSP49660.2025.10890644"},{"key":"991_CR29","doi-asserted-by":"publisher","unstructured":"Luo, J., Wang, J., Zhou, G. (2024). Topicdiff: A topic-enriched diffusion approach for multimodal conversational emotion detection. In: Proceedings of the 2024 joint international conference on computational linguistics, language resources and evaluation, pp. 16304\u201316314. https:\/\/doi.org\/10.48550\/arXiv.2403.04789","DOI":"10.48550\/arXiv.2403.04789"},{"key":"991_CR30","doi-asserted-by":"publisher","unstructured":"Majumder, N., Poria, S., Hazarika, D., et al. (2019). Dialoguernn: An attentive RNN for emotion detection in conversations. In: Proceedings of the 33rd AAAI conference on artificial intelligence, AAAI 2019, pp. 6818\u20136825. https:\/\/doi.org\/10.1609\/AAAI.V33I01.33016818","DOI":"10.1609\/AAAI.V33I01.33016818"},{"key":"991_CR31","doi-asserted-by":"publisher","first-page":"776","DOI":"10.1109\/TMM.2023.3271019","volume":"26","author":"H Ma","year":"2024","unstructured":"Ma, H., Wang, J., Lin, H., et al. (2024). A transformer-based model with self-distillation for multimodal emotion recognition in conversations. IEEE Trans Multim, 26, 776\u2013788. https:\/\/doi.org\/10.1109\/TMM.2023.3271019","journal-title":"IEEE Trans Multim"},{"key":"991_CR32","doi-asserted-by":"publisher","unstructured":"Nguyen, C.T., Nguyen, C., Le, D., et al. (2024). Curriculum learning meets directed acyclic graph for multimodal emotion recognition. In: Proceedings of the 2024 joint international conference on computational linguistics, language resources and evaluation, pp. 4259\u20134265. https:\/\/doi.org\/10.48550\/arXiv.2402.17269","DOI":"10.48550\/arXiv.2402.17269"},{"issue":"4","key":"991_CR33","doi-asserted-by":"publisher","first-page":"10907","DOI":"10.3233\/JIFS-233312","volume":"46","author":"V Parisae","year":"2024","unstructured":"Parisae, V., & Nagakishore Bhavanam, S. (2024). Multi scale encoder-decoder network with time frequency attention and s-tcn for single channel speech enhancement. Journal of Intelligent & Fuzzy Systems, 46(4), 10907\u201310907. https:\/\/doi.org\/10.3233\/JIFS-233312","journal-title":"Journal of Intelligent & Fuzzy Systems"},{"key":"991_CR34","doi-asserted-by":"publisher","unstructured":"Poria, S., Cambria, E., Hazarika, D., et al. (2017). Context-dependent sentiment analysis in user-generated videos. In: Proceedings of the 55th annual meeting of the association for computational linguistics, pp. 873\u2013883. https:\/\/doi.org\/10.18653\/V1\/P17-1081","DOI":"10.18653\/V1\/P17-1081"},{"key":"991_CR35","doi-asserted-by":"publisher","unstructured":"Poria, S., Hazarika, D., Majumder, N., et al. (2019). MELD: A multimodal multi-party dataset for emotion recognition in conversations. In: Proceedings of the 57th conference of the association for computational linguistics, pp. 527\u2013536. https:\/\/doi.org\/10.18653\/V1\/P19-1050","DOI":"10.18653\/V1\/P19-1050"},{"key":"991_CR36","doi-asserted-by":"publisher","unstructured":"Shen, W., Wu, S., Yang, Y., et al. (2021). Directed acyclic graph network for conversational emotion recognition. In: Proceedings of the 59th annual meeting of the association for computational linguistics, pp. 1551\u20131560. https:\/\/doi.org\/10.18653\/V1\/2021.ACL-LONG.123","DOI":"10.18653\/V1\/2021.ACL-LONG.123"},{"issue":"1","key":"991_CR37","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/S10844-024-00858-9","volume":"63","author":"Y Shi","year":"2025","unstructured":"Shi, Y., Cai, J., & Liao, L. (2025). Multi-task learning and mutual information maximization with crossmodal transformer for multimodal sentiment analysis. Journal of Intelligent Information Systems, 63(1), 1\u201319. https:\/\/doi.org\/10.1007\/S10844-024-00858-9","journal-title":"Journal of Intelligent Information Systems"},{"key":"991_CR38","doi-asserted-by":"publisher","unstructured":"Shou. Y., Ai, W., Du, J., et al. (2024). Efficient long-distance latent relation-aware graph neural network for multi-modal emotion recognition in conversations. https:\/\/doi.org\/10.48550\/ARXIV.2407.00119","DOI":"10.48550\/ARXIV.2407.00119"},{"key":"991_CR39","doi-asserted-by":"publisher","unstructured":"Sohl-Dickstein, J., Weiss, E.A., Maheswaranathan, N., et al. (2015). Deep unsupervised learning using nonequilibrium thermodynamics. In: Proceedings of the 32nd international conference on machine learning, pp. 2256\u20132265. https:\/\/doi.org\/10.48550\/arXiv.1503.03585","DOI":"10.48550\/arXiv.1503.03585"},{"key":"991_CR40","doi-asserted-by":"publisher","unstructured":"Sun, J., Han, S., Ruan, Y., et al. (2023). Layer-wise fusion with modality independence modeling for multi-modal emotion recognition. In: Proceedings of the 61st annual meeting of the association for computational linguistics, pp. 658\u2013670. https:\/\/doi.org\/10.18653\/V1\/2023.ACL-LONG.39","DOI":"10.18653\/V1\/2023.ACL-LONG.39"},{"issue":"4","key":"991_CR41","doi-asserted-by":"publisher","first-page":"2833","DOI":"10.1109\/TPAMI.2025.3527469","volume":"47","author":"K Sun","year":"2025","unstructured":"Sun, K., Chen, Z., Lin, X., et al. (2025). Conditional diffusion models for camouflaged and salient object detection. IEEE Trans Pattern Anal Mach Intell, 47(4), 2833\u20132848. https:\/\/doi.org\/10.1109\/TPAMI.2025.3527469","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"991_CR42","doi-asserted-by":"publisher","unstructured":"Suryanto, N., Adiputra, A.A., Kadiptya, A.Y., et al. (2025) Cityscape-adverse: Benchmarking robustness of semantic segmentation with realistic scene modifications via diffusion-based image editing. pp 69921\u201369940. https:\/\/doi.org\/10.1109\/ACCESS.2025.3537981","DOI":"10.1109\/ACCESS.2025.3537981"},{"key":"991_CR43","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TGRS.2024.3453414","volume":"62","author":"D Tang","year":"2024","unstructured":"Tang, D., Cao, X., Hou, X., et al. (2024). Crs-diff: controllable remote sensing image generation with diffusion model. IEEE Trans Geosci Remote Sens, 62, 1\u201314. https:\/\/doi.org\/10.1109\/TGRS.2024.3453414","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"991_CR44","doi-asserted-by":"publisher","unstructured":"Tu, G., Xie, T., Liang, B., et al. (2024). Adaptive graph learning for multimodal conversational emotion detection. In: Proceedings of the 38th AAAI conference on artificial intelligence, pp. 19089\u201319097. https:\/\/doi.org\/10.1609\/AAAI.V38I17.29876","DOI":"10.1609\/AAAI.V38I17.29876"},{"key":"991_CR45","doi-asserted-by":"publisher","unstructured":"Wang, Q., Wu, B., Zhu, P., et al. (2020) Eca-net: Efficient channel attention for deep convolutional neural networks. In: 2020 IEEE\/CVF conference on computer vision and pattern recognition, pp. 11531\u201311539. https:\/\/doi.org\/10.1109\/CVPR42600.2020.01155","DOI":"10.1109\/CVPR42600.2020.01155"},{"key":"991_CR46","doi-asserted-by":"publisher","unstructured":"Wu, J., Liu, J., Zhang, T., et al. (2025). $$a^{2} h^{2}$$ for multimodal emotional data analysis. Journal of Intelligent Information Systems. https:\/\/doi.org\/10.1007\/s10844-025-00974-0","DOI":"10.1007\/s10844-025-00974-0"},{"key":"991_CR47","doi-asserted-by":"publisher","unstructured":"Wu, Z., Zhang, Q., Miao, D., et al. (2024). Hydiscgan: A hybrid distributed cgan for audio-visual privacy preservation in multimodal sentiment analysis. In: Proceedings of the 33rd international joint conference on artificial intelligence, pp. 6550\u20136558. https:\/\/doi.org\/10.24963\/ijcai.2024\/724","DOI":"10.24963\/ijcai.2024\/724"},{"issue":"2","key":"991_CR48","doi-asserted-by":"publisher","first-page":"375","DOI":"10.1007\/S10844-024-00879-4","volume":"63","author":"J Wu","year":"2025","unstructured":"Wu, J., Wu, J., Zheng, Y., et al. (2025). Mlgat: multi-layer graph attention networks for multimodal emotion recognition in conversations. Journal of Intelligent Information Systems, 63(2), 375\u2013394. https:\/\/doi.org\/10.1007\/S10844-024-00879-4","journal-title":"Journal of Intelligent Information Systems"},{"key":"991_CR49","doi-asserted-by":"publisher","unstructured":"Xie, Y., Zhou, P., Kim, S. (2022). Decoupled side information fusion for sequential recommendation. In: Proceedings of the 45th international ACM SIGIR conference on research and development in information retrieval, pp. 1611\u20131621. https:\/\/doi.org\/10.1145\/3477495.3531963","DOI":"10.1145\/3477495.3531963"},{"key":"991_CR50","doi-asserted-by":"publisher","unstructured":"Yang, H., Gao, X., Wu, J., et al. (2023). Self-adaptive context and modal-interaction modeling for multimodal emotion recognition. In: Findings of the association for computational linguistics, pp. 6267\u20136281. https:\/\/doi.org\/10.18653\/V1\/2023.FINDINGS-ACL.390","DOI":"10.18653\/V1\/2023.FINDINGS-ACL.390"},{"key":"991_CR51","doi-asserted-by":"publisher","unstructured":"Yi Z, Zhao Z, Shen Z, et\u00a0al (2024) Multimodal fusion via hypergraph autoencoder and contrastive learning for emotion recognition in conversation. In: Proceedings of the 32nd ACM international conference on multimedia4, pp. 4341\u20134348. https:\/\/doi.org\/10.1145\/3664647.3681633","DOI":"10.1145\/3664647.3681633"},{"key":"991_CR52","doi-asserted-by":"publisher","unstructured":"Yue, Y., Yu, M., Yang, L., et al. (2025). Joint conditional diffusion model for image restoration with mixed degradations. Neurocomputing,626, Article 129512. https:\/\/doi.org\/10.1016\/J.NEUCOM.2025.129512","DOI":"10.1016\/J.NEUCOM.2025.129512"},{"key":"991_CR53","doi-asserted-by":"publisher","unstructured":"Yun, T., Lim, H., Lee, J., et al. (2024). Telme: Teacher-leading multimodal fusion network for emotion recognition in conversation. In: Proceedings of the 2024 conference of the north american chapter of the association for computational linguistics: human language technologies, pp. 82\u201395. https:\/\/doi.org\/10.18653\/V1\/2024.NAACL-LONG.5","DOI":"10.18653\/V1\/2024.NAACL-LONG.5"},{"key":"991_CR54","doi-asserted-by":"publisher","unstructured":"Zhang, X., Li, Y. (2023). A cross-modality context fusion and semantic refinement network for emotion recognition in conversation. In: Proceedings of the 61st annual meeting of the association for computational linguistics, pp. 13099\u201313110. https:\/\/doi.org\/10.18653\/V1\/2023.ACL-LONG.732","DOI":"10.18653\/V1\/2023.ACL-LONG.732"},{"key":"991_CR55","doi-asserted-by":"publisher","unstructured":"Zhang, Y., Long, J., & Li, C. (2025). Knowledge distillation for object detection with diffusion model. Neurocomputing,636, Article 130019. https:\/\/doi.org\/10.1016\/J.NEUCOM.2025.130019","DOI":"10.1016\/J.NEUCOM.2025.130019"},{"key":"991_CR56","doi-asserted-by":"publisher","unstructured":"Zhang, H., Xu, H., Long, F., et al. (2024a) Unsupervised multimodal clustering for semantics discovery in multimodal utterances. In: Proceedings of the 62nd annual meeting of the association for computational linguistic, pp. 18\u201335. https:\/\/doi.org\/10.18653\/V1\/2024.ACL-LONG.2","DOI":"10.18653\/V1\/2024.ACL-LONG.2"},{"issue":"6","key":"991_CR57","doi-asserted-by":"publisher","first-page":"4115","DOI":"10.1109\/TPAMI.2024.3355414","volume":"46","author":"M Zhang","year":"2024","unstructured":"Zhang, M., Cai, Z., Pan, L., et al. (2024). Motiondiffuse: text-driven human motion generation with diffusion model. IEEE Trans Pattern Anal Mach Intell, 46(6), 4115\u20134128. https:\/\/doi.org\/10.1109\/TPAMI.2024.3355414","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"991_CR58","doi-asserted-by":"publisher","unstructured":"Zheng, X., Zhao, G., Zhu, L., et al. (2022). PERD: personalized emoji recommendation with dynamic user preference. In: Proceedings of the 45th international ACM SIGIR conference on research and development in information retrieval, pp. 1922\u20131926. https:\/\/doi.org\/10.1145\/3477495.3531779","DOI":"10.1145\/3477495.3531779"},{"key":"991_CR59","doi-asserted-by":"publisher","unstructured":"Zong, D., Ding, C., Li, B., et al. (2023). Acformer: An aligned and compact transformer for multimodal sentiment analysis. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 833\u2013842. https:\/\/doi.org\/10.1145\/3581783.3611974","DOI":"10.1145\/3581783.3611974"},{"key":"991_CR60","doi-asserted-by":"publisher","unstructured":"Zou, S., Huang, X., Shen, X. (2023). Multimodal prompt transformer with hybrid contrastive learning for emotion recognition in conversation. In: Proceedings of the 31st ACM international conference on multimedia, pp. 5994\u20136003. https:\/\/doi.org\/10.1145\/3581783.3611805","DOI":"10.1145\/3581783.3611805"}],"container-title":["Journal of Intelligent Information Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10844-025-00991-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10844-025-00991-z","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10844-025-00991-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,16]],"date-time":"2026-05-16T05:38:30Z","timestamp":1778909910000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10844-025-00991-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,8]]},"references-count":60,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2026,6]]}},"alternative-id":["991"],"URL":"https:\/\/doi.org\/10.1007\/s10844-025-00991-z","relation":{},"ISSN":["0925-9902","1573-7675"],"issn-type":[{"value":"0925-9902","type":"print"},{"value":"1573-7675","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,8]]},"assertion":[{"value":"5 July 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 September 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 September 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 October 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"Not Applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}}]}}