{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T20:12:06Z","timestamp":1778011926930,"version":"3.51.4"},"reference-count":43,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100012542","name":"Sichuan Province Science and Technology Support Program","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100012542","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Expert Systems with Applications"],"published-print":{"date-parts":[[2026,9]]},"DOI":"10.1016\/j.eswa.2026.132635","type":"journal-article","created":{"date-parts":[[2026,4,27]],"date-time":"2026-04-27T23:44:40Z","timestamp":1777333480000},"page":"132635","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["MoMKE-DIR: A multimodal sentiment analysis model based on dynamic feature integration and iterative refinement"],"prefix":"10.1016","volume":"325","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-0039-7036","authenticated-orcid":false,"given":"Quanyi","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinglin","family":"Lyu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xijie","family":"Cheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-2171-1357","authenticated-orcid":false,"given":"Chunzhi","family":"Xie","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jia","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0470-8861","authenticated-orcid":false,"given":"Zhisheng","family":"Gao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.eswa.2026.132635_sbref0001","series-title":"Proceedings of the 57th annual meeting of the association for computational linguistics","first-page":"991","article-title":"Multimodal and multi-view models for emotion recognition","author":"Aguilar","year":"2019"},{"key":"10.1016\/j.eswa.2026.132635_sbref0002","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2024.124852","article-title":"A systematic review of trimodal affective computing approaches: Text, audio, and visual integration in emotion recognition and sentiment analysis","volume":"255","author":"Al-Saadawi","year":"2024","journal-title":"Expert Systems with Applications"},{"key":"10.1016\/j.eswa.2026.132635_sbref0003","series-title":"Proceedings of the 56th annual meeting of the association for computational linguistics (volume 1: Long papers)","first-page":"2236","article-title":"Multimodal language analysis in the wild: CMU-MOSEI dataset and interpretable dynamic fusion graph","author":"Bagher Zadeh","year":"2018"},{"issue":"2","key":"10.1016\/j.eswa.2026.132635_bib0004","doi-asserted-by":"crossref","first-page":"423","DOI":"10.1109\/TPAMI.2018.2798607","article-title":"Multimodal machine learning: A survey and taxonomy","volume":"41","author":"Baltru\u0161aitis","year":"2019","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"4","key":"10.1016\/j.eswa.2026.132635_sbref0005","doi-asserted-by":"crossref","first-page":"335","DOI":"10.1007\/s10579-008-9076-6","article-title":"Iemocap: Interactive emotional dyadic motion capture database","volume":"42","author":"Busso","year":"2008","journal-title":"Language Resources and Evaluation"},{"key":"10.1016\/j.eswa.2026.132635_sbref0006","series-title":"Proceedings of the 24th ACM SIGKDD international conference on knowledge discovery & data mining","first-page":"1158","article-title":"Deep adversarial learning for multi-modality missing data completion","author":"Cai","year":"2018"},{"key":"10.1016\/j.eswa.2026.132635_bib0007","unstructured":"Chung, J., Gulcehre, C., Cho, K., & Bengio, Y. (2014). Empirical evaluation of gated recurrent neural networks on sequence modeling. https:\/\/arxiv.org\/abs\/1412.3555."},{"issue":"1","key":"10.1016\/j.eswa.2026.132635_bib0008","doi-asserted-by":"crossref","first-page":"32","DOI":"10.1109\/79.911197","article-title":"Emotion recognition in human-computer interaction","volume":"18","author":"Cowie","year":"2001","journal-title":"IEEE Signal Processing Magazine"},{"key":"10.1016\/j.eswa.2026.132635_sbref0009","doi-asserted-by":"crossref","first-page":"14","DOI":"10.1016\/j.aiopen.2022.02.001","article-title":"Learning towards conversational AI: A survey","volume":"3","author":"Fu","year":"2022","journal-title":"AI Open"},{"key":"10.1016\/j.eswa.2026.132635_bib0010","unstructured":"Guo, Z., Jin, T., & Zhao, Z. (2024). Multimodal prompt learning with missing modalities for sentiment analysis and emotion recognition. arXiv preprint arXiv: 2407.05374."},{"key":"10.1016\/j.eswa.2026.132635_bib0011","series-title":"Proceedings of the 28th ACM international conference on multimedia","first-page":"1122","article-title":"Misa: Modality-invariant and-specific representations for multimodal sentiment analysis","author":"Hazarika","year":"2020"},{"key":"10.1016\/j.eswa.2026.132635_bib0012","unstructured":"He, P., Liu, X., Gao, J., & Chen, W. (2021). DeBERTa: Decoding-enhanced BERT with disentangled attention. https:\/\/arxiv.org\/abs\/2006.03654."},{"key":"10.1016\/j.eswa.2026.132635_bib0013","series-title":"2018 IEEE\/CVF conference on computer vision and pattern recognition","first-page":"7132","article-title":"Squeeze-and-excitation networks","author":"Hu","year":"2018"},{"key":"10.1016\/j.eswa.2026.132635_bib0014","series-title":"Proceedings of the computer vision and pattern recognition conference","first-page":"25864","article-title":"Knowledge bridger: Towards training-free missing modality completion","author":"Ke","year":"2025"},{"key":"10.1016\/j.eswa.2026.132635_sbref0015","series-title":"Embedded multimodal interfaces in robotics: Applications, future trends, and societal implications","first-page":"523","author":"Kirchner","year":"2019"},{"key":"10.1016\/j.eswa.2026.132635_bib0016","doi-asserted-by":"crossref","unstructured":"Li, Y., Wang, Y., & Cui, Z. (2023). Decoupled multimodal distilling for emotion recognition. https:\/\/arxiv.org\/abs\/2303.13802.","DOI":"10.1109\/CVPR52729.2023.00641"},{"key":"10.1016\/j.eswa.2026.132635_bib0017","doi-asserted-by":"crossref","unstructured":"Lian, Z., Chen, L., Sun, L., Liu, B., Tao, J. (2023). GCNet: Graph completion network for incomplete multimodal learning in conversation. https:\/\/arxiv.org\/abs\/2203.02177.","DOI":"10.1109\/TPAMI.2023.3234553"},{"key":"10.1016\/j.eswa.2026.132635_sbref0018","doi-asserted-by":"crossref","DOI":"10.1016\/j.artint.2022.103714","article-title":"Emotional conversation generation with heterogeneous graph neural network","volume":"308","author":"Liang","year":"2022","journal-title":"Artificial Intelligence"},{"key":"10.1016\/j.eswa.2026.132635_sbref0019","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2023.102216","article-title":"Adapt and explore: Multimodal mixup for representation learning","volume":"105","author":"Lin","year":"2024","journal-title":"Information Fusion"},{"key":"10.1016\/j.eswa.2026.132635_bib0020","series-title":"Multimedia modeling","first-page":"411","article-title":"Multimodal reconstruct and align net for missing modality problem in sentiment analysis","author":"Luo","year":"2023"},{"key":"10.1016\/j.eswa.2026.132635_bib0021","unstructured":"Ma, F., Xu, X., Huang, S.-L., Zhang, L. (2021). Maximum likelihood estimation for multimodal learning with missing modality. https:\/\/arxiv.org\/abs\/2108.10513."},{"key":"10.1016\/j.eswa.2026.132635_bib0022","doi-asserted-by":"crossref","unstructured":"Parthasarathy, S., & Sundaram, S. (2020). Training strategies to handle missing modalities for audio-visual expression recognition. https:\/\/arxiv.org\/abs\/2010.00734.","DOI":"10.1145\/3395035.3425202"},{"key":"10.1016\/j.eswa.2026.132635_sbref0023","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2023.119721","article-title":"A fine-grained modal label-based multi-stage network for multimodal sentiment analysis","volume":"221","author":"Peng","year":"2023","journal-title":"Expert Systems with Applications"},{"key":"10.1016\/j.eswa.2026.132635_bib0024","unstructured":"Pham, H., Liang, P. P., Manzini, T., Morency, L.-P., & Poczos, B. (2020). Found in translation: Learning robust joint representations by cyclic translations between modalities. https:\/\/arxiv.org\/abs\/1812.07809."},{"key":"10.1016\/j.eswa.2026.132635_bib0025","doi-asserted-by":"crossref","unstructured":"Schneider, S., Baevski, A., Collobert, R., & Auli, M. (2019). wav2vec: Unsupervised pre-training for speech recognition. https:\/\/arxiv.org\/abs\/1904.05862.","DOI":"10.21437\/Interspeech.2019-1873"},{"key":"10.1016\/j.eswa.2026.132635_bib0026","unstructured":"Shi, Y., Siddharth, N., Paige, B., & Torr, P. H. S. (2019). Variational mixture-of-experts autoencoders for multi-modal deep generative models. https:\/\/arxiv.org\/abs\/1911.03393."},{"issue":"5","key":"10.1016\/j.eswa.2026.132635_bib0027","doi-asserted-by":"crossref","first-page":"891","DOI":"10.1109\/JPROC.2020.3047978","article-title":"Computational media intelligence: Human-centered machine analysis of media","volume":"109","author":"Somandepalli","year":"2021","journal-title":"Proceedings of the IEEE"},{"key":"10.1016\/j.eswa.2026.132635_bib0028","series-title":"2017 IEEE conference on computer vision and pattern recognition (CVPR)","first-page":"4971","article-title":"Missing modalities imputation via cascaded residual autoencoder","author":"Tran","year":"2017"},{"key":"10.1016\/j.eswa.2026.132635_bib0029","doi-asserted-by":"crossref","unstructured":"Tsai, Y.-H. H., Bai, S., Liang, P. P., Kolter, J. Z., Morency, L.-P., & Salakhutdinov, R. (2019). Multimodal transformer for unaligned multimodal language sequences. https:\/\/arxiv.org\/abs\/1906.00295.","DOI":"10.18653\/v1\/P19-1656"},{"key":"10.1016\/j.eswa.2026.132635_bib0030","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"22025","article-title":"Distribution-consistent modal recovering for incomplete multimodal learning","author":"Wang","year":"2023"},{"key":"10.1016\/j.eswa.2026.132635_sbref0031","series-title":"Advances in neural information processing systems","first-page":"17117","article-title":"Incomplete multimodality-diffused emotion recognition","volume":"vol. 36","author":"Wang","year":"2023"},{"key":"10.1016\/j.eswa.2026.132635_sbref0032","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2023.121930","article-title":"BayessentiRS: Bayesian sentiment analysis for addressing cold start and sparsity in ranking-based recommender systems","volume":"238","author":"Wu","year":"2024","journal-title":"Expert Systems with Applications"},{"key":"10.1016\/j.eswa.2026.132635_bib0033","unstructured":"Wu, M., & Goodman, N. (2018). Multimodal generative models for scalable weakly-supervised learning. https:\/\/arxiv.org\/abs\/1802.05335."},{"key":"10.1016\/j.eswa.2026.132635_bib0034","unstructured":"Xiao, D., Meng, Q., Li, S., & Yuan, X. (2025). MuddFormer: Breaking residual bottlenecks in transformers via multiway dynamic dense connections. https:\/\/arxiv.org\/abs\/2502.12170."},{"issue":"10","key":"10.1016\/j.eswa.2026.132635_bib0035","doi-asserted-by":"crossref","first-page":"12113","DOI":"10.1109\/TPAMI.2023.3275156","article-title":"Multimodal learning with transformers: A survey","volume":"45","author":"Xu","year":"2023","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"10.1016\/j.eswa.2026.132635_sbref0036","series-title":"Proceedings of the 32nd ACM international conferenceon multimedia","first-page":"438","article-title":"Leveraging knowledge of modality experts for incomplete multimodal learning","author":"Xu","year":"2024"},{"key":"10.1016\/j.eswa.2026.132635_bib0037","unstructured":"Zadeh, A., Zellers, R., Pincus, E., & Morency, L.-P. (2016). Mosi: Multimodal corpus of sentiment intensity and subjectivity analysis in online opinion videos. https:\/\/arxiv.org\/abs\/1606.06259."},{"key":"10.1016\/j.eswa.2026.132635_bib0038","doi-asserted-by":"crossref","unstructured":"Zhang, C., Cui, Y., Han, Z., Zhou, J. T., Fu, H., & Hu, Q. (2020). Deep partial multi-view learning. https:\/\/arxiv.org\/abs\/2011.06170.","DOI":"10.1109\/TPAMI.2020.3037734"},{"issue":"10","key":"10.1016\/j.eswa.2026.132635_sbref0039","doi-asserted-by":"crossref","first-page":"1499","DOI":"10.1109\/LSP.2016.2603342","article-title":"Joint face detection and alignment using multitask cascaded convolutional networks","volume":"23","author":"Zhang","year":"2016","journal-title":"IEEE Signal Processing Letters"},{"key":"10.1016\/j.eswa.2026.132635_sbref0040","series-title":"Proceedings of the 59th annual meeting of the association for computational linguistics and the 11th international joint conference on natural language processing (volume 1: Long papers)","first-page":"2608","article-title":"Missing modality imagination network for emotion recognition with uncertain missing modalities","author":"Zhao","year":"2021"},{"issue":"6","key":"10.1016\/j.eswa.2026.132635_bib0041","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1109\/MSP.2021.3106895","article-title":"Emotion recognition from multiple modalities: Fundamentals and methodologies","volume":"38","author":"Zhao","year":"2021","journal-title":"IEEE Signal Processing Magazine"},{"key":"10.1016\/j.eswa.2026.132635_bib0042","doi-asserted-by":"crossref","first-page":"6544","DOI":"10.1109\/TIP.2021.3093397","article-title":"Learning deep global multi-scale and local attention features for facial expression recognition in the wild","volume":"30","author":"Zhao","year":"2021","journal-title":"IEEE Transactions on Image Processing"},{"key":"10.1016\/j.eswa.2026.132635_bib0043","unstructured":"Zuo, H., Liu, R., Zhao, J., Gao, G., & Li, H. (2022). Exploiting modality-invariant feature for robust multimodal emotion recognition with missing modalities. https:\/\/arxiv.org\/abs\/2210.15359."}],"container-title":["Expert Systems with Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417426015484?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417426015484?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T19:40:00Z","timestamp":1778010000000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0957417426015484"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,9]]},"references-count":43,"alternative-id":["S0957417426015484"],"URL":"https:\/\/doi.org\/10.1016\/j.eswa.2026.132635","relation":{},"ISSN":["0957-4174"],"issn-type":[{"value":"0957-4174","type":"print"}],"subject":[],"published":{"date-parts":[[2026,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"MoMKE-DIR: A multimodal sentiment analysis model based on dynamic feature integration and iterative refinement","name":"articletitle","label":"Article Title"},{"value":"Expert Systems with Applications","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.eswa.2026.132635","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"132635"}}