{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T14:52:37Z","timestamp":1770821557749,"version":"3.50.1"},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2025,7,2]],"date-time":"2025-07-02T00:00:00Z","timestamp":1751414400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,7,2]],"date-time":"2025-07-02T00:00:00Z","timestamp":1751414400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100019042","name":"Jiangsu Ocean University","doi-asserted-by":"publisher","award":["No. KYCX2023-79"],"award-info":[{"award-number":["No. KYCX2023-79"]}],"id":[{"id":"10.13039\/501100019042","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No. 72174079"],"award-info":[{"award-number":["No. 72174079"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100008868","name":"Jiangsu Provincial Department of Science and Technology","doi-asserted-by":"publisher","award":["No. SBK2024041254"],"award-info":[{"award-number":["No. SBK2024041254"]}],"id":[{"id":"10.13039\/501100008868","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1007\/s11760-025-04387-y","type":"journal-article","created":{"date-parts":[[2025,7,2]],"date-time":"2025-07-02T06:26:06Z","timestamp":1751437566000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Dual-stream multi-level interaction network for aspect-based multimodal sentiment analysis"],"prefix":"10.1007","volume":"19","author":[{"given":"Yuxiang","family":"Wang","sequence":"first","affiliation":[]},{"given":"Xuefeng","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Zhaoman","family":"Zhong","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,2]]},"reference":[{"key":"4387_CR1","doi-asserted-by":"publisher","unstructured":"Zhou, Q., Liang, H., Lin, Z., Xu, K.: Multimodal feature fusion for video advertisements tagging via stacking ensemble. arXiv preprint arXiv:2108.00679 (2021). https:\/\/doi.org\/10.48550\/arXiv.2108.00679","DOI":"10.48550\/arXiv.2108.00679"},{"key":"4387_CR2","doi-asserted-by":"publisher","first-page":"3375","DOI":"10.1109\/TMM.2022.3160060","volume":"25","author":"T Zhu","year":"2023","unstructured":"Zhu, T., Li, L., Yang, J., Zhao, S., Liu, H., Qian, J.: Multimodal sentiment analysis with image-text interaction network. IEEE Trans. Multimed. 25, 3375\u20133385 (2023). https:\/\/doi.org\/10.1109\/TMM.2022.3160060","journal-title":"IEEE Trans. Multimed."},{"key":"4387_CR3","doi-asserted-by":"publisher","unstructured":"Chen, X., Lu, G., Yan, J.: Multimodal sentiment analysis based on multi-head attention mechanism. In: Proceedings of the 2020 International Conference on Multimedia Modeling (MMM), pp. 1\u201312 (2020). 
https:\/\/doi.org\/10.1145\/3380688.3380693","DOI":"10.1145\/3380688.3380693"},{"key":"4387_CR4","doi-asserted-by":"publisher","first-page":"124","DOI":"10.1016\/j.neucom.2022.05.045","volume":"500","author":"Z Zhao","year":"2022","unstructured":"Zhao, Z., Tang, M., Tang, W., Wang, C., Chen, X.: Graph convolutional network with multiple weight mechanisms for aspect-based sentiment analysis. Neurocomputing 500, 124\u2013134 (2022). https:\/\/doi.org\/10.1016\/j.neucom.2022.05.045","journal-title":"Neurocomputing"},{"key":"4387_CR5","doi-asserted-by":"publisher","unstructured":"Jiang, T., Wang, J., Liu, Z., Ling, Y.: Fusion-extraction network for multimodal sentiment analysis. In: Advances in Knowledge Discovery and Data Mining (PAKDD 2020), pp. 785\u2013797 (2020). https:\/\/doi.org\/10.1007\/978-3-030-47436-2_59","DOI":"10.1007\/978-3-030-47436-2_59"},{"key":"4387_CR6","doi-asserted-by":"publisher","unstructured":"Yu, J., Jing, J.: Adapting BERT for target-oriented multimodal sentiment classification. In: Proceedings of International Joint Conference on Artificial Intelligence (IJCAI), pp. 5408\u20135414 (2019). https:\/\/doi.org\/10.24963\/ijcai.2019\/751","DOI":"10.24963\/ijcai.2019\/751"},{"key":"4387_CR7","doi-asserted-by":"publisher","first-page":"127222","DOI":"10.1016\/j.neucom.2023.127222","volume":"573","author":"J Yang","year":"2024","unstructured":"Yang, J., Xu, M., Xiao, Y., Du, X.: AMIFN: Aspect-guided multi-view interactions and fusion network for multimodal aspect-based sentiment analysis. Neurocomputing 573, 127222 (2024). https:\/\/doi.org\/10.1016\/j.neucom.2023.127222","journal-title":"Neurocomputing"},{"key":"4387_CR8","unstructured":"Zhao, F., Wu, Z., Long, S., Dai, X., Huang, S., Chen, J.: Learning from adjective-noun pairs: A knowledge-enhanced framework for target-oriented multimodal sentiment classification. In: Proceedings of 29th International Conference on Computer Linguist. (COLING), pp. 6784\u20136794 (2022). https:\/\/aclanthology.org\/2022.coling-1.590\/"},{"key":"4387_CR9","doi-asserted-by":"publisher","first-page":"107220","DOI":"10.1016\/j.knosys.2021.107220","volume":"227","author":"A Zhao","year":"2021","unstructured":"Zhao, A., Yu, Y.: Knowledge-enabled BERT for aspect-based sentiment analysis. Knowl.-Based Syst. 227, 107220 (2021). https:\/\/doi.org\/10.1016\/j.knosys.2021.107220","journal-title":"Knowl.-Based Syst."},{"key":"4387_CR10","doi-asserted-by":"publisher","first-page":"371","DOI":"10.1609\/aaai.v33i01.3301371","volume":"33","author":"N Xu","year":"2019","unstructured":"Xu, N., Mao, W., Chen, G.: Multi-interactive memory network for aspect-based multimodal sentiment analysis. Proc. AAAI Conf. Artif. Intell. 33, 371\u2013378 (2019). https:\/\/doi.org\/10.1609\/aaai.v33i01.3301371","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"4387_CR11","doi-asserted-by":"publisher","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 770\u2013778 (2016). https:\/\/doi.org\/10.48550\/arXiv.1512.03385","DOI":"10.48550\/arXiv.1512.03385"},{"key":"4387_CR12","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1706.03762","author":"A Vaswani","year":"2017","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., Polosukhin, I.: Attention is all you need. Adv. Neural. Inf. Process. Syst. (2017). https:\/\/doi.org\/10.48550\/arXiv.1706.03762","journal-title":"Adv. 
Neural. Inf. Process. Syst."},{"key":"4387_CR13","doi-asserted-by":"publisher","unstructured":"Kumar, A., Jaiswal, A.: Image sentiment analysis using convolutional neural network. In: Proceedings of the International Conference on Image Processing (ICIP), pp. 464\u2013473 (2017). https:\/\/doi.org\/10.1007\/978-3-319-76348-4_45","DOI":"10.1007\/978-3-319-76348-4_45"},{"key":"4387_CR14","doi-asserted-by":"publisher","first-page":"101003","DOI":"10.1016\/j.jocs.2019.05.009","volume":"36","author":"K Sailunaz","year":"2019","unstructured":"Sailunaz, K., Alhajj, R.: Emotion and sentiment analysis from Twitter text. J. Comput. Sci. 36, 101003 (2019). https:\/\/doi.org\/10.1016\/j.jocs.2019.05.009","journal-title":"J. Comput. Sci."},{"issue":"8","key":"4387_CR15","doi-asserted-by":"publisher","first-page":"1668","DOI":"10.1109\/TASLP.2017.2678164","volume":"25","author":"L Kaushik","year":"2017","unstructured":"Kaushik, L., Sangwan, A., Hansen, J.H.L.: Automatic sentiment detection in naturalistic audio. IEEE\/ACM Trans. Audio Speech Lang. Process. 25(8), 1668\u20131679 (2017). https:\/\/doi.org\/10.1109\/TASLP.2017.2678164","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"4387_CR16","doi-asserted-by":"publisher","unstructured":"Li, L.H., Yatskar, M., Yin, D., Hsieh, C.-J., Chang, K.-W.: What does BERT with vision look at?. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics (ACL 2020), pp. 5265\u20135275 (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.469","DOI":"10.18653\/v1\/2020.acl-main.469"},{"key":"4387_CR17","doi-asserted-by":"publisher","unstructured":"Kiela, D., Bhooshan, S., Firooz, H., Perez, E., Testuggine, D.: Supervised multimodal bitransformers for classifying images and text. arXiv preprint arXiv:1909.02950 (2020). https:\/\/doi.org\/10.48550\/arXiv.1909.02950","DOI":"10.48550\/arXiv.1909.02950"},{"key":"4387_CR18","doi-asserted-by":"publisher","first-page":"116","DOI":"10.1016\/j.inffus.2019.08.009","volume":"55","author":"W Zhang","year":"2020","unstructured":"Zhang, W., Yu, J., Hu, H., Qin, Z.: Multimodal feature fusion by relational reasoning and attention for visual question answering. Inf. Fusion 55, 116\u2013126 (2020)","journal-title":"Inf. Fusion"},{"key":"4387_CR19","doi-asserted-by":"publisher","unstructured":"Dai, Y., Gieseke, F., Oehmcke, S., Wu, Y., Barnard, K.: Attentional feature fusion. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 3560\u20133569 (2021). https:\/\/doi.org\/10.1109\/WACV48630.2021.00360","DOI":"10.1109\/WACV48630.2021.00360"},{"issue":"5","key":"4387_CR20","doi-asserted-by":"publisher","first-page":"3192","DOI":"10.1109\/TCSVT.2023.3312858","volume":"34","author":"X Zhang","year":"2023","unstructured":"Zhang, X., Li, M., Lin, S., Xu, H., Xiao, G.: Transformer-based multimodal emotional perception for dynamic facial expression recognition in the wild. IEEE Trans. Circuits Syst. Video Technol. 34(5), 3192\u20133203 (2023). https:\/\/doi.org\/10.1109\/TCSVT.2023.3312858","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"3","key":"4387_CR21","doi-asserted-by":"publisher","first-page":"1966","DOI":"10.1109\/TAFFC.2022.3171091","volume":"14","author":"K Yu","year":"2023","unstructured":"Yu, K., Chen, R., Xia, R.: Hierarchical interactive multimodal transformer for aspect-based multimodal sentiment analysis. IEEE Trans. Affect. Comput. 14(3), 1966\u20131978 (2023). 
https:\/\/doi.org\/10.1109\/TAFFC.2022.3171091","journal-title":"IEEE Trans. Affect. Comput."},{"key":"4387_CR22","doi-asserted-by":"publisher","first-page":"2015","DOI":"10.1109\/TASLP.2022.3178204","volume":"30","author":"B Yang","year":"2022","unstructured":"Yang, B., Wu, L., Zhu, J., Shao, B., Lin, X., Liu, T.-Y.: Multimodal sentiment analysis with two-phase multi-task learning. IEEE\/ACM Trans. Audio Speech Lang. Process. 30, 2015\u20132024 (2022). https:\/\/doi.org\/10.1109\/TASLP.2022.3178204","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"4387_CR23","doi-asserted-by":"publisher","unstructured":"Khan, Z., Fu, Y.: Exploiting BERT for multimodal target sentiment classification through input space translation. In: Proceedings of ACM International Conference on Multimedia. (MM \u201921), pp. 3034\u20133042 (2021). https:\/\/doi.org\/10.1145\/3474085.3475692","DOI":"10.1145\/3474085.3475692"},{"key":"4387_CR24","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1007\/s10462-023-10685-z","volume":"57","author":"Y Li","year":"2024","unstructured":"Li, Y., Ding, H., Lin, Y., et al.: Multi-level textual-visual alignment and fusion network for multimodal aspect-based sentiment analysis. Artif. Intell. Rev. 57, 78 (2024). https:\/\/doi.org\/10.1007\/s10462-023-10685-z","journal-title":"Artif. Intell. Rev."},{"key":"4387_CR25","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., He, K.: Non-local neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7794\u20137803 (2018). https:\/\/openaccess.thecvf.com\/content_cvpr_2018\/html\/Wang_Non-Local_Neural_Networks_CVPR_2018_paper.html","DOI":"10.1109\/CVPR.2018.00813"},{"key":"4387_CR26","doi-asserted-by":"publisher","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. In: 3rd International Conference on Learning Representations (ICLR 2015), (2015). https:\/\/doi.org\/10.48550\/arXiv.1412.6980","DOI":"10.48550\/arXiv.1412.6980"},{"key":"4387_CR27","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J. et al.: Learning transferable visual models from natural language supervision. In: Proceedings of the International Conference on Machine Learning (ICML 2021), pp. 8748\u20138763 (2021). https:\/\/arxiv.org\/abs\/2103.00020"},{"key":"4387_CR28","unstructured":"Li, J., Li, D., Xiong, C., Hoi, S.: Blip: bootstrapping language-image pre-training for unified vision-language understanding and generation. In: Proceedings of the International Conference on Machine Learning (ICML 2022), pp. 12888\u201312900 (2022). https:\/\/arxiv.org\/abs\/2201.12086"},{"key":"4387_CR29","doi-asserted-by":"publisher","first-page":"429","DOI":"10.1109\/TASLP.2019.2957872","volume":"28","author":"J Yu","year":"2020","unstructured":"Yu, J., Jiang, J., Xia, R.: Entity-sensitive attention and fusion network for entity-level multimodal sentiment classification. IEEE\/ACM Trans. Audio Speech Lang. Process. 28, 429\u2013439 (2020). https:\/\/doi.org\/10.1109\/TASLP.2019.2957872","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"4387_CR30","doi-asserted-by":"publisher","first-page":"8403","DOI":"10.1007\/s11760-024-03482-w","volume":"18","author":"Y Li","year":"2024","unstructured":"Li, Y., Zheng, X., Zhu, M., et al.: Compact bilinear pooling and multi-loss network for social media multimodal classification. SIViP 18, 8403\u20138412 (2024). 
https:\/\/doi.org\/10.1007\/s11760-024-03482-w","journal-title":"SIViP"}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-04387-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-025-04387-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-04387-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,7]],"date-time":"2025-09-07T00:31:59Z","timestamp":1757205119000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-025-04387-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,2]]},"references-count":30,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2025,10]]}},"alternative-id":["4387"],"URL":"https:\/\/doi.org\/10.1007\/s11760-025-04387-y","relation":{},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"value":"1863-1703","type":"print"},{"value":"1863-1711","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,7,2]]},"assertion":[{"value":"13 January 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 June 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 June 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 July 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that there are no conflict of interest statements.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"827"}}
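The record above is a standard Crossref REST API work message, so it can be retrieved and parsed programmatically. Below is a minimal sketch, assuming Python 3 with the third-party requests package; the endpoint is the public Crossref API (https://api.crossref.org/works/{DOI}), and the field names are taken directly from the record itself.

```python
# Minimal sketch: fetch the same Crossref work message and print a few
# of the fields shown above. Assumes Python 3 and the third-party
# `requests` package; api.crossref.org is the public Crossref REST API.
import requests

DOI = "10.1007/s11760-025-04387-y"
resp = requests.get(f"https://api.crossref.org/works/{DOI}", timeout=30)
resp.raise_for_status()

payload = resp.json()      # {"status": "ok", "message-type": "work", ...}
work = payload["message"]  # the work record itself

print(work["title"][0])
print(", ".join(f"{a['given']} {a['family']}" for a in work["author"]))
print(work["container-title"][0], "vol.", work["volume"],
      "article", work["article-number"])
print("references:", work["references-count"])
```

Run as-is, this should print the article title, the three authors, the journal, volume, and article number, and the reference count (30), matching the metadata in the record above.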