{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T05:31:37Z","timestamp":1777613497529,"version":"3.51.4"},"reference-count":50,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U24A20335"],"award-info":[{"award-number":["U24A20335"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62473241"],"award-info":[{"award-number":["62473241"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62476162"],"award-info":[{"award-number":["62476162"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62376143"],"award-info":[{"award-number":["62376143"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62272286"],"award-info":[{"award-number":["62272286"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.neucom.2026.133398","type":"journal-article","created":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T08:37:56Z","timestamp":1773909476000},"page":"133398","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":1,"special_numbering":"C","title":["Target-oriented consistent cross-modal alignment framework for multimodal stance detection"],"prefix":"10.1016","volume":"681","author":[{"given":"Shuqi","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yang","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bin","family":"Liang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Suge","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoli","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Deyu","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jian","family":"Liao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianxing","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruifeng","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jun","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.neucom.2026.133398_bib0005","author":"Bai"},{"key":"10.1016\/j.neucom.2026.133398_bib0010","doi-asserted-by":"crossref","first-page":"423","DOI":"10.1109\/TPAMI.2018.2798607","article-title":"Multimodal machine learning: a survey and taxonomy","volume":"41","author":"Baltru\u0161aitis","year":"2018","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2026.133398_bib0015","series-title":"Proceedings of the 31st International Conference on Computational Linguistics","first-page":"6492","article-title":"Acquired taste: multimodal stance detection with textual and structural embeddings","author":"Barel","year":"2025"},{"key":"10.1016\/j.neucom.2026.133398_bib0020","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2024.127549","article-title":"SSRI-Net: subthreads stance\u2013rumor interaction network for rumor verification","volume":"583","author":"Chen","year":"2024","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2026.133398_bib0025","author":"Chen"},{"key":"10.1016\/j.neucom.2026.133398_bib0030","series-title":"Findings of the Association for Computational Linguistics: ACL 2025","first-page":"3221","article-title":"Zero-shot conversational stance detection: dataset and approaches","author":"Ding","year":"2025"},{"key":"10.1016\/j.neucom.2026.133398_bib0035","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2024.128849","article-title":"Adversarial contrastive representation training with external knowledge injection for zero-shot stance detection","volume":"614","author":"Ding","year":"2025","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2026.133398_bib0040","doi-asserted-by":"crossref","first-page":"241","DOI":"10.1007\/s10462-025-11250-6","article-title":"Decoupling feature-driven and multimodal fusion attention for clothing-changing person re-identification","volume":"58","author":"Ding","year":"2025","journal-title":"Artif. Intell. Rev."},{"key":"10.1016\/j.neucom.2026.133398_bib0045","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2025.132237","article-title":"Fa-ReID: feature alignment with adversarial learning for clothing-changing person re-identification","volume":"666","author":"Ding","year":"2026","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2026.133398_bib0050","series-title":"International Conference on Learning Representations (ICLR)","article-title":"An image is worth 16x16 words: transformers for image recognition at scale","author":"Dosovitskiy","year":"2021"},{"key":"10.1016\/j.neucom.2026.133398_bib0055","series-title":"International Conference on Learning Representations","article-title":"Learning robust representations via multi-view information bottleneck","author":"Federici","year":"2020"},{"key":"10.1016\/j.neucom.2026.133398_bib0060","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2025.111510","article-title":"Learning multi-granularity representation with transformer for visible-infrared person re-identification","volume":"164","author":"Feng","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133398_bib0065","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110981","article-title":"Homogeneous and heterogeneous relational graph for visible-infrared person re-identification","volume":"158","author":"Feng","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133398_bib0070","series-title":"NeurIPS 2024 Workshop on Compositional Learning: Perspectives, Methods, and Paths Forward","article-title":"Generating intermediate representations for compositional text-to-image generation","author":"Galun","year":"2024"},{"key":"10.1016\/j.neucom.2026.133398_bib0075","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","article-title":"Deep residual learning for image recognition","author":"He","year":"2016"},{"key":"10.1016\/j.neucom.2026.133398_bib0080","series-title":"Findings of the Association for Computational Linguistics: EMNLP 2024","first-page":"4407","article-title":"Recent advances in online hate speech moderation: multimodality and the role of large models","author":"Hee","year":"2024"},{"key":"10.1016\/j.neucom.2026.133398_bib0085","series-title":"Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics","first-page":"2225","article-title":"Knowledgeable prompt-tuning: incorporating knowledge into prompt verbalizer for text classification","author":"Hu","year":"2022"},{"key":"10.1016\/j.neucom.2026.133398_bib0090","series-title":"Proceedings of the 31st ACM International Conference on Multimedia","first-page":"5185","article-title":"Prompt me up: unleashing the power of alignments for multimodal entity and relation extraction","author":"Hu","year":"2023"},{"key":"10.1016\/j.neucom.2026.133398_bib0095","series-title":"Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval","first-page":"837","article-title":"Few-shot stance detection via target-aware prompt distillation","author":"Jiang","year":"2022"},{"key":"10.1016\/j.neucom.2026.133398_bib0100","series-title":"Proceedings of the Thirteenth Language Resources and Evaluation Conference","first-page":"7360","article-title":"PoliBERTweet: a pre-trained language model for analyzing political content on Twitter","author":"Kawintiranon","year":"2022"},{"key":"10.1016\/j.neucom.2026.133398_bib0105","series-title":"Proceedings of NAACL-HLT","first-page":"2","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","author":"Chang","year":"2019"},{"key":"10.1016\/j.neucom.2026.133398_bib0110","series-title":"International Conference on Machine Learning","first-page":"5583","article-title":"ViLT: vision-and-language transformer without convolution or region supervision","author":"Kim","year":"2021"},{"key":"10.1016\/j.neucom.2026.133398_bib0115","series-title":"Findings of the Association for Computational Linguistics: ACL 2023","first-page":"6316","article-title":"Distilling calibrated knowledge for stance detection","author":"Li","year":"2023"},{"key":"10.1016\/j.neucom.2026.133398_bib0120","series-title":"Findings of the Association for Computational Linguistics ACL 2024","first-page":"12373","article-title":"Multi-modal stance detection: new datasets and model","author":"Liang","year":"2024"},{"key":"10.1016\/j.neucom.2026.133398_bib0125","author":"Liu"},{"key":"10.1016\/j.neucom.2026.133398_bib0130","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV)","article-title":"Swin transformer: hierarchical vision transformer using shifted windows","author":"Liu","year":"2021"},{"key":"10.1016\/j.neucom.2026.133398_bib0135","series-title":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","first-page":"5487","article-title":"EmoLLMs: a series of emotional large language models and annotation tools for comprehensive affective analysis","author":"Liu","year":"2024"},{"key":"10.1016\/j.neucom.2026.133398_bib0140","series-title":"Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence and Thirtieth Innovative Applications of Artificial Intelligence Conference and Eighth AAAI Symposium on Educational Advances in Artificial Intelligence","first-page":"7202","article-title":"Multimodal keyless attention fusion for video classification","author":"Long","year":"2018"},{"key":"10.1016\/j.neucom.2026.133398_bib0145","doi-asserted-by":"crossref","first-page":"776","DOI":"10.1109\/TMM.2023.3271019","article-title":"A transformer-based model with self-distillation for multimodal emotion recognition in conversations","volume":"26","author":"Ma","year":"2024","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.neucom.2026.133398_bib0150","series-title":"Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation","first-page":"6732","article-title":"Examining temporalities on stance detection towards COVID-19 vaccination","author":"Mu","year":"2024"},{"key":"10.1016\/j.neucom.2026.133398_bib0155","series-title":"Proceedings of the 32nd ACM International Conference on Multimedia","first-page":"3867","article-title":"Multimodal multi-turn conversation stance detection: a challenge dataset and effective model","author":"Niu","year":"2024"},{"key":"10.1016\/j.neucom.2026.133398_bib0160","series-title":"GPT-4V(ision) System Card","year":"2023"},{"key":"10.1016\/j.neucom.2026.133398_bib0165","series-title":"Findings of the Association for Computational Linguistics: EMNLP 2025","first-page":"20971","article-title":"Riemannian optimization for LoRA on the Stiefel manifold","author":"Park","year":"2025"},{"key":"10.1016\/j.neucom.2026.133398_bib0170","author":"Yang"},{"key":"10.1016\/j.neucom.2026.133398_bib0175","series-title":"International Conference on Machine Learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"10.1016\/j.neucom.2026.133398_bib0180","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"15638","article-title":"FLAVA: a foundational language and vision alignment model","author":"Singh","year":"2022"},{"key":"10.1016\/j.neucom.2026.133398_bib0185","series-title":"2015 IEEE Information Theory Workshop","article-title":"Deep learning and the information bottleneck principle","author":"Tishby","year":"2015"},{"key":"10.1016\/j.neucom.2026.133398_bib0190","author":"Touvron"},{"key":"10.1016\/j.neucom.2026.133398_bib0195","author":"Vasilakes"},{"key":"10.1016\/j.neucom.2026.133398_bib0200","series-title":"Proceedings of the Third Workshop on NLP for Positive Impact","first-page":"315","article-title":"MultiClimate: multimodal stance detection on climate change videos","author":"Wang","year":"2024"},{"key":"10.1016\/j.neucom.2026.133398_bib0205","first-page":"1","article-title":"CIME: contextual interaction-based multimodal emotion analysis with enhanced semantic information","author":"Wang","year":"2025","journal-title":"IEEE Trans. Comput. Soc. Syst."},{"key":"10.1016\/j.neucom.2026.133398_bib0210","series-title":"Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation","first-page":"15585","article-title":"Target-adaptive consistency enhanced prompt-tuning for multi-domain stance detection","author":"Wang","year":"2024"},{"key":"10.1016\/j.neucom.2026.133398_bib0215","series-title":"Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing","first-page":"12597","article-title":"Identification of multimodal stance towards frames of communication","author":"Weinzierl","year":"2023"},{"key":"10.1016\/j.neucom.2026.133398_bib0220","series-title":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics","first-page":"861","article-title":"Tree-of-counterfactual prompting for zero-shot stance detection","author":"Weinzierl","year":"2024"},{"key":"10.1016\/j.neucom.2026.133398_bib0225","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"16076","article-title":"MmAP: multi-modal alignment prompt for cross-domain multi-task learning","author":"Xin","year":"2024"},{"key":"10.1016\/j.neucom.2026.133398_bib0230","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"10637","article-title":"BridgeTower: building bridges between encoders in vision-language representation learning","author":"Xu","year":"2023"},{"key":"10.1016\/j.neucom.2026.133398_bib0235","author":"Yamaguchi"},{"key":"10.1016\/j.neucom.2026.133398_bib0240","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2023.126943","article-title":"Commonsense-based adversarial learning framework for zero-shot stance detection","volume":"563","author":"Zhang","year":"2024","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2026.133398_bib0245","series-title":"Proceedings of the 28th International Conference on Computational Linguistics","first-page":"568","article-title":"SentiX: a sentiment-aware pre-trained model for cross-domain sentiment analysis","author":"Zhou","year":"2020"},{"key":"10.1016\/j.neucom.2026.133398_bib0250","series-title":"Findings of the Association for Computational Linguistics: ACL 2024","first-page":"10057","article-title":"An empirical study on parameter-efficient fine-tuning for MultiModal large language models","author":"Zhou","year":"2024"}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226007952?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226007952?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T20:24:42Z","timestamp":1776975882000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0925231226007952"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":50,"alternative-id":["S0925231226007952"],"URL":"https:\/\/doi.org\/10.1016\/j.neucom.2026.133398","relation":{},"ISSN":["0925-2312"],"issn-type":[{"value":"0925-2312","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Target-oriented consistent cross-modal alignment framework for multimodal stance detection","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neucom.2026.133398","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"133398"}}