{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,8]],"date-time":"2026-06-08T18:03:40Z","timestamp":1780941820099,"version":"3.54.1"},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"25","license":[{"start":{"date-parts":[[2024,1,22]],"date-time":"2024-01-22T00:00:00Z","timestamp":1705881600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,22]],"date-time":"2024-01-22T00:00:00Z","timestamp":1705881600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-023-18105-8","type":"journal-article","created":{"date-parts":[[2024,1,22]],"date-time":"2024-01-22T08:02:41Z","timestamp":1705910561000},"page":"66263-66281","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":21,"title":["Novelty fused image and text models based on deep neural network and transformer for multimodal sentiment analysis"],"prefix":"10.1007","volume":"83","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9400-7582","authenticated-orcid":false,"given":"Bui Thanh","family":"Hung","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Nguyen Hoang Minh","family":"Thu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,1,22]]},"reference":[{"key":"18105_CR1","unstructured":"Bommasani R, Hudson DA, Adeli E, Altman R, Arora S, von Arx S, Bernstein MS, Bohg J, Bosselut A, Brunskill E et al (2021) On the opportunities and risks of foundation models. arXiv:2108.07258"},{"key":"18105_CR2","doi-asserted-by":"crossref","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In Proc IEEE Conf Comput Vis Pattern Recognit (pp. 580\u2013587)","DOI":"10.1109\/CVPR.2014.81"},{"key":"18105_CR3","doi-asserted-by":"crossref","unstructured":"Hung BT, Tien LM (2021) Facial expression recognition with CNN-LSTM. In\u00a0Research in Intelligent\u00a0Computing in Engineering: Select Proceedings of Rice 2020 (pp. 549\u2013560). Springer Singapore","DOI":"10.1007\/978-981-15-7527-3_52"},{"key":"18105_CR4","doi-asserted-by":"crossref","unstructured":"Gupta A, Pramanik S, Bui HT, Ibenu NM (2021) Machine learning and deep learning in steganography and steganalysis. In Multidisciplinary Approach to Modern Digital Steganography (pp. 75\u201398). IGI Global","DOI":"10.4018\/978-1-7998-7160-6.ch004"},{"issue":"11","key":"18105_CR5","doi-asserted-by":"publisher","first-page":"17291","DOI":"10.1007\/s11042-020-10043-z","volume":"80","author":"Y Xie","year":"2021","unstructured":"Xie Y, Yang H, Yuan X, He Q, Zhang R, Zhu Q, Chu Z, Yang C, Qin P, Yan C (2021) Stroke prediction from electrocardiograms by deep neural network. Multimed Tools Appl 80(11):17291\u201317297","journal-title":"Multimed Tools Appl"},{"key":"18105_CR6","doi-asserted-by":"crossref","unstructured":"Thanh Hung B (2022) Content-based image retrieval using multi-deep learning models. In next generation of internet of things: Proceedings of ICNGIoT 2022 (pp. 347\u2013357). Singapore: Springer Nature Singapore","DOI":"10.1007\/978-981-19-1412-6_29"},{"key":"18105_CR7","doi-asserted-by":"crossref","unstructured":"Shorten C, Khoshgoftaar TM (2019) A survey on image data augmentation for deep learning. J Big Data\u00a06(1):1\u201348","DOI":"10.1186\/s40537-019-0197-0"},{"key":"18105_CR8","doi-asserted-by":"crossref","unstructured":"Hung BT, Chakrabarti P (2022) Parking lot occupancy detection using hybrid deep learning CNN-LSTM approach. In Proceedings of 2nd international conference on artificial intelligence: advances and applications: ICAIAA 2021 (pp. 501\u2013509). Singapore: Springer Nature Singapore","DOI":"10.1007\/978-981-16-6332-1_43"},{"key":"18105_CR9","unstructured":"Gesmundo A, Dean J (2022) muNET: evolving pretrained deep neural networks into scalable auto-tuning multitask systems. arXiv:2205.10937"},{"key":"18105_CR10","unstructured":"Crawshaw M (2020) Multi-task learning with deep neural networks: a survey. arXiv:2009.09796"},{"key":"18105_CR11","doi-asserted-by":"publisher","first-page":"3713","DOI":"10.1007\/s11042-022-13428-4","volume":"82","author":"D Khurana","year":"2023","unstructured":"Khurana D, Koli A, Khatter K et al (2023) Natural language processing: state of the art, current trends and challenges. Multimed Tools Appl 82:3713\u20133744","journal-title":"Multimed Tools Appl"},{"key":"18105_CR12","unstructured":"Devlin J, Chang M-W, Lee K, Toutanova K (2018c) Bert: pretraining of deep bidirectional transformers for language understanding. arXiv:1810.04805"},{"key":"18105_CR13","unstructured":"Devlin J, Chang M-W, Lee K, Toutanova K (2018) BERT: pretraining of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805"},{"key":"18105_CR14","doi-asserted-by":"publisher","first-page":"2544","DOI":"10.1002\/asi.21416","volume":"61","author":"M Thelwall","year":"2010","unstructured":"Thelwall M, Buckley K, Paltoglou G, Cai D, Kappas A (2010) Sentiment strength detection in short informal text. J Am Soc Inf Sci Technol 61:2544\u20132558. https:\/\/doi.org\/10.1002\/asi.21416","journal-title":"J Am Soc Inf Sci Technol"},{"key":"18105_CR15","unstructured":"Zhang L, Ghosh R, Dekhil M, Hsu M, Liu B (2011) Combining lexicon-based and learning-based methods for Twitter sentiment analysis. HP Laboratories, Technical Report HPL-2011, 89:1\u20138"},{"key":"18105_CR16","doi-asserted-by":"publisher","first-page":"26597","DOI":"10.1007\/s11042-019-07788-7","volume":"78","author":"AU Rehman","year":"2019","unstructured":"Rehman AU, Malik A, Raza B, Ali W (2019) A hybrid CNN-LSTM model for improving accuracy of movie reviews sentiment analysis. Multimed Tools Appl 78:26597\u201326613. https:\/\/doi.org\/10.1007\/s11042-019-07788-7","journal-title":"Multimed Tools Appl"},{"key":"18105_CR17","doi-asserted-by":"publisher","first-page":"1440","DOI":"10.1049\/iet-ipr.2019.1270","volume":"14","author":"A Ortis","year":"2020","unstructured":"Ortis A, Farinella GM, Battiato S (2020) Survey on visual sentiment analysis. IET Image Proc 14:1440\u20131456","journal-title":"IET Image Proc"},{"issue":"3","key":"18105_CR18","doi-asserted-by":"publisher","first-page":"1030","DOI":"10.3390\/app12031030","volume":"12","author":"G Chandrasekaran","year":"2022","unstructured":"Chandrasekaran G, Antoanela N, Andrei G, Monica C, Hemanth J (2022) Visual sentiment analysis using deep learning models with social media data. Appl Sci 12(3):1030","journal-title":"Appl Sci"},{"key":"18105_CR19","doi-asserted-by":"crossref","unstructured":"Zadeh A, Chen M, Poria S, Cambria E, Morency LP (2017) Tensor fusion network for multimodal sentiment analysis. arXiv:1707.07250","DOI":"10.18653\/v1\/D17-1115"},{"key":"18105_CR20","unstructured":"Birhane A, Prabhu VU, Kahembwe E (2021) Multimodal datasets: misogyny, pornography, and malignant stereotypes. arXiv:2110.01963"},{"key":"18105_CR21","doi-asserted-by":"crossref","unstructured":"Bachmann R, Mizrahi D, Atanov A, Zamir A (2022) MultiMAE: multi-modal multi-task masked autoencoders. arXiv:2204.01678","DOI":"10.1007\/978-3-031-19836-6_20"},{"key":"18105_CR22","doi-asserted-by":"crossref","unstructured":"Wu W, Wang Y, Xu S, Yan K (2020) SFNN: semantic features fusion neural network for multimodal sentiment analysis. 2020 5th International Conference on Automation, Control and Robotics Engineering (CACRE), Dalian, China, pp 661\u2013665","DOI":"10.1109\/CACRE50138.2020.9230015"},{"key":"18105_CR23","doi-asserted-by":"crossref","unstructured":"Xu N, Mao W (2017) MultiSentiNet: a deep semantic network for multimodal sentiment analysis. Proceedings of the 2017 ACM on conference on information and knowledge management, pp 2399\u20132402","DOI":"10.1145\/3132847.3133142"},{"key":"18105_CR24","doi-asserted-by":"crossref","unstructured":"Xu N, Mao W, Guandan C (2018) A co-memory network for multimodal sentiment analysis. In The 41st international ACM SIGIR conference on research & development in information retrieval (pp. 929\u2013932)","DOI":"10.1145\/3209978.3210093"},{"key":"18105_CR25","doi-asserted-by":"crossref","unstructured":"Wang M, Cao D, Li L, Li S, Ji R (2012) Microblog sentiment analysis based on cross-media bag-of-words model. In Proceedings of international conference on internet multimedia computing and service (pp. 76\u201380)","DOI":"10.1145\/2632856.2632912"},{"issue":"2","key":"18105_CR26","doi-asserted-by":"publisher","first-page":"41","DOI":"10.3390\/a9020041","volume":"9","author":"Y Yu","year":"2016","unstructured":"Yu Y, Lin H, Meng J, Zhao Z (2016) Visual and textual sentiment analysis of a microblog using deep convolutional neural networks. Algorithms 9(2):41","journal-title":"Algorithms"},{"key":"18105_CR27","doi-asserted-by":"publisher","unstructured":"Gaspar A, Alexandre L (2019) A multimodal approach to image sentiment analysis. https:\/\/doi.org\/10.1007\/978-3-030-33607-3_33","DOI":"10.1007\/978-3-030-33607-3_33"},{"key":"18105_CR28","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, van der Maaten L (2018) Densely connected convolutional networks. arXiv:1608.06993","DOI":"10.1109\/CVPR.2017.243"},{"key":"18105_CR29","doi-asserted-by":"crossref","unstructured":"Yadav A, Vishwakarma DK (2022) A deep multi-level attentive network for multimodal sentiment analysis. ACM Trans Multimed Comput Commun Appl","DOI":"10.1145\/3517139"},{"key":"18105_CR30","doi-asserted-by":"crossref","unstructured":"Borth D, Ji R, Chen T, Breuel T, Chang S-F (2013) Large-scale visual sentiment ontology and detectors using adjective noun pairs. Proceedings of the 21st ACM international conference on Multimedia. ACM, pp 223\u2013232","DOI":"10.1145\/2502081.2502282"},{"key":"18105_CR31","doi-asserted-by":"crossref","unstructured":"Cai G, Xia B (2015) Convolutional neural networks for multimedia sentiment analysis. In: Natural language processing and Chinese computing. Springer, pp 159\u2013167","DOI":"10.1007\/978-3-319-25207-0_14"},{"key":"18105_CR32","doi-asserted-by":"crossref","unstructured":"Xu N (2017) Analyzing multimodal public sentiment based on hierarchical semantic attentional network. In: ISI. IEEE, pp 152\u2013154","DOI":"10.1109\/ISI.2017.8004895"},{"key":"18105_CR33","doi-asserted-by":"crossref","unstructured":"De Toledo GL, Marcacini RM (2022) Transfer learning with joint fine-tuning for multimodal sentiment analysis. arXiv preprint arXiv:2210.05790","DOI":"10.52591\/lxai202207173"},{"key":"18105_CR34","doi-asserted-by":"crossref","unstructured":"Niu T, Zhu SA, Pang L, El Saddik A (2016) Sentiment analysis on multi-view social data. MultiMedia Modeling (MMM), Miami, pp 15\u201327","DOI":"10.1007\/978-3-319-27674-8_2"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-18105-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-18105-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-18105-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,8]],"date-time":"2024-11-08T19:52:25Z","timestamp":1731095545000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-18105-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1,22]]},"references-count":34,"journal-issue":{"issue":"25","published-online":{"date-parts":[[2024,7]]}},"alternative-id":["18105"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-18105-8","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,1,22]]},"assertion":[{"value":"22 July 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 October 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 December 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 January 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"This article does not contain any studies with human participants or animals performed by any of the authors.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"The authors declare that we have no conflict of interest.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"The authors declare that we have no competing interest.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}