{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T21:25:22Z","timestamp":1780694722956,"version":"3.54.1"},"reference-count":169,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/OAPA.html"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61502104"],"award-info":[{"award-number":["61502104"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61672159"],"award-info":[{"award-number":["61672159"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Fujian Collaborative Innovation Center for Big Data Application in Governments"},{"name":"Technology Innovation Platform Project of Fujian Province","award":["2014H2005"],"award-info":[{"award-number":["2014H2005"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2019]]},"DOI":"10.1109\/access.2019.2916887","type":"journal-article","created":{"date-parts":[[2019,5,15]],"date-time":"2019-05-15T16:13:08Z","timestamp":1557936788000},"page":"63373-63394","source":"Crossref","is-referenced-by-count":439,"title":["Deep Multimodal Representation Learning: A 
Survey"],"prefix":"10.1109","volume":"7","author":[{"given":"Wenzhong","family":"Guo","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7603-1581","authenticated-orcid":false,"given":"Jianwen","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shiping","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref168","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2009.191"},{"key":"ref169","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2016.2535122"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00921"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.497"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00177"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2852503"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123326"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/263"},{"key":"ref36","author":"kiros","year":"2014","journal-title":"Unifying Visual-Semantic Embeddings with Multimodal Neural Language Models"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/N15-1016"},{"key":"ref34","first-page":"2121","article-title":"DeViSE: A deep visual-semantic embedding model","volume":"2","author":"frome","year":"2013","journal-title":"Proc 26th Int Conf Neural Inf Process 
Syst"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2014.12.020"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639140"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298966"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2738401"},{"key":"ref22","first-page":"1","article-title":"Multi-attention recurrent network for human communication comprehension","author":"zadeh","year":"2018","journal-title":"Proc 32nd AAAI Conf Artif Intell"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2670560"},{"key":"ref24","first-page":"289","article-title":"Hierarchical question-image co-attention for visual question answering","author":"lu","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1044"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.299"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/2671188.2749400"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1145\/2487575.2487612"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638346"},{"key":"ref50","author":"mikolov","year":"2013","journal-title":"Efficient Estimation of Word Representations in Vector Space"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref154","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.683"},{"key":"ref153","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.94"},{"key":"ref156","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1166"},{"key":"ref155","first-page":"1","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"2015","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref150","first-page":"5767","article-title":"Improved training of wasserstein 
gans","author":"gulrajani","year":"2017","journal-title":"Proc 31st Conf Neural Inf Process Syst"},{"key":"ref152","first-page":"2204","article-title":"Recurrent models of visual attention","author":"mnih","year":"2014","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref151","doi-asserted-by":"publisher","DOI":"10.1080\/135062800394667"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.244"},{"key":"ref147","first-page":"2234","article-title":"Improved techniques for training GANs","author":"salimans","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref148","first-page":"1","article-title":"Unrolled generative adversarial networks","author":"metz","year":"2017","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref149","author":"arjovsky","year":"2017","journal-title":"Wasserstein GAN"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1162\/089976600300015015"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/72.279181"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1016\/0364-0213(90)90002-E"},{"key":"ref55","first-page":"1","article-title":"Radical-based hierarchical embeddings for Chinese sentiment analysis at sentence level","author":"peng","year":"2017","journal-title":"Proc 13th Int Flairs Conf"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1162"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00051"},{"key":"ref52","first-page":"2741","article-title":"Character-aware neural language models","author":"kim","year":"2016","journal-title":"Proc 30th AAAI Conf Artif Intell"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.364"},{"key":"ref167","first-page":"1","article-title":"ATRank: An attention-based user behavior modeling framework for recommendation","author":"zhou","year":"2018","journal-title":"Proc 32nd AAAI Conf 
Artif Intell"},{"key":"ref166","first-page":"1","article-title":"Memory fusion network for multi-view sequential learning","author":"zadeh","year":"2018","journal-title":"Proc 32nd AAAI Conf Artif Intell"},{"key":"ref165","first-page":"1","article-title":"Multimodal keyless attention fusion for video classification","author":"long","year":"2018","journal-title":"Proc 32nd AAAI Conf Artif Intell"},{"key":"ref164","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.657"},{"key":"ref163","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.201"},{"key":"ref162","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.667"},{"key":"ref161","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.145"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46478-7_28"},{"key":"ref4","author":"peng","year":"2017","journal-title":"CM-GANs Cross-modal generative adversarial networks for common representation learning"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/264746a0"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.sigpro.2015.01.001"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1873987"},{"key":"ref159","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2627563"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.341"},{"key":"ref157","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.10"},{"key":"ref158","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.232"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2015.01.095"},{"key":"ref46","first-page":"1097","article-title":"ImageNet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Proc Adv 
Neural Inf Process Syst"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"ref48","first-page":"1","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2015","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2017.2729019"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/N15-1173"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.629"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2855422"},{"key":"ref127","first-page":"77","article-title":"Audio-visual speech recognition for a person with severe hearing loss using deep canonical correlation analysis","author":"takashima","year":"2017","journal-title":"Proc International Workshop on Challenges in Hearing Assistive Technology (CHAT)"},{"key":"ref126","author":"yu","year":"2017","journal-title":"Deep cross-modal correlation learning for audio and lyrics in music retrieval"},{"key":"ref125","first-page":"910","article-title":"Bridging languages through images with deep partial canonical correlation analysis","volume":"1","author":"rotman","year":"2018","journal-title":"Proc Annual Meeting of the Assoc Computational Linguistics"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/N15-1028"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2753232"},{"key":"ref72","first-page":"1","article-title":"Learning representations for multimodal data with deep belief nets","volume":"79","author":"srivastava","year":"2012","journal-title":"Proc Int Machine Learning Workshop"},{"key":"ref129","first-page":"1486","article-title":"Deep generative image models using a Laplacian pyramid of adversarial networks","author":"denton","year":"2015","journal-title":"Proc Adv Neural Inf Process 
Syst"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1145\/2487575.2487591"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1581"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1874246"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"ref130","first-page":"1","article-title":"Unsupervised representation learning with deep convolutional generative adversarial networks","author":"radford","year":"2016","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref77","first-page":"2346","article-title":"Jointly modeling deep video and compositional text to bridge vision and language in a unified framework","author":"xu","year":"2015","journal-title":"Proc 29th AAAI Conf Artif Intell"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2016.11.004"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2016.2558463"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/554"},{"key":"ref134","first-page":"2172","article-title":"InfoGAN: Interpretable representation learning by information maximizing generative adversarial nets","author":"chen","year":"2016","journal-title":"Proc 30th Int Conf Neural Inf Process Syst"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.632"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2016.2646180"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.19"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2017.2742704"},{"key":"ref136","author":"lipton","year":"2017","journal-title":"Precise Recovery of Latent Vectors from Generative Adversarial Networks"},{"key":"ref135","author":"creswell","year":"2016","journal-title":"Inverting The Generator Of A Generative Adversarial Network"},{"key":"ref138","first-page":"1","article-title":"Adversarial feature 
learning","author":"donahue","year":"2017","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref137","first-page":"1","article-title":"Adversarially learned inference","author":"dumoulin","year":"2017","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"ref139","author":"mirza","year":"2014","journal-title":"Conditional generative adversarial nets"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2877934"},{"key":"ref61","first-page":"2342","article-title":"An empirical exploration of recurrent network architectures","author":"jozefowicz","year":"2015","journal-title":"Proceedings of the 32nd Intl Conf on Machine Learning"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/78.650093"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2005.06.042"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.13"},{"key":"ref65","author":"chung","year":"2014","journal-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling"},{"key":"ref141","first-page":"217","article-title":"Learning what and where to draw","author":"reed","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1181"},{"key":"ref142","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00133"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/P14-1062"},{"key":"ref143","doi-asserted-by":"publisher","DOI":"10.1007\/s11280-018-0541-x"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1081"},{"key":"ref144","first-page":"1","article-title":"Unsupervised generative adversarial cross-modal hashing","author":"zhang","year":"2018","journal-title":"Proc 32nd AAAI Conf Artif 
Intell"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2017.2663324"},{"key":"ref69","first-page":"1","article-title":"OpenFace: An open source facial behavior analysis toolkit","author":"baltru\u0161aitis","year":"2016","journal-title":"Proc IEEE Winter Conf Appl Comput Vis"},{"key":"ref145","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2878970"},{"key":"ref1","first-page":"689","article-title":"Multimodal deep learning","author":"ngiam","year":"2011","journal-title":"Proc 28th Int Conf Mach Learn"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/28.3-4.321"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.131"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2015.2487860"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.128"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.14778\/2732296.2732301"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1145\/3136755.3136801"},{"key":"ref106","first-page":"2291","article-title":"Deep multimodal hashing with orthogonal regularization","author":"wang","year":"2015","journal-title":"Proc 24th Int Conf Artif Intell"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472618"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/P14-1068"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.345"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390294"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.450"},{"key":"ref103","first-page":"3","article-title":"Autoencoders, minimum description length and Helmholtz free energy","author":"hinton","year":"1994","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref102","first-page":"693","article-title":"Efficient learning of deep Boltzmann machines","author":"salakhutdinov","year":"2010","journal-title":"Proc Int Conf Artif Intell 
Statist"},{"key":"ref111","author":"akaho","year":"2006","journal-title":"A kernel method for canonical correlation analysis"},{"key":"ref112","author":"mallinar","year":"2018","journal-title":"Deep canonically correlated LSTMs"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1162\/0899766042321814"},{"key":"ref98","first-page":"448","article-title":"Deep Boltzmann machines","author":"salakhutdinov","year":"2009","journal-title":"Proc Int Conf Artif Intell Statist"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1162\/089976602760128018"},{"key":"ref96","first-page":"2222","article-title":"Multimodal learning with deep Boltzmann machines","author":"srivastava","year":"2012","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1162\/neco.2006.18.7.1527"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1115"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654902"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/365"},{"key":"ref13","first-page":"2048","article-title":"Show, attend and tell: Neural image caption generation with visual attention","author":"xu","year":"2015","journal-title":"Proceedings of the 32nd Intl Conf on Machine Learning"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"ref15","first-page":"1060","article-title":"Generative adversarial text to image synthesis","author":"reed","year":"2016","journal-title":"Proc 33rd Int Conf Mach Learn"},{"key":"ref118","first-page":"1083","article-title":"On deep multi-view representation learning","volume":"37","author":"wang","year":"2015","journal-title":"Proceedings of the 32nd Intl Conf on Machine Learning"},{"key":"ref16","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","article-title":"Deep 
learning","volume":"521","author":"lecun","year":"2015","journal-title":"Nature"},{"key":"ref82","first-page":"2672","article-title":"Generative adversarial nets","volume":"2","author":"goodfellow","year":"2014","journal-title":"Proc 27th Int Conf Neural Inf Process Syst (NIPS)"},{"key":"ref117","first-page":"1247","article-title":"Deep canonical correlation analysis","author":"andrew","year":"2013","journal-title":"Proc 30th Int Conf Mach Learn"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2017.02.007"},{"key":"ref81","first-page":"723","article-title":"A kernel two-sample test","volume":"13","author":"gretton","year":"2012","journal-title":"J Mach Learn Res"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2798607"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01219-9_11"},{"key":"ref119","article-title":"A probabilistic interpretation of canonical correlation analysis","author":"bach","year":"2005"},{"key":"ref19","author":"li","year":"2016","journal-title":"A survey of multi-view representation learning"},{"key":"ref83","author":"mor","year":"2018","journal-title":"A universal music translation network"},{"key":"ref114","first-page":"1","article-title":"Kernel independent component analysis","volume":"3","author":"bach","year":"2002","journal-title":"J Mach Learn Res"},{"key":"ref113","first-page":"682","article-title":"Using the Nystr\u00f6m method to speed up kernel machines","author":"williams","year":"2001","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref116","first-page":"1","article-title":"Kernel CCA for multi-view learning of acoustic features using articulatory measurements","author":"arora","year":"2012","journal-title":"Proc Symp Mach Learning Speech Lang 
Process"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.541"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1023\/A:1013625426931"},{"key":"ref120","author":"wang","year":"2016","journal-title":"Deep variational canonical correlation analysis"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.337"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178840"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1109\/ALLERTON.2015.7447071"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00161"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1613\/jair.4900"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00750"},{"key":"ref87","first-page":"311","article-title":"BLEU: A method for automatic evaluation of machine translation","author":"papineni","year":"2002","journal-title":"Proc Annual Meeting of the Assoc Computational Linguistics"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995466"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/8600701\/08715409.pdf?arnumber=8715409","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T11:54:32Z","timestamp":1641988472000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8715409\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"references-count":169,"URL":"https:\/\/doi.org\/10.1109\/access.2019.2916887","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019]]}}}