{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,19]],"date-time":"2026-05-19T17:29:07Z","timestamp":1779211747792,"version":"3.51.4"},"reference-count":79,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Multimedia"],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1109\/tmm.2019.2920620","type":"journal-article","created":{"date-parts":[[2019,6,3]],"date-time":"2019-06-03T23:04:03Z","timestamp":1559603043000},"page":"2985-2996","source":"Crossref","is-referenced-by-count":20,"title":["Learning Semantic Text Features for Web Text-Aided Image Classification"],"prefix":"10.1109","volume":"21","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1467-6023","authenticated-orcid":false,"given":"Dongzhe","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9191-8604","authenticated-orcid":false,"given":"Kezhi","family":"Mao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.231"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2655024"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.476"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2015.2389413"},{"key":"ref76","first-page":"69","article-title":"NLTK: The natural language toolkit","author":"bird","year":"0","journal-title":"Proceedings of the COLING\/ACL on Interactive presentation sessions -"},{"key":"ref77","first-page":"2825","article-title":"Scikit-learn: Machine learning in python","volume":"12","author":"pedregosa","year":"2011","journal-title":"J Mach Learn Res"},{"key":"ref74","first-page":"823","article-title":"Improving scene classification by fusion of training data and web resources","author":"wang","year":"0","journal-title":"Proc 18th Int Conf Inf Fusion"},{"key":"ref39","article-title":"Efficient estimation of word representations in vector space","author":"mikolov","year":"0"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2491929"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/N15-1011"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.106"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46487-9_52"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1016\/0306-4573(88)90021-0"},{"key":"ref32","first-page":"2121","article-title":"Devise: A deep visual-semantic embedding model","author":"frome","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.222"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/P14-1062"},{"key":"ref36","first-page":"69","article-title":"Deep convolutional neural networks for sentiment analysis of short texts","author":"santos","year":"0","journal-title":"Proc 25th Int Conf Comput Linguistics"},{"key":"ref35","first-page":"2042","article-title":"Convolutional neural network architectures for matching natural language sentences","author":"hu","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref34","article-title":"A sensitivity analysis of (and practitioners&#x2019; guide to) convolutional neural networks for sentence classification","author":"zhang","year":"2015","journal-title":"Comput Sci"},{"key":"ref60","article-title":"Caltech-256 object category dataset","author":"griffin","year":"2007"},{"key":"ref62","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"0"},{"key":"ref61","article-title":"The PASCAL Visual Object Classes Challenge 2012 (VOC2012) Results","author":"everingham","year":"2012"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.195"},{"key":"ref27","first-page":"1097","article-title":"Imagenet classification with deep neural networks","author":"krizhevsky","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.513"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2015.02.003"},{"key":"ref29","first-page":"815","article-title":"Decaf: A deep convolutional activation feature for generic visual recognition","volume":"50","author":"donahue","year":"2013","journal-title":"Comput Sci"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15561-1_11"},{"key":"ref68","first-page":"818","article-title":"Visualizing and understanding convolutional networks","author":"zeiler","year":"0","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref69","first-page":"741","article-title":"A deep generative deconvolutional image model","author":"pu","year":"0","journal-title":"Proc Artif Intell Statist"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273592"},{"key":"ref1","first-page":"892","article-title":"Semi-supervised learning","author":"zhu","year":"2011","journal-title":"Encyclopedia of Machine Learning"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1181"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1023\/A:1011139631724"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.1999.790410"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995484"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2006.68"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5540018"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2014.02.013"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298856"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2007.4408872"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2005.853575"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2002.1031953"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.3115\/981732.981751"},{"key":"ref55","first-page":"1378","article-title":"Object bank: A high-level image representation for scene classification & semantic feature sparsification","author":"li","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref54","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"maaten","year":"2008","journal-title":"J Mach Learn Res"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1874164"},{"key":"ref52","first-page":"114","article-title":"Adaptive multimodal fusion with web resources for scene classification","author":"wang","year":"0","journal-title":"Proc 19th Int Conf Inf Fusion"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206816"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1002\/int.21567"},{"key":"ref40","first-page":"3111","article-title":"Distributed representations of words and phrases and their compositionality","author":"mikolov","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/APWeb.2010.49"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654913"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2014.2375793"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2016.2535864"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/151"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2869721"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/2578726.2578729"},{"key":"ref19","first-page":"2493","article-title":"Natural language processing (almost) from scratch","volume":"12","author":"collobert","year":"2011","journal-title":"J Mach Learn Res"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995702"},{"key":"ref3","first-page":"181","article-title":"Exploiting weakly-labeled web images to improve object classification: A domain adaptation approach","author":"bergamo","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref6","first-page":"353","article-title":"Translated learning: Transfer learning across different feature spaces","author":"dai","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2014.07.005"},{"key":"ref8","article-title":"Heterogeneous transfer learning for image classification","author":"zhu","year":"0","journal-title":"Proc Nat Conf Artif Intell"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.3115\/1687878.1687880"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/1180639.1180698"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2016.2558463"},{"key":"ref45","first-page":"689","article-title":"Multimodal deep learning","author":"ngiam","year":"0","journal-title":"Proc 28th Int Conf Mach Learn"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00177"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1145\/2983563.2983570"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-1174"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref44","first-page":"2222","article-title":"Multimodal learning with deep boltzmann machines","author":"srivastava","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.23919\/ICIF.2017.8009768"}],"container-title":["IEEE Transactions on Multimedia"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6046\/8906217\/08728055.pdf?arnumber=8728055","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,13]],"date-time":"2022-07-13T20:56:17Z","timestamp":1657745777000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8728055\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12]]},"references-count":79,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tmm.2019.2920620","relation":{},"ISSN":["1520-9210","1941-0077"],"issn-type":[{"value":"1520-9210","type":"print"},{"value":"1941-0077","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,12]]}}}