{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,13]],"date-time":"2025-11-13T07:08:19Z","timestamp":1763017699303},"reference-count":96,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"11","license":[{"start":{"date-parts":[[2017,11,1]],"date-time":"2017-11-01T00:00:00Z","timestamp":1509494400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2017,11,1]]},"DOI":"10.1109\/tpami.2016.2635138","type":"journal-article","created":{"date-parts":[[2016,12,2]],"date-time":"2016-12-02T19:30:31Z","timestamp":1480707031000},"page":"2284-2297","source":"Crossref","is-referenced-by-count":17,"title":["Visually Grounded Meaning Representations"],"prefix":"10.1109","volume":"39","author":[{"given":"Carina","family":"Silberer","sequence":"first","affiliation":[]},{"given":"Vittorio","family":"Ferrari","sequence":"additional","affiliation":[]},{"given":"Mirella","family":"Lapata","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1006\/jmla.1997.2559"},{"key":"ref72","author":"salton","year":"1986","journal-title":"Introduction to Modern Information Retrieval"},{"key":"ref71","first-page":"1871","article-title":"LIBLINEAR: A library for large linear classification","volume":"9","author":"fan","year":"2008","journal-title":"J Mach Learn Res"},{"key":"ref70","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/7287.001.0001","author":"fellbaum","year":"1998","journal-title":"WordNet An Electronic Lexical Database"},{"key":"ref76","first-page":"1","article-title":"Using WordNet-based context vectors to estimate the semantic relatedness of concepts","author":"patwardhan","year":"2006","journal-title":"Proceedings of Making Sense of Sense-Bringing Computational Linguistics and Psycholinguistics Together"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1080\/01690969108406936"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1145\/503104.503110"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1005"},{"key":"ref75","first-page":"890","article-title":"A new set of norms for semantic relatedness measures","author":"szumlanski","year":"2013","journal-title":"Proc Ann Meeting Assoc for Computational Linguistics"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"ref78","article-title":"The University of South Florida word association, rhyme, and word fragment norms","author":"nelson","year":"1998"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1162\/0899766042321814"},{"key":"ref33","first-page":"3111","article-title":"Distributed representations of words and phrases and their compositionality","author":"mikolov","year":"2013","journal-title":"Proc Advances Neural Inf Process Syst"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1037\/0033-295X.111.1.205"},{"key":"ref31","article-title":"From perceptual to language-mediated categorization","volume":"369","author":"westermann","year":"2014","journal-title":"Philosoph Trans Royal Soc B Biol Sci"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1037\/0033-295X.98.1.74"},{"key":"ref37","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1613\/jair.4135","article-title":"Multimodal distributional semantics","volume":"49","author":"bruni","year":"2014","journal-title":"J Artif Intell Res"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/985692.985733"},{"key":"ref35","first-page":"248","article-title":"ImageNet: A large-scale hierarchical image database","author":"deng","year":"2009","journal-title":"Proc IEEE Conf Comput Vis Pattern Recognit"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1032"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995353"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1007\/s11168-010-9068-8"},{"key":"ref61","first-page":"9","article-title":"Combining methods to learn feature-norm-like concept descriptions","author":"barbu","year":"2008","journal-title":"Proceedings of the ESSLLI Workshop on Distributional Lexical Semantics"},{"key":"ref63","first-page":"61","article-title":"Acquiring human-like feature-based conceptual representations from corpora","author":"kelly","year":"2010","journal-title":"Proc NAACL HLT 1st Workshop Comput Neurolinguistics"},{"key":"ref28","first-page":"232","article-title":"Models of semantic memory","author":"jones","year":"2015","journal-title":"Oxford Handbook of Computational and Mathematical Psychology"},{"key":"ref64","first-page":"318","article-title":"Learning internal representations by error propagation","volume":"1","author":"rumelhart","year":"1986","journal-title":"Parallel Distributed Processing Explorations in the Microstructure of Cognition"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206772"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1561\/2200000006"},{"key":"ref66","first-page":"3371","article-title":"Stacked denoising autoencoders: Learning useful representations in a deep network with a local denoising criterion","volume":"11","author":"vincent","year":"2010","journal-title":"J Mach Learn Res"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1037\/0096-3445.120.4.339"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390294"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1126\/science.1127647"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390256"},{"key":"ref2","first-page":"2493","article-title":"Natural language processing (almost) from scratch","volume":"12","author":"collobert","year":"2011","journal-title":"J Mach Learn Res"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"141","DOI":"10.1613\/jair.2934","article-title":"From frequency to meaning: Vector space models of semantics","volume":"37","author":"turney","year":"2010","journal-title":"J Artif Intell Res"},{"key":"ref20","first-page":"136","article-title":"Distributional semantics in technicolor","author":"bruni","year":"2012","journal-title":"Proc Ann Meeting Assoc for Computational Linguistics"},{"key":"ref22","first-page":"1146","article-title":"A multimodal LDA model integrating textual, cognitive and visual modalities","author":"roller","year":"2013","journal-title":"Proc Conf Empirical Methods Natural Language Process"},{"key":"ref21","first-page":"572","article-title":"Models of semantic representation with visual attributes","author":"silberer","year":"2013","journal-title":"Proc Ann Meeting Assoc for Computational Linguistics"},{"key":"ref24","first-page":"689","article-title":"Multimodal deep learning","author":"ngiam","year":"2011","journal-title":"Proc 28th Int Conf Mach Learn"},{"key":"ref23","first-page":"153","article-title":"Greedy layer-wise training of deep networks","author":"bengio","year":"2006","journal-title":"Proc Advances Neural Inf Process Syst"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1111\/j.1551-6709.2009.01068.x"},{"key":"ref25","first-page":"2231","article-title":"Multimodal learning with deep Boltzmann machines","author":"srivastava","year":"2012","journal-title":"Proc Advances Neural Inf Process Syst"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"ref51","first-page":"2121","article-title":"DeViSE: A deep visual-semantic embedding model","author":"frome","year":"2013","journal-title":"Proc Advances Neural Inf Process Syst"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.3115\/1621474.1621476"},{"key":"ref94","first-page":"1916","article-title":"Meaning representation in natural language categorization","author":"fountain","year":"2010","journal-title":"Proc 31st Annu Conf Cogn Sci Soc"},{"key":"ref93","first-page":"73","article-title":"Chinese whispers&#x2014;an efficient graph clustering algorithm and its application to natural language processing problems","author":"biemann","year":"2006","journal-title":"Proc 1st Workshop Graph Based Methods Natural Language Process"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/E14-1027"},{"key":"ref91","first-page":"255","article-title":"Incremental models of natural language category acquisition","author":"fountain","year":"2011","journal-title":"Proc 32nd Annu Conf Cogn Sci Soc"},{"key":"ref90","first-page":"485","article-title":"Identifying representations of categories of discrete items using Markov chain Monte Carlo with people","author":"hsu","year":"2012","journal-title":"Proc 34th Annu Conf Cognit Sci Soc"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1111\/j.1756-8765.2011.01176.x"},{"key":"ref59","first-page":"3474","article-title":"Discovering localized attributes for fine-grained recognition","author":"duan","year":"2012","journal-title":"Proc IEEE Conf Comput Vis Pattern Recognit"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6247998"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2011.48"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1207\/s15516709cog1502_3"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206594"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-009-0275-4"},{"key":"ref53","first-page":"433","article-title":"Learning visual attributes","author":"ferrari","year":"2007","journal-title":"Proc Advances Neural Inf Process Syst"},{"key":"ref52","first-page":"935","article-title":"Zero-shot learning through cross-modal transfer","author":"socher","year":"2013","journal-title":"Proc Advances Neural Inf Process Syst"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.3758\/BF03196313"},{"key":"ref11","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/3608.001.0001","author":"regier","year":"1996","journal-title":"The Human Semantic Potential"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/N15-1016"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/S1364-6613(97)01111-X"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1146\/annurev.psych.59.103006.093639"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.3758\/BF03192726"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1037\/a0016261"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1037\/0033-295X.104.2.211"},{"key":"ref16","first-page":"1423","article-title":"Grounded models of semantic representation","author":"silberer","year":"2012","journal-title":"Proc Conf Empirical Methods Natural Language Process"},{"key":"ref81","first-page":"1247","article-title":"Deep canonical correlation analysis","author":"andrew","year":"2013","journal-title":"Proc 30th Int Conf Mach Learn"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.3758\/BRM.40.1.183"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.222"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1207\/s15516709cog2303_4"},{"key":"ref83","first-page":"187","article-title":"VSEM: An open library for visual semantics representation","author":"bruni","year":"2013","journal-title":"Proc 51st Ann Meeting Assoc Comput Linguistics Syst Demonstrations"},{"key":"ref19","first-page":"91","article-title":"Visual information in semantic representation","author":"feng","year":"2010","journal-title":"Proc Human Language Technol Ann Conf North Amer Chapter Assoc Comput Linguistics"},{"key":"ref80","article-title":"Canonical correlation&#x2014;a tutorial","author":"borga","year":"2001"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1037\/0096-1523.21.1.128"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511809071"},{"key":"ref3","first-page":"746","article-title":"Linguistic regularities in continuous space word representations","author":"mikolov","year":"2013","journal-title":"Proc Conf North Amer Chapter Assoc Comput Linguistics Human Language Technol"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/505282.505283"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/1135777.1135835"},{"key":"ref85","first-page":"1097","article-title":"ImageNet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Proc Advances Neural Inf Process Syst"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1037\/0033-295X.114.2.211"},{"key":"ref86","first-page":"607","article-title":"Concepts and categorization","volume":"4","author":"goldstone","year":"2012","journal-title":"Comprehensive Handbook of Psychology"},{"key":"ref7","first-page":"1744","article-title":"Question answering using enhanced lexical semantic models","author":"yih","year":"2013","journal-title":"Proc Ann Meeting Assoc for Computational Linguistics"},{"key":"ref49","article-title":"Deep captioning with multimodal recurrent neural networks (m-RNN)","volume":"abs 1412 6632","author":"mao","year":"2014","journal-title":"CoRR"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1037\/0033-295X.98.3.409"},{"key":"ref88","first-page":"726","article-title":"A more rational model of categorization","author":"sanborn","year":"2006","journal-title":"Proc 28th Ann Conf Cogn Sci Soc"},{"key":"ref9","first-page":"775","author":"harris","year":"1970","journal-title":"Distributional structure"},{"key":"ref46","first-page":"2141","article-title":"Improved multimodal deep learning with variation of information","author":"sohn","year":"2014","journal-title":"Proc Advances Neural Inf Process Syst"},{"key":"ref45","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1162\/tacl_a_00177","article-title":"Grounded compositional semantics for finding and describing images with sentences","volume":"2","author":"socher","year":"2014","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"ref48","article-title":"Unifying visual-semantic embeddings with multimodal neural language models","author":"kiros","year":"2014"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639140"},{"key":"ref42","first-page":"993","article-title":"Latent Dirichlet allocation","volume":"3","author":"blei","year":"2003","journal-title":"J Mach Learn Res"},{"key":"ref41","first-page":"22","article-title":"Distributional semantics from text and images","author":"bruni","year":"2011","journal-title":"Proceedings of the Workshop on Geometrical Models of Natural Language Semantics - GEMS '09"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/2502081.2502112"},{"key":"ref43","first-page":"2949","article-title":"Multimodal learning with deep Boltzmann machines","volume":"15","author":"srivastava","year":"2014","journal-title":"J Mach Learn Res"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/8055017\/07765034.pdf?arnumber=7765034","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T16:41:28Z","timestamp":1642005688000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7765034\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,11,1]]},"references-count":96,"journal-issue":{"issue":"11"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2016.2635138","relation":{},"ISSN":["0162-8828","2160-9292"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,11,1]]}}}