{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,30]],"date-time":"2025-10-30T17:09:42Z","timestamp":1761844182942},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2012,3,7]],"date-time":"2012-03-07T00:00:00Z","timestamp":1331078400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Int J Multimed Info Retr"],"published-print":{"date-parts":[[2012,4]]},"DOI":"10.1007\/s13735-012-0006-4","type":"journal-article","created":{"date-parts":[[2012,3,6]],"date-time":"2012-03-06T13:15:09Z","timestamp":1331039709000},"page":"31-44","source":"Crossref","is-referenced-by-count":13,"title":["Multimodal Image Retrieval"],"prefix":"10.1007","volume":"1","author":[{"given":"Stefan","family":"Romberg","sequence":"first","affiliation":[]},{"given":"Rainer","family":"Lienhart","sequence":"additional","affiliation":[]},{"given":"Eva","family":"H\u00f6rster","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2012,3,7]]},"reference":[{"key":"6_CR1","first-page":"1107","volume":"3","author":"K Barnard","year":"2003","unstructured":"Barnard K, Duygulu P, Forsyth D, Blei DM, Hofmann T, Poggio T, Shawe-taylor J (2003) Matching words and pictures. J Mach Learn Res 3:1107\u20131135","journal-title":"J Mach Learn Res"},{"issue":"3","key":"6_CR2","doi-asserted-by":"crossref","first-page":"346","DOI":"10.1016\/j.cviu.2007.09.014","volume":"110","author":"H Bay","year":"2008","unstructured":"Bay H, Ess A, Tuytelaars T, Van Gool L (2008) SURF: speeded up robust features. Comput Vis Imag Underst 110(3):346\u2013359","journal-title":"Comput Vis Imag Underst"},{"key":"6_CR3","doi-asserted-by":"crossref","unstructured":"Berg AC, Berg TL, Malik J (2005) Shape matching and object recognition using low distortion correspondences. In: IEEE conference on computer vision and pattern recognition (CVPR\u201905), vol 1. Washington, DC, pp 26\u201333","DOI":"10.1109\/CVPR.2005.320"},{"key":"6_CR4","unstructured":"Blei D, Lafferty J (2006) Correlated topic models. In: Advances in neural information processing systems, vol 18, pp 147\u2013154"},{"key":"6_CR5","unstructured":"Blei DM, Jordan MI (2003) Modeling annotated data. In: ACM SIGIR conference on research and development in information retrieval (SIGIR\u201903), pp 127\u2013134"},{"key":"6_CR6","first-page":"993","volume":"3","author":"DM Blei","year":"2003","unstructured":"Blei DM, Ng AY, Jordan MI (2003) Latent dirichlet allocation. J\u00a0Mach Learn Res 3:993\u20131022","journal-title":"J\u00a0Mach Learn Res"},{"key":"6_CR7","first-page":"517","volume":"3954","author":"A Bosch","year":"2006","unstructured":"Bosch A, Zisserman A, Mu\u00f1oz X (2006) Scene classification via pLSA. Eur Confer Comput Vis (ECCV\u201906) 3954:517\u2013530","journal-title":"Eur Confer Comput Vis (ECCV\u201906)"},{"issue":"1","key":"6_CR8","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","volume":"39","author":"A Dempster","year":"1977","unstructured":"Dempster A, Laird N, Rubin D (1977) Maximum likelihood from incomplete data via the EM algorithm. J R Stat Soc 39(1):1\u201338","journal-title":"J R Stat Soc"},{"key":"6_CR9","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li L, Li K, Fei-Fei L (2009) Imagenet: a large-scale hierarchical image database. In: IEEE conference on computer vision and pattern recognition (CVPR\u201909)","DOI":"10.1109\/CVPR.2009.5206848"},{"issue":"2","key":"6_CR10","doi-asserted-by":"crossref","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2009","unstructured":"Everingham M, Van Gool L, Williams C, Winn J, Zisserman A (2009) The pascal visual object classes (VOC) challenge. Int J Comput Vis (IJCV\u201904) 88(2):303\u2013338","journal-title":"Int J Comput Vis (IJCV\u201904)"},{"key":"6_CR11","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/7287.001.0001","volume-title":"WordNet: an electronic lexical database","author":"C Fellbaum","year":"1998","unstructured":"Fellbaum C (1998) WordNet: an electronic lexical database. MIT Press, Cambridge"},{"key":"6_CR12","doi-asserted-by":"crossref","unstructured":"Felzenszwalb P, Girshick R, McAllester D, Ramanan D (2010) Object detection with discriminatively trained part-based models. IEEE Trans Pattern Anal Mach Intell (PAMI\u201910), 32(9)","DOI":"10.1109\/TPAMI.2009.167"},{"key":"6_CR13","unstructured":"Greif T, H\u00f6rster E, Lienhart R (2008) Correlated topic models for image retrieval. Technical Report TR2008\u201309, University of Augsburg"},{"key":"6_CR14","volume-title":"On intelligence","author":"J Hawkins","year":"2004","unstructured":"Hawkins J, Blakeslee S (2004) On intelligence. Times Books, New York"},{"issue":"5786","key":"6_CR15","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1126\/science.1127647","volume":"313","author":"GE Hinton","year":"2006","unstructured":"Hinton GE, Salakhutdinov RR (2006) Reducing the dimensionality of data with neural networks. Science 313(5786):504\u2013507","journal-title":"Science"},{"issue":"1\u20132","key":"6_CR16","doi-asserted-by":"crossref","first-page":"177","DOI":"10.1023\/A:1007617005950","volume":"42","author":"T Hofmann","year":"2001","unstructured":"Hofmann T (2001) Unsupervised learning by probabilistic latent semantic analysis. Mach Learn 42(1\u20132):177\u2013196","journal-title":"Mach Learn"},{"key":"6_CR17","doi-asserted-by":"crossref","unstructured":"H\u00f6rster E, Lienhart R (2008) Deep networks for image retrieval on large-scale databases. In: ACM international conference on multimedia (MM\u201908), New York, pp 643\u2013646","DOI":"10.1145\/1459359.1459449"},{"key":"6_CR18","doi-asserted-by":"crossref","unstructured":"H\u00f6rster E, Lienhart R, Slaney M (2007) Image retrieval on large-scale image databases. In: ACM international conference on content-based image and video retrieval (CIVR\u201907), pp 17\u201324","DOI":"10.1145\/1282280.1282283"},{"key":"6_CR19","unstructured":"H\u00f6rster E, Lienhart R, Slaney M (2008) Continuous visual vocabulary models for pL-based scene recognition. In: ACM international conference on content-based image and video retrieval (CIVR\u201908), New York, pp 319\u2013328"},{"key":"6_CR20","doi-asserted-by":"crossref","unstructured":"Kennedy L, Naaman M, Ahern S, Nair R, Rattenbury T (2007) How flickr helps us make sense of the world: context and content in community-contributed media collections. In: ACM international conference on multimedia (MM\u201907), New York, pp 631\u2013640","DOI":"10.1145\/1291233.1291384"},{"key":"6_CR21","unstructured":"Lienhart R, Romberg S, H\u00f6rster E (2009) Multilayer pLSA for multimodal image retrieval (CIVR\u201909). In: ACM international conference on image and video retrieval, vol 14"},{"key":"6_CR22","first-page":"1217","volume":"IV","author":"R Lienhart","year":"2007","unstructured":"Lienhart R, Slaney M (2007) pLSA on large scale image databases. IEEE Int Confer Acoust Speech Signal Process (ICASSP\u201907) IV:1217\u20131220","journal-title":"IEEE Int Confer Acoust Speech Signal Process (ICASSP\u201907)"},{"issue":"2","key":"6_CR23","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe DG (2004) Distinctive image features from scale-invariant keypoints. Int J Comput Vis (IJCV\u201904) 60(2):91\u2013110","journal-title":"Int J Comput Vis (IJCV\u201904)"},{"key":"6_CR24","doi-asserted-by":"crossref","unstructured":"Monay F, Gatica-Perez D (2004) pLSA-based image auto-annotation: constraining the latent space. In: ACM international conference on multimedia (MM?04), New York, pp 348\u2013351","DOI":"10.1145\/1027527.1027608"},{"key":"6_CR25","first-page":"2161","volume":"2","author":"D Nister","year":"2006","unstructured":"Nister D, Stewenius H (2006) Scalable recognition with a vocabulary tree. IEEE Confer Comput Vis Pattern Recogn (CVPR\u201906) 2:2161\u20132168","journal-title":"IEEE Confer Comput Vis Pattern Recogn (CVPR\u201906)"},{"key":"6_CR26","first-page":"1575","volume":"3613","author":"J Philbin","year":"2007","unstructured":"Philbin J, Chum O, Isard M, Sivic J, Zisserman A (2007) Object retrieval with large vocabularies and fast spatial matching. IEEE Confer Comput Vis Pattern Recogn (CVPR\u201907) 3613:1575\u20131589","journal-title":"IEEE Confer Comput Vis Pattern Recogn (CVPR\u201907)"},{"key":"6_CR27","doi-asserted-by":"crossref","unstructured":"Romberg S, Horster E, Lienhart R (2009) Multimodal pLSA on visual features and tags. In: IEEE international conference on multimedia and expo (ICME\u201909), pp 414\u2013417","DOI":"10.1109\/ICME.2009.5202522"},{"key":"6_CR28","doi-asserted-by":"crossref","unstructured":"Shechtman E, Irani M (2007) Matching local self-similarities across images and videos. In: IEEE conference on computer vision and pattern recognition (CVPR\u201907)","DOI":"10.1109\/CVPR.2007.383198"},{"key":"6_CR29","doi-asserted-by":"crossref","unstructured":"Sivic J, Russell BC, Zisserman A, Freeman WT, Efros AA (2008) Unsupervised discovery of visual object class hierarchies. In: IEEE conference on computer vision and pattern recognition (CVPR\u201908)","DOI":"10.1109\/CVPR.2008.4587622"},{"key":"6_CR30","doi-asserted-by":"crossref","unstructured":"Sivic J, Zisserman A (2003) Video Google: a text retrieval approach to object matching in videos. In: International conference on computer vision (ICCV\u201903)","DOI":"10.1109\/ICCV.2003.1238663"},{"key":"6_CR31","doi-asserted-by":"crossref","unstructured":"Xiao J, Hays J, Ehinger K, Oliva A, Torralba A (2010) Sun database: Large-scale scene recognition from abbey to zoo. In: IEEE conference on computer vision and pattern recognition (CVPR\u201910)","DOI":"10.1109\/CVPR.2010.5539970"},{"key":"6_CR32","doi-asserted-by":"crossref","unstructured":"Zhang L, Wang X-j (2011) Multi-Feature pLSA for combining visual features in image annotation. In: ACM international conference on multimedia (MM\u201911), Scottsdale, Arizona, pp 1513\u20131516","DOI":"10.1145\/2072298.2072053"}],"container-title":["International Journal of Multimedia Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13735-012-0006-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s13735-012-0006-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13735-012-0006-4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,21]],"date-time":"2024-04-21T04:40:34Z","timestamp":1713674434000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s13735-012-0006-4"}},"subtitle":["Fusing modalities with multilayer multimodal pLSA"],"short-title":[],"issued":{"date-parts":[[2012,3,7]]},"references-count":32,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2012,4]]}},"alternative-id":["6"],"URL":"https:\/\/doi.org\/10.1007\/s13735-012-0006-4","relation":{},"ISSN":["2192-6611","2192-662X"],"issn-type":[{"value":"2192-6611","type":"print"},{"value":"2192-662X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012,3,7]]}}}