{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,4,2]],"date-time":"2022-04-02T22:25:11Z","timestamp":1648938311628},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"21","license":[{"start":{"date-parts":[[2017,5,9]],"date-time":"2017-05-09T00:00:00Z","timestamp":1494288000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2017,11]]},"DOI":"10.1007\/s11042-017-4733-7","type":"journal-article","created":{"date-parts":[[2017,5,9]],"date-time":"2017-05-09T00:37:10Z","timestamp":1494290230000},"page":"22405-22423","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Uni- and multimodal methods for single- and multi-label recognition"],"prefix":"10.1007","volume":"76","author":[{"given":"Satoru","family":"Ishikawa","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jorma","family":"Laaksonen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,5,9]]},"reference":[{"key":"4733_CR1","doi-asserted-by":"crossref","unstructured":"Arandjelovi\u0107 R, Zisserman A (2013) All about VLAD. IEEE conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2013.207"},{"issue":"1","key":"4733_CR2","doi-asserted-by":"crossref","first-page":"24","DOI":"10.1109\/TIP.2015.2496275","volume":"25","author":"S Bahrampour","year":"2016","unstructured":"Bahrampour S, Nasrabadi NM, Ray A, Jenkins WK (2016) Multimodal task-driven dictionary learning for image classification. IEEE Trans Image Process 25 (1):24\u201338","journal-title":"IEEE Trans Image Process"},{"key":"4733_CR3","unstructured":"Bhatia K, Jain H, Kar P, Varma M, Jain P (2015) Sparse local embeddings for extreme multi-label classification. In: Cortes C, Lawrence ND, Lee DD, Sugiyama M, Garnett R (eds) Advances in neural information processing systems 28. Curran Associates, Inc, pp 730\u2013738. http:\/\/papers.nips.cc\/paper\/5969-sparse-local-embeddings-for-extreme-multi-label-classification.pdf"},{"key":"4733_CR4","first-page":"401","volume-title":"Proceedings of ACM ICVR 2007","author":"A Bosch","year":"2007","unstructured":"Bosch A, Zisserman A, Munoz X (2007) Representing shape with a spatial pyramid kernel Proceedings of ACM ICVR 2007, pp 401\u2013408"},{"key":"4733_CR5","first-page":"185","volume-title":"Advances in neural information processing systems","author":"JL Boyd-Graber","year":"2009","unstructured":"Boyd-Graber JL, Blei DM (2009) Syntactic topic models Advances in neural information processing systems, pp 185\u2013192"},{"key":"4733_CR6","doi-asserted-by":"crossref","first-page":"27:1","DOI":"10.1145\/1961189.1961199","volume":"2","author":"CC Chang","year":"2011","unstructured":"Chang CC, Lin CJ (2011) LIBSVM: a library for support vector machines. ACM Trans Intell Syst Technol 2:27:1\u201327:27","journal-title":"ACM Trans Intell Syst Technol"},{"key":"4733_CR7","doi-asserted-by":"crossref","unstructured":"Chua TS, Tang J, Hong R, Li H, Luo Z, Zheng YT (2009) Nus-wide: A real-world web image database from national university of Singapore Proceedings of ACM conference on image and video retrieval (CIVR\u201909). santorini, Greece","DOI":"10.1145\/1646396.1646452"},{"key":"4733_CR8","unstructured":"Donahue J, Jia Y, Vinyals O, Hoffman J, Zhang N, Tzeng E, Darrell T (2014) deCAF: A deep convolutional activation feature for generic visual recognition ICML 2014"},{"key":"4733_CR9","unstructured":"Erk K, Pad\u00f3 S (2008) A structured vector space model for word meaning in context Proceedings of the conference on empirical methods in natural language processing. Association for Computational Linguistics, pp 897\u2013906"},{"issue":"72","key":"4733_CR10","first-page":"1","volume":"17","author":"S Escalera","year":"2016","unstructured":"Escalera S, Athitsos V, Guyon I (2016) Challenges in multimodal gesture recognition. J Mach Learn Res 17(72):1\u201354","journal-title":"J Mach Learn Res"},{"key":"4733_CR11","first-page":"1871","volume":"9","author":"R Fan","year":"2008","unstructured":"Fan R, Chang K, Hsieh C, Wang X, Lin C (2008) LIBLINEAR: A library for large linear classification. J Mach Learn Res 9:1871\u20131874","journal-title":"J Mach Learn Res"},{"key":"4733_CR12","unstructured":"Gong Y, Ke Q, Isard M, Lazebnik S (2012) A multi-view embedding space for modeling internet images, tags, and their semantics. CoRR arXiv: 1212.4522"},{"key":"4733_CR13","doi-asserted-by":"crossref","unstructured":"Gong Y, Wang L, Guo R, Lazebnik S (2014) Multi-scale orderless pooling of deep convolutional activation features. arXiv: 1403.1840","DOI":"10.1007\/978-3-319-10584-0_26"},{"key":"4733_CR14","first-page":"529","volume-title":"European conference on computer vision","author":"Y Gong","year":"2014","unstructured":"Gong Y, Wang L, Hodosh M, Hockenmaier J, Lazebnik S (2014) Improving image-sentence embeddings using large weakly annotated photo collections European conference on computer vision, pp 529\u2013545"},{"key":"4733_CR15","doi-asserted-by":"crossref","unstructured":"Guillaumin M, Verbeek J, Schmid C (2010) Multimodal semi-supervised learning for image classification. In: CVPR 2010 - 23Rd IEEE conference on computer vision & pattern recognition. IEEE Computer Society, San Francisco, USA, pp 902\u2013909","DOI":"10.1109\/CVPR.2010.5540120"},{"key":"4733_CR16","doi-asserted-by":"publisher","unstructured":"Habibian A, van de Sande KE, Snoek CG (2013) Recommendations for video event recognition using concept vocabularies. In: Proceedings of the 3rd ACM conference on international conference on multimedia retrieval, ICMR \u201913, pp 89\u201396. ACM, New York, NY, USA. doi: 10.1145\/2461466.2461482","DOI":"10.1145\/2461466.2461482"},{"key":"4733_CR17","doi-asserted-by":"crossref","first-page":"1771","DOI":"10.1162\/089976602760128018","volume":"14","author":"GE Hinton","year":"2002","unstructured":"Hinton GE (2002) Training products of experts by minimizing contrastive divergence. Neural Comput 14:1771\u20131800","journal-title":"Neural Comput"},{"key":"4733_CR18","doi-asserted-by":"crossref","first-page":"1527","DOI":"10.1162\/neco.2006.18.7.1527","volume":"18","author":"GE Hinton","year":"2006","unstructured":"Hinton GE, Osindero S, Teh Y (2006) A fast learning algorithm for deep belief nets. Neural Comput 18:1527\u20131554","journal-title":"Neural Comput"},{"key":"4733_CR19","unstructured":"Hinton GE, Salakhutdinov R (2009) Replicated softmax: an undirected topic model. In: Bengio Y, Schuurmans D, Lafferty JD, Williams CKI, Culotta A (eds) Advances in neural information processing systems 22. Curran Associates, Inc, pp 1607\u20131614. http:\/\/papers.nips.cc\/paper\/3856-replicated-softmax-an-undirected-topic-model.pdf"},{"key":"4733_CR20","doi-asserted-by":"crossref","unstructured":"Huiskes MJ, Lew MS (2008) The MIR Flickr retrieval evaluation","DOI":"10.1145\/1460096.1460104"},{"key":"4733_CR21","doi-asserted-by":"crossref","unstructured":"Ishikawa S, Laaksonen J (2016) Comparing and combining unimodal methods for multimodal recognition Proceedings of the 14th international workshop on content-based multimedia indexing (CBMI). bucharest, Romania","DOI":"10.1109\/CBMI.2016.7500253"},{"key":"4733_CR22","doi-asserted-by":"crossref","unstructured":"Jegou H, Douze M, Schmid C, Perez P (2010) Aggregating local descriptors into a compact image representation. In: 2010 IEEE conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2010.5540039"},{"key":"4733_CR23","doi-asserted-by":"crossref","unstructured":"Jia Y (2013) Caffe: An open source convolutional architecture for fast feature embedding http:\/\/caffe.berkeleyvision.org\/","DOI":"10.1145\/2647868.2654889"},{"issue":"4","key":"4733_CR24","doi-asserted-by":"crossref","first-page":"294","DOI":"10.1016\/j.is.2011.09.004","volume":"37","author":"S Kara","year":"2012","unstructured":"Kara S, Alan \u00d6, Sabuncu O, Akp\u0131nar S, Cicekli NK, Alpaslan FN (2012) An ontology-based retrieval system using semantic indexing. Inf Syst 37(4):294\u2013305","journal-title":"Inf Syst"},{"key":"4733_CR25","doi-asserted-by":"crossref","unstructured":"Koskela M, Laaksonen J (2014) Convolutional network features for scene recognition Proceedings of the 22nd ACM international conference on multimedia. Orlando, Florida","DOI":"10.1145\/2647868.2655024"},{"key":"4733_CR26","first-page":"2482","volume-title":"Proceedings of the 33rd international conference on machine learning","author":"C Li","year":"2016","unstructured":"Li C, Wang B, Pavlu V, Aslam J (2016) Conditional bernoulli mixtures for multi-label classification Proceedings of the 33rd international conference on machine learning, pp 2482\u20132491"},{"key":"4733_CR27","first-page":"1378","volume-title":"Advances in neural information processing systems","author":"LJ Li","year":"2010","unstructured":"Li LJ, Su H, Fei-Fei L, Xing EP (2010) Object bank: a high-level image representation for scene classification & semantic feature sparsification Advances in neural information processing systems, pp 1378\u20131386"},{"issue":"2","key":"4733_CR28","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe DG (2004) Distinctive image features from scale-invariant keypoints. Int J Comput Vis 60(2):91\u2013110","journal-title":"Int J Comput Vis"},{"issue":"6","key":"4733_CR29","doi-asserted-by":"crossref","first-page":"703","DOI":"10.1109\/76.927424","volume":"11","author":"BS Manjunath","year":"2001","unstructured":"Manjunath BS, Ohm JR, Vasudevan VV, Yamada A (2001) Color and texture descriptors. IEEE Trans Circuits Syst Video Technol 11(6):703\u2013715","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"issue":"1","key":"4733_CR30","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1109\/TMM.2011.2168948","volume":"14","author":"M Merler","year":"2012","unstructured":"Merler M, Huang B, Xie L, Hua G, Natsev A (2012) Semantic model vectors for complex video event recognition. Trans Multi 14(1):88\u2013101. doi: 10.1109\/TMM.2011.2168948","journal-title":"Trans Multi"},{"key":"4733_CR31","unstructured":"Mikolov T, Chen K, Corrado G, Dean J (2013) Efficient estimation of word representations in vector space. CoRR arXiv: 1301..3781"},{"issue":"3","key":"4733_CR32","doi-asserted-by":"publisher","first-page":"145","DOI":"10.1023\/A:1011139631724","volume":"42","author":"A Oliva","year":"2001","unstructured":"Oliva A, Torralba A (2001) Modeling the shape of the scene: A holistic representation of the spatial envelope. Int J Comput Vis 42(3):145\u2013175. doi: 10.1023\/A:1011139631724","journal-title":"Int J Comput Vis"},{"key":"4733_CR33","doi-asserted-by":"publisher","unstructured":"Sj\u00f6berg M, Laaksonen J (2014) Using semantic features to improve large-scale visual concept detection Proceedings of the 12th International Workshop on Content Based Multimedia Indexing (CBMI 2014), pp 1\u20136. IEEE, Klagenfurt, Austria. doi: 10.1109\/CBMI.2014.6849817","DOI":"10.1109\/CBMI.2014.6849817"},{"key":"4733_CR34","first-page":"2222","volume-title":"Advances in neural information processing systems. 2012","author":"N Srivastava","year":"2012","unstructured":"Srivastava N, Salakhutdinov R (2012) Multimodal learning with deep boltzmann machines Advances in neural information processing systems 2012, pp 2222\u20132230"},{"key":"4733_CR35","first-page":"2949","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava N, Salakhutdinov R (2014) Multimodal learning with deep boltzmann machines. J Mach Learn Res 15:2949\u20132980","journal-title":"J Mach Learn Res"},{"key":"4733_CR36","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A (2014) Going deeper with convolutions. arXiv: 1409.4842"},{"key":"4733_CR37","doi-asserted-by":"crossref","unstructured":"Torresani L, Szummer M, Fitzgibbon A (2010) Efficient object category recognition using classemes European Conference on Computer Vision (ECCV), pp 776\u2013789. http:\/\/research.microsoft.com\/pubs\/136846\/TorresaniSzummerFitzgibbon-classemes-eccv10.pdf","DOI":"10.1007\/978-3-642-15549-9_56"},{"key":"4733_CR38","unstructured":"Vedaldi A, Fulkerson B VLFeat: A library of computer vision algorithms. http:\/\/www.vlfeat.org\/"},{"key":"4733_CR39","doi-asserted-by":"crossref","unstructured":"Vedaldi A, Zisserman A (2010) Efficient additive kernels via explicit feature maps Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR 2010)","DOI":"10.1109\/CVPR.2010.5539949"},{"key":"4733_CR40","volume-title":"Image annotation with tagprop on the mirflickr set","author":"JJ Verbeek","year":"2010","unstructured":"Verbeek JJ, Guillaumin M, Mensink T, Schmid C (2010) Image annotation with tagprop on the mirflickr set. ACM, New York, NY, USA"},{"key":"4733_CR41","doi-asserted-by":"crossref","unstructured":"van de Weijer J, Schmid C (2006) Coloring local feature extraction Proceedings ECCV 2006","DOI":"10.1007\/11744047_26"},{"key":"4733_CR42","doi-asserted-by":"crossref","unstructured":"Zhang H, Shang X, Luan H, Wang M, Chua TS (2016) Learning from collective intelligence: Feature learning using largely social images and tags. In: ACM transactions on multimedia computing, communications and applications","DOI":"10.1145\/2978656"},{"key":"4733_CR43","unstructured":"Zhao F, Huang Y, Wang L, Tan T (2015) Deep semantic ranking based hashing for multi-label image retrieval The IEEE conference on computer vision and pattern recognition (CVPR)"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11042-017-4733-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-017-4733-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-017-4733-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,23]],"date-time":"2019-09-23T19:19:14Z","timestamp":1569266354000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11042-017-4733-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,5,9]]},"references-count":43,"journal-issue":{"issue":"21","published-print":{"date-parts":[[2017,11]]}},"alternative-id":["4733"],"URL":"https:\/\/doi.org\/10.1007\/s11042-017-4733-7","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,5,9]]}}}