{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T10:46:11Z","timestamp":1776681971100,"version":"3.51.2"},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2018,3,7]],"date-time":"2018-03-07T00:00:00Z","timestamp":1520380800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"name":"Johannes Kepler University Linz"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Multimed Info Retr"],"published-print":{"date-parts":[[2018,6]]},"DOI":"10.1007\/s13735-018-0151-5","type":"journal-article","created":{"date-parts":[[2018,3,7]],"date-time":"2018-03-07T00:50:11Z","timestamp":1520383811000},"page":"117-128","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":35,"title":["End-to-end cross-modality retrieval with CCA projections and pairwise ranking loss"],"prefix":"10.1007","volume":"7","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3806-5411","authenticated-orcid":false,"given":"Matthias","family":"Dorfer","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jan","family":"Schl\u00fcter","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andreu","family":"Vall","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Filip","family":"Korzeniowski","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gerhard","family":"Widmer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,3,7]]},"reference":[{"key":"151_CR1","unstructured":"Abadi M, Agarwal A, Barham P, Brevdo E, Chen Z, Citro C, Corrado GS, Davis A, Dean J, Devin M et\u00a0al (2016) Tensorflow: large-scale machine learning on heterogeneous distributed systems. arXiv preprint \n                    arXiv:1603.04467"},{"key":"151_CR2","unstructured":"Andrew G, Arora R, Bilmes J, Livescu K (2013) Deep canonical correlation analysis. In: Proceedings of the international conference on machine learning, pp 1247\u20131255"},{"key":"151_CR3","unstructured":"Boulanger-Lewandowski N, Bengio Y, Vincent P (2012) Modeling temporal dependencies in high-dimensional sequences: application to polyphonic music generation and transcription. In: Proceedings of the 29th international conference on machine learning (ICML-12), pp 1159\u20131166"},{"key":"151_CR4","doi-asserted-by":"crossref","unstructured":"Chatfield K, Simonyan K, Vedaldi A, Zisserman A (2014) Return of the devil in the details: delving deep into convolutional nets. In: British machine vision conference","DOI":"10.5244\/C.28.6"},{"key":"151_CR5","unstructured":"Chung J, G\u00fcl\u00e7ehre \u00c7, Cho K, Bengio Y (2014) Empirical evaluation of gated recurrent neural networks on sequence modeling. CoRR, abs\/1412.3555"},{"key":"151_CR6","unstructured":"Clevert D, Unterthiner T, Hochreiter S (2015) Fast and accurate deep network learning by exponential linear units (elus). In: International conference on learning representations (ICLR). \n                    arXiv:1511.07289"},{"key":"151_CR7","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li L-J, Li K, Fei-Fei L (2009) ImageNet: a large-scale hierarchical image database. In: CVPR09","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"151_CR8","unstructured":"Dorfer M, Arzt A, Widmer G (2016) Towards score following in sheet music images. In: Proceedings of the international society for music information retrieval conference (ISMIR)"},{"issue":"12","key":"151_CR9","doi-asserted-by":"publisher","first-page":"2639","DOI":"10.1162\/0899766042321814","volume":"16","author":"DR Hardoon","year":"2004","unstructured":"Hardoon DR, Szedmak S, Shawe-Taylor J (2004) Canonical correlation analysis: an overview with application to learning methods. Neural Comput 16(12):2639\u20132664","journal-title":"Neural Comput"},{"key":"151_CR10","unstructured":"Hermann KM, Blunsom P (2013) Multilingual distributed representations without word alignment. arXiv preprint \n                    arXiv:1312.6173"},{"key":"151_CR11","unstructured":"Ioffe S, Szegedy C (2015) Batch normalization: accelerating deep network training by reducing internal covariate shift. CoRR, abs\/1502.03167"},{"key":"151_CR12","doi-asserted-by":"crossref","unstructured":"Karpathy A, Fei-Fei L (2015) Deep visual-semantic alignments for generating image descriptions. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3128\u20133137","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"151_CR13","unstructured":"Karpathy A, Joulin A, Li FFF (2014) Deep fragment embeddings for bidirectional image sentence mapping. In: Advances in neural information processing systems, pp 1889\u20131897"},{"key":"151_CR14","unstructured":"Kingma D, Ba J (2014) Adam: a method for stochastic optimization. arXiv preprint \n                    arXiv:1412.6980"},{"key":"151_CR15","unstructured":"Kiros R, Salakhutdinov R, Zemel RS (2014) Unifying visual-semantic embeddings with multimodal neural language models. arXiv preprint \n                    arXiv:1411.2539"},{"key":"151_CR16","unstructured":"Lin M, Chen Q, Yan S (2013) Network in network. CoRR, abs\/1312.4400"},{"issue":"2","key":"151_CR17","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1017\/S0266466600011129","volume":"1","author":"JR Magnus","year":"1985","unstructured":"Magnus JR (1985) On differentiating eigenvalues and eigenvectors. Econom Theory 1(2):179\u2013191","journal-title":"Econom Theory"},{"key":"151_CR18","unstructured":"Mao J, Xu W, Yang Y, Wang J, Yuille AL (2014) Explain images with multimodal recurrent neural networks. arXiv preprint \n                    arXiv:1410.1090"},{"key":"151_CR19","series-title":"Probability and mathematical statistics","volume-title":"Multivariate analysis","author":"KV Mardia","year":"1979","unstructured":"Mardia KV, Kent JT, Bibby JM (1979) Multivariate analysis. Probability and mathematical statistics. Academic Press, London"},{"key":"151_CR20","unstructured":"Mikolov T, Sutskever I, Chen K, Corrado GS, Dean J (2013) Distributed representations of words and phrases and their compositionality. In: Advances in neural information processing systems, pp 3111\u20133119"},{"key":"151_CR21","doi-asserted-by":"crossref","unstructured":"Nilsback M-E, Zisserman A (2008) Automated flower classification over a large number of classes. In: Proceedings of the Indian conference on computer vision, graphics and image processing","DOI":"10.1109\/ICVGIP.2008.47"},{"key":"151_CR22","doi-asserted-by":"crossref","unstructured":"Papadopoulo T, Lourakis MIA (2000) Estimating the Jacobian of the singular value decomposition: theory and applications. In: Proceedings of the 6th European conference on computer vision (ECCV)","DOI":"10.1007\/3-540-45054-8_36"},{"issue":"3","key":"151_CR23","doi-asserted-by":"publisher","first-page":"521","DOI":"10.1109\/TPAMI.2013.142","volume":"36","author":"JC Pereira","year":"2014","unstructured":"Pereira JC, Coviello E, Doyle G, Rasiwasia N, Lanckriet GRG, Levy R, Vasconcelos N (2014) On the role of correlation and abstraction in cross-modal multimedia retrieval. IEEE Trans Pattern Anal Mach Intell 36(3):521\u2013535","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"151_CR24","unstructured":"Petersen KB, Pedersen MS (2012) The matrix cookbook, nov 2012. Version 20121115"},{"key":"151_CR25","unstructured":"Reed S, Akata Z, Schiele B, Lee H (2016) Deep visual-semantic alignments for generating image descriptions. In: Proceedings of the IEEE conference on computer vision and pattern recognition"},{"key":"151_CR26","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv preprint \n                    arXiv:1409.1556"},{"key":"151_CR27","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1162\/tacl_a_00177","volume":"2","author":"R Socher","year":"2014","unstructured":"Socher R, Karpathy A, Le QV, Manning CD, Ng. AY (2014) Grounded compositional semantics for finding and describing images with sentences. Trans Assoc Comput Linguist 2:207\u2013218","journal-title":"Trans Assoc Comput Linguist"},{"key":"151_CR28","unstructured":"Theano Development Team (2016) Theano: a Python framework for fast computation of mathematical expressions. arXiv e-prints, abs\/1605.02688, May 2016"},{"key":"151_CR29","unstructured":"Vendrov I, Kiros R, Fidler S, Urtasun R (2016) Order-embeddings of images and language. CoRR, abs\/1511.06361"},{"key":"151_CR30","unstructured":"Welinder P, Branson S, Mita T, Wah C, Schroff F, Belongie S, Perona P (2010) Caltech-UCSD Birds 200. Technical report CNS-TR-2010-001, California Institute of Technology"},{"key":"151_CR31","doi-asserted-by":"crossref","unstructured":"Yan F, Mikolajczyk K (2015) Deep correlation for matching images and text. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3441\u20133450","DOI":"10.1109\/CVPR.2015.7298966"}],"container-title":["International Journal of Multimedia Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s13735-018-0151-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13735-018-0151-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13735-018-0151-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,3,6]],"date-time":"2019-03-06T20:11:24Z","timestamp":1551903084000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s13735-018-0151-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,3,7]]},"references-count":31,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2018,6]]}},"alternative-id":["151"],"URL":"https:\/\/doi.org\/10.1007\/s13735-018-0151-5","relation":{},"ISSN":["2192-6611","2192-662X"],"issn-type":[{"value":"2192-6611","type":"print"},{"value":"2192-662X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,3,7]]},"assertion":[{"value":"8 November 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 February 2018","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 February 2018","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 March 2018","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}