{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T17:39:08Z","timestamp":1743097148665,"version":"3.40.3"},"publisher-location":"Cham","reference-count":24,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319518138"},{"type":"electronic","value":"9783319518145"}],"license":[{"start":{"date-parts":[[2016,12,31]],"date-time":"2016-12-31T00:00:00Z","timestamp":1483142400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-51814-5_12","type":"book-chapter","created":{"date-parts":[[2016,12,30]],"date-time":"2016-12-30T08:31:44Z","timestamp":1483086704000},"page":"136-147","source":"Crossref","is-referenced-by-count":1,"title":["Deep Convolutional Neural Network for Bidirectional Image-Sentence Mapping"],"prefix":"10.1007","author":[{"given":"Tianyuan","family":"Yu","sequence":"first","affiliation":[]},{"given":"Liang","family":"Bai","sequence":"additional","affiliation":[]},{"given":"Jinlin","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Zheng","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Yuxiang","family":"Xie","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,12,31]]},"reference":[{"key":"12_CR1","doi-asserted-by":"crossref","unstructured":"Jeon, J., Lavrenko, V., Manmatha, R.: Automatic image annotation and retrieval using cross-media relevance models. In: Proceedings of the 26th Annual International ACM SIGIR Conference on Research and Development in Informaion Retrieval, pp. 119\u2013126. ACM (2003)","DOI":"10.1145\/860435.860459"},{"key":"12_CR2","unstructured":"Srivastava, N., Salakhutdinov, R.R.: Multimodal learning with deep Boltzmann machines. In: Advances in Neural Information Processing Systems, pp. 2222\u20132230 (2012)"},{"key":"12_CR3","doi-asserted-by":"crossref","unstructured":"Wu, F., Lu, X., Zhang, Z., et al.: Cross-media semantic representation via bi-directional learning to rank. In: Proceedings of the 21st ACM International Conference on Multimedia, pp. 877\u2013886. ACM (2013)","DOI":"10.1145\/2502081.2502097"},{"key":"12_CR4","doi-asserted-by":"crossref","unstructured":"Vinyals, O., Toshev, A., Bengio, S., et al.: Show and tell: a neural image caption generator. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3156\u20133164 (2015)","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"12_CR5","doi-asserted-by":"crossref","unstructured":"Malinowski, M., Rohrbach, M., Fritz, M.: Ask your neurons: a neural-based approach to answering questions about images. In: IEEE International Conference on Computer Vision, pp. 1\u20139. IEEE (2015)","DOI":"10.1109\/ICCV.2015.9"},{"key":"12_CR6","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: Advances in Neural Information Processing Systems, pp. 1097\u20131105 (2012)"},{"key":"12_CR7","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Liu, W., Jia, Y., et al.: Going deeper with convolutions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1\u20139 (2015)","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"12_CR8","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast R-CNN. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"12_CR9","doi-asserted-by":"crossref","unstructured":"Xu, Z., Yang, Y., Hauptmann, A.G.: A discriminative CNN video representation for event detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1798\u20131807 (2015)","DOI":"10.1109\/CVPR.2015.7298789"},{"key":"12_CR10","doi-asserted-by":"crossref","unstructured":"Paulin, M., Douze, M., Harchaoui, Z., et al.: Local convolutional features with unsupervised training for image retrieval. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 91\u201399 (2015)","DOI":"10.1109\/ICCV.2015.19"},{"key":"12_CR11","doi-asserted-by":"crossref","unstructured":"Matsuo, S., Yanai, K.: CNN-based style vector for style image retrieval. In: Proceedings of the 2016 ACM on International Conference on Multimedia Retrieval, pp. 309\u2013312. ACM (2016)","DOI":"10.1145\/2911996.2912057"},{"key":"12_CR12","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1162\/tacl_a_00177","volume":"2","author":"R Socher","year":"2014","unstructured":"Socher, R., Karpathy, A., Le, Q.V., et al.: Grounded compositional semantics for finding and describing images with sentences. Trans. Assoc. Comput. Linguist. 2, 207\u2013218 (2014)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"12_CR13","doi-asserted-by":"crossref","unstructured":"Zhuang, Y., Yu, Z., Wang, W., et al.: Cross-media hashing with neural networks. In: Proceedings of the ACM International Conference on Multimedia, pp. 901\u2013904. ACM (2014)","DOI":"10.1145\/2647868.2655059"},{"key":"12_CR14","doi-asserted-by":"crossref","first-page":"853","DOI":"10.1613\/jair.3994","volume":"47","author":"M Hodosh","year":"2013","unstructured":"Hodosh, M., Young, P., Hockenmaier, J.: Framing image description as a ranking task: data, models and evaluation metrics. J. Artif. Intell. Res. 47, 853\u2013899 (2013)","journal-title":"J. Artif. Intell. Res."},{"issue":"12","key":"12_CR15","doi-asserted-by":"crossref","first-page":"2639","DOI":"10.1162\/0899766042321814","volume":"16","author":"DR Hardoon","year":"2004","unstructured":"Hardoon, D.R., Szedmak, S., Shawe-Taylor, J.: Canonical correlation analysis: an overview with application to learning methods. Neural Comput. 16(12), 2639\u20132664 (2004)","journal-title":"Neural Comput."},{"key":"12_CR16","doi-asserted-by":"crossref","unstructured":"Ballan, L., Uricchio, T., Seidenari, L., et al.: A cross-media model for automatic image annotation. In: Proceedings of International Conference on Multimedia Retrieval, p. 73. ACM (2014)","DOI":"10.1145\/2578726.2578728"},{"key":"12_CR17","doi-asserted-by":"crossref","unstructured":"Wang, Y., Wu, F., Song, J., et al.: Multi-modal mutual topic reinforce modeling for cross-media retrieval. In: Proceedings of the 22nd ACM International Conference on Multimedia, pp. 307\u2013316. ACM (2014)","DOI":"10.1145\/2647868.2654901"},{"key":"12_CR18","doi-asserted-by":"crossref","unstructured":"Blei, D.M., Jordan, M.I.: Modeling annotated data. In: Proceedings of the 26th Annual International ACM SIGIR Conference on Research and Development in Informaion Retrieval, pp. 127\u2013134. ACM (2003)","DOI":"10.1145\/860435.860460"},{"issue":"3","key":"12_CR19","doi-asserted-by":"crossref","first-page":"521","DOI":"10.1109\/TPAMI.2013.142","volume":"36","author":"JC Pereira","year":"2014","unstructured":"Pereira, J.C., Coviello, E., Doyle, G., et al.: On the role of correlation and abstraction in cross-modal multimedia retrieval. IEEE Trans. Pattern Anal. Mach. Intell. 36(3), 521\u2013535 (2014)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"12_CR20","unstructured":"Frome, A., Corrado, G.S., Shlens, J., et al.: Devise: a deep visual-semantic embedding model. In: Advances in Neural Information Processing Systems, pp. 2121\u20132129 (2013)"},{"key":"12_CR21","unstructured":"Karpathy, A., Joulin, A., Li, F.F.F.: Deep fragment embeddings for bidirectional image sentence mapping. In: Advances in Neural Information Processing Systems, pp. 1889\u20131897 (2014)"},{"key":"12_CR22","unstructured":"Gao, J., Deng, L., Gamon, M., et al.: Modeling interestingness with deep neural networks: U.S. Patent 20,150,363,688, 17 December 2015"},{"key":"12_CR23","doi-asserted-by":"crossref","unstructured":"Huang, P.S., He, X., Gao, J., et al.: Learning deep structured semantic models for web search using clickthrough data. In: Proceedings of the 22nd ACM International Conference on Information & Knowledge Management, pp. 2333\u20132338. ACM (2013)","DOI":"10.1145\/2505515.2505665"},{"key":"12_CR24","unstructured":"Jaderberg, M., Simonyan, K., Zisserman, A., Kavukcuoglu, K.: Spatial transformer networks. In: Advances in Neural Information Processing Systems 28: Annual Conference on Neural Information Processing Systems, (NIPS 2015), pp. 2017\u20132025 (2015)"}],"container-title":["Lecture Notes in Computer Science","MultiMedia Modeling"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-51814-5_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,16]],"date-time":"2019-09-16T22:39:04Z","timestamp":1568673544000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-51814-5_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,12,31]]},"ISBN":["9783319518138","9783319518145"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-51814-5_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2016,12,31]]}}}