{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,17]],"date-time":"2026-07-17T23:17:12Z","timestamp":1784330232716,"version":"3.55.0"},"reference-count":50,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2018,4,4]],"date-time":"2018-04-04T00:00:00Z","timestamp":1522800000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"NSFC","doi-asserted-by":"crossref","award":["61602089"],"award-info":[{"award-number":["61602089"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001700","name":"Ministry of Education, Culture, Sports, Science and Technology","doi-asserted-by":"publisher","award":["16809746"],"award-info":[{"award-number":["16809746"]}],"id":[{"id":"10.13039\/501100001700","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["World Wide Web"],"published-print":{"date-parts":[[2019,3]]},"DOI":"10.1007\/s11280-018-0541-x","type":"journal-article","created":{"date-parts":[[2018,4,4]],"date-time":"2018-04-04T03:53:55Z","timestamp":1522814035000},"page":"657-672","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":163,"title":["Deep adversarial metric learning for cross-modal retrieval"],"prefix":"10.1007","volume":"22","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5685-3123","authenticated-orcid":false,"given":"Xing","family":"Xu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Li","family":"He","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Huimin","family":"Lu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lianli","family":"Gao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yanli","family":"Ji","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2018,4,4]]},"reference":[{"key":"541_CR1","unstructured":"Andrew, G., Arora, R., Bilmes, J., Livescu, K.: Deep canonical correlation analysis. In: ICML, pp. 1247\u20131255 (2013)"},{"issue":"3","key":"541_CR2","doi-asserted-by":"publisher","first-page":"556","DOI":"10.1109\/TCSVT.2014.2347551","volume":"26","author":"L Chu","year":"2016","unstructured":"Chu, L., Zhang, Y., Li, G., Wang, S., Zhang, W., Huang, Q.: Effective multimodality fusion framework for cross-media topic detection. IEEE Trans. Circuits Syst. Video Technol. 26(3), 556\u2013569 (2016)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"541_CR3","doi-asserted-by":"crossref","unstructured":"Chua, T.-S., Tang, J., Hong, R., Li, H., Luo, Z., Zheng, Y.-T.: Nus-wide: A real-world Web image database from national university of singapore. In: CIVR (2009)","DOI":"10.1145\/1646396.1646452"},{"issue":"3","key":"541_CR4","doi-asserted-by":"publisher","first-page":"521","DOI":"10.1109\/TPAMI.2013.142","volume":"36","author":"J Costa Pereira","year":"2014","unstructured":"Costa Pereira, J., Coviello, E., Doyle, G., Rasiwasia, N., Lanckriet, G., Levy, R., Vasconcelos, N.: On the role of correlation and abstraction in cross-modal multimedia retrieval. TPAMI 36(3), 521\u2013535 (2014)","journal-title":"TPAMI"},{"key":"541_CR5","doi-asserted-by":"crossref","unstructured":"Feng, F., Wang, X., Li, R.: Cross-modal retrieval with correspondence autoencoder. In: ACM MM, pp. 7\u201316. ACM (2014)","DOI":"10.1145\/2647868.2654902"},{"key":"541_CR6","unstructured":"Ganin, Y., Lempitsky, V.S.: Unsupervised domain adaptation by backpropagation. In: ICML, pp. 1180\u20131189 (2015)"},{"issue":"2","key":"541_CR7","doi-asserted-by":"publisher","first-page":"210","DOI":"10.1007\/s11263-013-0658-4","volume":"106","author":"Y Gong","year":"2014","unstructured":"Gong, Y., Ke, Q., Isard, M., Lazebnik, S.: A multi-view embedding space for modeling internet images, tags, and their semantics. IJCV 106(2), 210\u2013233 (2014)","journal-title":"IJCV"},{"key":"541_CR8","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A., Bengio, Y.: Generative adversarial nets. In: NIPS, pp. 2672\u20132680 (2014)"},{"key":"541_CR9","unstructured":"Gu, Q., Li, Z., Han, J.: Joint feature selection and subspace learning. In: IJCAI (2011)"},{"issue":"12","key":"541_CR10","doi-asserted-by":"publisher","first-page":"2639","DOI":"10.1162\/0899766042321814","volume":"16","author":"D Hardoon","year":"2004","unstructured":"Hardoon, D., Szedmak, S., Shawe-Taylor, J.: Canonical correlation analysis: an overview with application to learning methods. Neural Comput. 16(12), 2639\u20132664 (2004)","journal-title":"Neural Comput."},{"key":"541_CR11","doi-asserted-by":"crossref","unstructured":"He, L., Xu, X., Lu, H., Yang, Y., Shen, H.T.: Unsupervised cross modal retrieval through adversarial learning. In: ICME, pp. 1\u20136 (2017)","DOI":"10.1109\/ICME.2017.8019549"},{"key":"541_CR12","doi-asserted-by":"crossref","unstructured":"Kang, C., Liao, S., He, Y., Wang, J., Niu, W., Xiang, S., Pan, C.: Cross-modal similarity learning: a low rank bilinear formulation. In: CKIM, pp. 1251\u20131260 (2015)","DOI":"10.1145\/2806416.2806469"},{"key":"541_CR13","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv: 1312.6114 (2013)"},{"key":"541_CR14","unstructured":"Larsen, A.B.L., S\u00f8nderby, S.K., Winther, O.: Autoencoding beyond pixels using a learned similarity metric. arXiv: 1512.09300 (2015)"},{"issue":"6","key":"541_CR15","doi-asserted-by":"publisher","first-page":"1234","DOI":"10.1109\/TMM.2016.2646180","volume":"19","author":"VE Liong","year":"2017","unstructured":"Liong, V.E., Lu, J., Tan, Y., Zhou, J.: Deep coupled metric learning for cross-modal matching. IEEE Trans. Multimedia 19(6), 1234\u20131244 (2017)","journal-title":"IEEE Trans. Multimedia"},{"key":"541_CR16","unstructured":"Makhzani, A., Shlens, J., Jaitly, N., Goodfellow, I.: Adversarial Autoencoders. arXiv, pp. 1\u201310 (2015)"},{"key":"541_CR17","unstructured":"Mignon, A., Jurie, F.: CMML: a new metric learning approach for cross modal matching. In: ACCV (2012)"},{"key":"541_CR18","unstructured":"Ngiam, J., Khosla, A., Kim, M., Nam, J., Lee, H., Ng, A.: Multimodal deep learning. In: ICML, pp. 689\u2013696 (2011)"},{"key":"541_CR19","unstructured":"Peng, Y., Huang, X., Qi, J.: Cross-media shared representation by hierarchical learning with multiple deep networks. In: IJCAI, pp. 3846C3853 (2016)"},{"key":"541_CR20","unstructured":"Radford, A., Metz, L., Chintala, S.: Unsupervised representation learning with deep convolutional generative adversarial networks. arXiv: 1511.06434 (2015)"},{"key":"541_CR21","unstructured":"Rashtchian, C., Young, M., Hodosh, P., Hockenmaier, J.: Collecting image annotations using amazon\u2019s mechanical turk. In: NAACL HLT 2010 workshop on creating speech and language data with amazon\u2019s mechanical turk (2010)"},{"key":"541_CR22","doi-asserted-by":"crossref","unstructured":"Rasiwasia, N., Costa Pereira, J., Coviello, E., Doyle, G., Lanckriet, G., Levy, R., Vasconcelos, N.: A new approach to cross-modal multimedia retrieval. In: ACM MM, pp. 251\u2013260. ACM (2010)","DOI":"10.1145\/1873951.1873987"},{"key":"541_CR23","doi-asserted-by":"crossref","unstructured":"Sharma, A., Kumar, A., Daume, H., Jacobs, D.: Generalized multiview analysis: a discriminative latent space. In: CVPR, pp. 2160\u20132167. IEEE (2012)","DOI":"10.1109\/CVPR.2012.6247923"},{"key":"541_CR24","doi-asserted-by":"crossref","unstructured":"Siena, S., Boddeti, V.N., Kumar, B.V.K.V.: Coupled marginal fisher analysis for low-resolution face recognition. In: ECCV workshops and demonstrations, pp. 240\u2013249 (2012)","DOI":"10.1007\/978-3-642-33868-7_24"},{"key":"541_CR25","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. CoRR, arXiv: 1409.1556 (2014)"},{"key":"541_CR26","doi-asserted-by":"publisher","first-page":"175","DOI":"10.1016\/j.patcog.2017.03.021","volume":"75","author":"J Song","year":"2018","unstructured":"Song, J., Gao, L., Liu, L., Zhu, X., Sebe, N.: Quantization-based hashing: a general framework for scalable image and video retrieval. Pattern Recogn. 75, 175\u2013187 (2018)","journal-title":"Pattern Recogn."},{"key":"541_CR27","doi-asserted-by":"crossref","unstructured":"Song, J., Yang, Y., Yang, Y., Huang, Z., Shen, H.T.: Inter-media hashing for large-scale retrieval from heterogeneous data sources. In: ACM SIGMOD, pp. 785\u2013796 (2013)","DOI":"10.1145\/2463676.2465274"},{"issue":"11","key":"541_CR28","doi-asserted-by":"publisher","first-page":"4999","DOI":"10.1109\/TIP.2016.2601260","volume":"25","author":"J Song","year":"2018","unstructured":"Song, J., Zhang, H., Li, X., Gao, L., Wang, M., Hong, R.: Self-supervised video hashing with hierarchical binary auto-encoder. IEEE Trans. Image Process. 25 (11), 4999\u20135011 (2018)","journal-title":"IEEE Trans. Image Process."},{"issue":"2","key":"541_CR29","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1023\/A:1023674722438","volume":"6","author":"Y Song","year":"2003","unstructured":"Song, Y., Wang, W., Zhang, A.: Automatic annotation and retrieval of images. World Wide Web 6(2), 209\u2013231 (2003)","journal-title":"World Wide Web"},{"key":"541_CR30","unstructured":"Srivastava, N., Salakhutdinov, R.: Learning representations for multimodal data with deep belief nets. In: ICML workshop (2012)"},{"key":"541_CR31","unstructured":"Srivastava, N., Salakhutdinov, R.: Multimodal learning with deep boltzmann machines. In: NIPS, pp. 2222\u20132230 (2012)"},{"issue":"C","key":"541_CR32","doi-asserted-by":"publisher","first-page":"286","DOI":"10.1016\/j.neucom.2017.01.064","volume":"238","author":"Q Tian","year":"2017","unstructured":"Tian, Q., Chen, S.: Cross-heterogeneous-database age estimation through correlation representation learning. Neurocomput. 238(C), 286\u2013295 (2017)","journal-title":"Neurocomput."},{"issue":"99","key":"541_CR33","first-page":"15","volume":"26","author":"J Wang","year":"2017","unstructured":"Wang, J., Zhang, T., Song, J., Sebe, N., Shen, H.T.: A survey on learning to hash. IEEE Trans. Pattern Anal. Mach. Intell. 26(99), 15\u201329 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"10","key":"541_CR34","doi-asserted-by":"publisher","first-page":"2010","DOI":"10.1109\/TPAMI.2015.2505311","volume":"38","author":"K Wang","year":"2011","unstructured":"Wang, K., He, R., Wang, L., Wang, W., Tan, T.: Joint feature selection and subspace learning for cross-modal retrieval. TPAMI 38(10), 2010\u20132023 (2011)","journal-title":"TPAMI"},{"key":"541_CR35","doi-asserted-by":"crossref","unstructured":"Wang, K., He, R., Wang, W., Wang, L., Tan, T.: Learning coupled feature spaces for cross-modal matching. In: ICCV, pp. 2088\u20132095 (2013)","DOI":"10.1109\/ICCV.2013.261"},{"key":"541_CR36","unstructured":"Wang, K., Yin, Q., Wang, W., Wu, S., Wang, L.: A Comprehensive Survey on Cross-modal Retrieval. arXiv, pp. 1\u201320 (2016)"},{"issue":"1","key":"541_CR37","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1007\/s00778-015-0391-4","volume":"25","author":"W Wang","year":"2016","unstructured":"Wang, W., Yang, X., Ooi, B.C., Zhang, D., Zhuang, Y.: Effective deep learning-based multi-modal retrieval. VLDB J 25(1), 79\u2013101 (2016)","journal-title":"VLDB J"},{"issue":"99","key":"541_CR38","first-page":"1","volume":"PP","author":"X Wang","year":"2017","unstructured":"Wang, X., Gao, L., Wang, P., Sun, X., Liu, X.: Two-stream 3d convnet fusion for action recognition in videos with arbitrary size and length. IEEE Trans. Multimedia PP(99), 1\u20131 (2017)","journal-title":"IEEE Trans. Multimedia"},{"key":"541_CR39","doi-asserted-by":"crossref","unstructured":"Wu, Y., Wang, S., Huang, Q.: Online asymmetric similarity learning for cross-modal retrieval. In: CVPR, pp. 3984\u20133993 (2017)","DOI":"10.1109\/CVPR.2017.424"},{"key":"541_CR40","doi-asserted-by":"publisher","first-page":"191","DOI":"10.1016\/j.neucom.2015.11.133","volume":"213","author":"X Xu","year":"2016","unstructured":"Xu, X., He, L., Shimada, A., Taniguchi, R., Lu, H.: Learning unified binary codes for cross-modal retrieval via latent semantic hashing. Neurocomputing 213, 191\u2013203 (2016)","journal-title":"Neurocomputing"},{"issue":"5","key":"541_CR41","doi-asserted-by":"publisher","first-page":"2494","DOI":"10.1109\/TIP.2017.2676345","volume":"26","author":"X Xu","year":"2017","unstructured":"Xu, X., Shen, F., Yang, Y., Shen, H.T., Li, X.: Learning discriminative binary codes for large-scale cross-modal retrieval. IEEE Trans. Image Process. 26(5), 2494\u20132507 (2017)","journal-title":"IEEE Trans. Image Process."},{"key":"541_CR42","doi-asserted-by":"crossref","unstructured":"Xu, X., Shimada, A., Taniguchi, R., He, L.: Coupled dictionary learning and feature mapping for cross-modal retrieval. In: ICME, pp. 1\u20136. IEEE (2015)","DOI":"10.1109\/ICME.2015.7177396"},{"key":"541_CR43","doi-asserted-by":"crossref","unstructured":"Xu, X., Yang, Y., Shimada, A., Taniguchi, R., He, L.: Semi-supervised coupled dictionary learning for cross-modal retrieval in internet images and texts. In: ACM MM, pp. 847\u2013850. ACM (2015)","DOI":"10.1145\/2733373.2806346"},{"key":"541_CR44","doi-asserted-by":"crossref","unstructured":"Yan, F., Mikolajczyk, K.: Deep correlation for matching images and text. In: CVPR, pp. 3441\u20133450 (2015)","DOI":"10.1109\/CVPR.2015.7298966"},{"issue":"6","key":"541_CR45","doi-asserted-by":"publisher","first-page":"1677","DOI":"10.1109\/TMM.2014.2323014","volume":"16","author":"Y Yang","year":"2014","unstructured":"Yang, Y., Zha, Z.-J., Gao, Y., Zhu, X., Chua, T.-S.: Exploiting Web images for semantic video indexing via robust sample-specific loss. IEEE Trans. Multimedia 16(6), 1677\u20131689 (2014)","journal-title":"IEEE Trans. Multimedia"},{"key":"541_CR46","doi-asserted-by":"crossref","unstructured":"Yang, Y., Zhang, H., Zhang, M., Shen, F., Li, X.: Visual coding in a semantic hierarchy. In: Proceedings of the 23rd ACM international conference on Multimedia, pp. 59\u201368, ACM (2015)","DOI":"10.1145\/2733373.2806244"},{"key":"541_CR47","doi-asserted-by":"publisher","first-page":"965C","DOI":"10.1109\/TCSVT.2013.2276704","volume":"24","author":"X Zhai","year":"2014","unstructured":"Zhai, X., Peng, Y., Xiao, J.: Learning cross-media joint representation with sparse and semisupervised regularization. IEEE Trans. Circuits Syst. Video Technol. 24, 965C\u2013978 (2014)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"2","key":"541_CR48","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1007\/s11280-015-0342-4","volume":"19","author":"H Zhang","year":"2016","unstructured":"Zhang, H., Gao, X., Wu, P., Xu, X.: A cross-media distance metric learning framework based on multi-view correlation mining and matching. World Wide Web 19(2), 181\u2013197 (2016)","journal-title":"World Wide Web"},{"key":"541_CR49","doi-asserted-by":"crossref","unstructured":"Zhuang, Y., Wang, Y., Wu, F., Zhang, Y., Lu, W.: Supervised coupled dictionary learning with group structures for multi-modal retrieval. In: AAAI (2013)","DOI":"10.1609\/aaai.v27i1.8603"},{"issue":"7","key":"541_CR50","doi-asserted-by":"publisher","first-page":"1006","DOI":"10.1109\/TMM.2015.2425651","volume":"17","author":"F Zou","year":"2015","unstructured":"Zou, F., Chen, Y., Song, J., Zhou, K., Yang, Y., Sebe, N.: Compact image fingerprint via multiple kernel hashing. IEEE Transaction on Multimedia 17(7), 1006\u20131018 (2015)","journal-title":"IEEE Transaction on Multimedia"}],"container-title":["World Wide Web"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11280-018-0541-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11280-018-0541-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11280-018-0541-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,17]],"date-time":"2022-08-17T23:57:33Z","timestamp":1660780653000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11280-018-0541-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,4,4]]},"references-count":50,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2019,3]]}},"alternative-id":["541"],"URL":"https:\/\/doi.org\/10.1007\/s11280-018-0541-x","relation":{},"ISSN":["1386-145X","1573-1413"],"issn-type":[{"value":"1386-145X","type":"print"},{"value":"1573-1413","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,4,4]]},"assertion":[{"value":"15 August 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 February 2018","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 February 2018","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 April 2018","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}