{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:25:43Z","timestamp":1742912743775,"version":"3.40.3"},"publisher-location":"Cham","reference-count":44,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030208721"},{"type":"electronic","value":"9783030208738"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-20873-8_33","type":"book-chapter","created":{"date-parts":[[2019,5,25]],"date-time":"2019-05-25T20:32:03Z","timestamp":1558816323000},"page":"513-529","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["X-GACMN: An X-Shaped Generative Adversarial Cross-Modal Network with Hypersphere Embedding"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2830-3962","authenticated-orcid":false,"given":"Weikuo","family":"Guo","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3890-1894","authenticated-orcid":false,"given":"Jian","family":"Liang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0851-6752","authenticated-orcid":false,"given":"Xiangwei","family":"Kong","sequence":"additional","affiliation":[]},{"given":"Lingxiao","family":"Song","sequence":"additional","affiliation":[]},{"given":"Ran","family":"He","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,5,26]]},"reference":[{"key":"33_CR1","doi-asserted-by":"crossref","unstructured":"Chua, T.S., Tang, J., Hong, R., Li, H., Luo, Z., Zheng, Y.: NUS-WIDE: a real-world web image database from national University of Singapore. In: Proceedings of the CIVR, pp. 48:1\u201348:9 (2009)","DOI":"10.1145\/1646396.1646452"},{"key":"33_CR2","doi-asserted-by":"crossref","unstructured":"Eisenschtat, A., Wolf, L.: Linking image and text with 2-way nets. In: Proceedings of the CVPR, pp. 4601\u20134611 (2017)","DOI":"10.1109\/CVPR.2017.201"},{"key":"33_CR3","doi-asserted-by":"crossref","unstructured":"Erin Liong, V., Lu, J., Tan, Y.P., Zhou, J.: Cross-modal deep variational hashing. In: Proceedings of the ICCV, pp. 4077\u20134085 (2017)","DOI":"10.1109\/ICCV.2017.439"},{"key":"33_CR4","doi-asserted-by":"crossref","unstructured":"Feng, F., Wang, X., Li, R.: Cross-modal retrieval with correspondence autoencoder. In: Proceedings of the ACM MM, pp. 7\u201316 (2014)","DOI":"10.1145\/2647868.2654902"},{"key":"33_CR5","unstructured":"Frome, A., et al.: Devise: a deep visual-semantic embedding model. In: Proceedings of the NIPS, pp. 2121\u20132129 (2013)"},{"issue":"2","key":"33_CR6","doi-asserted-by":"publisher","first-page":"210","DOI":"10.1007\/s11263-013-0658-4","volume":"106","author":"Y Gong","year":"2014","unstructured":"Gong, Y., Ke, Q., Isard, M., Lazebnik, S.: A multi-view embedding space for modeling internet images, tags, and their semantics. IJCV 106(2), 210\u2013233 (2014)","journal-title":"IJCV"},{"key":"33_CR7","unstructured":"Goodfellow, I.J., et al.: Generative adversarial nets. In: Proceedings of the NIPS, pp. 2672\u20132680 (2014)"},{"issue":"8","key":"33_CR8","doi-asserted-by":"publisher","first-page":"1371","DOI":"10.1109\/TPAMI.2007.70791","volume":"30","author":"D Grangier","year":"2008","unstructured":"Grangier, D., Bengio, S.: A discriminative kernel-based approach to rank images from text queries. IEEE TPAMI 30(8), 1371\u20131384 (2008)","journal-title":"IEEE TPAMI"},{"key":"33_CR9","doi-asserted-by":"crossref","unstructured":"Gu, J., Cai, J., Joty, S.R., Niu, L., Wang, G.: Look, imagine and match: improving textual-visual cross-modal retrieval with generative models. In: Proceedings of the CVPR, pp. 7181\u20137189 (2018)","DOI":"10.1109\/CVPR.2018.00750"},{"issue":"12","key":"33_CR10","doi-asserted-by":"publisher","first-page":"2639","DOI":"10.1162\/0899766042321814","volume":"16","author":"DR Hardoon","year":"2004","unstructured":"Hardoon, D.R., Szedmak, S., Shawe-Taylor, J.: Canonical correlation analysis: an overview with application to learning methods. Neural Comput. 16(12), 2639\u20132664 (2004)","journal-title":"Neural Comput."},{"key":"33_CR11","doi-asserted-by":"crossref","unstructured":"Hu, R., Andreas, J., Rohrbach, M., Darrell, T., Saenko, K.: Learning to reason: end-to-end module networks for visual question answering. In: Proceedings of the ICCV, pp. 804\u2013813 (2017)","DOI":"10.1109\/ICCV.2017.93"},{"key":"33_CR12","doi-asserted-by":"crossref","unstructured":"Jiang, Q.Y., Li, W.J.: Deep cross-modal hashing. In: Proceedings of the CVPR, pp. 3270\u20133278 (2017)","DOI":"10.1109\/CVPR.2017.348"},{"key":"33_CR13","first-page":"1","volume":"99","author":"Y Li","year":"2018","unstructured":"Li, Y., Zhang, J., Huang, K., Zhang, J.: Mixed supervised object detection with robust objectness transfer. IEEE TPAMI 99, 1\u201318 (2018)","journal-title":"IEEE TPAMI"},{"key":"33_CR14","doi-asserted-by":"crossref","unstructured":"Liang, J., Cao, D., He, R., Sun, Z., Tan, T.: Principal affinity based cross-modal retrieval. In: Proceedings of the ACPR, pp. 126\u2013130 (2015)","DOI":"10.1109\/ACPR.2015.7486479"},{"key":"33_CR15","unstructured":"Liang, J., He, R., Sun, Z., Tan, T.: Group-invariant cross-modal subspace learning. In: Proceedings of the IJCAI, pp. 1739\u20131745 (2016)"},{"key":"33_CR16","doi-asserted-by":"crossref","unstructured":"Liu, W., Wen, Y., Yu, Z., Li, M., Raj, B., Song, L.: SphereFace: deep hypersphere embedding for face recognition. In: Proceedings of the CVPR, pp. 212\u2013220 (2017)","DOI":"10.1109\/CVPR.2017.713"},{"key":"33_CR17","unstructured":"Liu, W., Wen, Y., Yu, Z., Yang, M.: Large-margin softmax loss for convolutional neural networks. In: Proceedings of the ICML, pp. 507\u2013516 (2016)"},{"key":"33_CR18","doi-asserted-by":"crossref","unstructured":"Lu, J., Xiong, C., Parikh, D., Socher, R.: Knowing when to look: adaptive attention via a visual sentinel for image captioning. In: Proceedings of the CVPR, pp. 3242\u20133250 (2017)","DOI":"10.1109\/CVPR.2017.345"},{"key":"33_CR19","doi-asserted-by":"crossref","unstructured":"Lu, X., Wu, F., Tang, S., Zhang, Z., He, X., Zhuang, Y.: A low rank structural large margin method for cross-modal ranking. In: Proceedings of the SIGIR, pp. 433\u2013442 (2013)","DOI":"10.1145\/2484028.2484039"},{"key":"33_CR20","unstructured":"Ngiam, J., Khosla, A., Kim, M., Nam, J., Lee, H., Ng, A.Y.: Multimodal deep learning. In: Proceedings of the ICML, pp. 689\u2013696 (2011)"},{"key":"33_CR21","unstructured":"Peng, Y., Huang, X., Qi, J.: Cross-media shared representation by hierarchical learning with multiple deep networks. In: Proceedings of the IJCAI, pp. 3846\u20133853 (2016)"},{"issue":"2","key":"33_CR22","first-page":"405","volume":"20","author":"Y Peng","year":"2017","unstructured":"Peng, Y., Qi, J., Huang, X., Yuan, Y.: CCL: cross-modal correlation learning with multi-grained fusion by hierarchical network. IEEE TMM 20(2), 405\u2013420 (2017)","journal-title":"IEEE TMM"},{"key":"33_CR23","unstructured":"Peng, Y., Qi, J., Yuan, Y.: CM-GANs: cross-modal generative adversarial networks for common representation learning. arXiv preprint arxiv:1710.05106 (2017)"},{"issue":"3","key":"33_CR24","doi-asserted-by":"publisher","first-page":"521","DOI":"10.1109\/TPAMI.2013.142","volume":"36","author":"JC Pereira","year":"2014","unstructured":"Pereira, J.C., et al.: On the role of correlation and abstraction in cross-modal multimedia retrieval. IEEE TPAMI 36(3), 521\u2013535 (2014)","journal-title":"IEEE TPAMI"},{"key":"33_CR25","unstructured":"Quadrianto, N., Lampert, C.H.: Learning multi-view neighborhood preserving projections. In: Proceedings of the ICML, pp. 425\u2013432 (2011)"},{"key":"33_CR26","unstructured":"Rashtchian, C., Young, P., Hodosh, M., Hockenmaier, J.: Collecting image annotations using Amazon\u2019s mechanical turk. In: NAACL HLT 2010 Workshop on Creating Speech and Language Data with Amazon\u2019s Mechanical Turk, pp. 139\u2013147 (2010)"},{"key":"33_CR27","doi-asserted-by":"crossref","unstructured":"Rasiwasia, N., et al.: A new approach to cross-modal multimedia retrieval. In: Proceedings of the ACM MM, pp. 251\u2013260 (2010)","DOI":"10.1145\/1873951.1873987"},{"key":"33_CR28","unstructured":"Reed, S., Akata, Z., Yan, X., Logeswaran, L., Schiele, B., Lee, H.: Generative adversarial text to image synthesis. In: Proceedings of the ICML, pp. 1060\u20131069 (2016)"},{"key":"33_CR29","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arxiv:1409.1556 (2014)"},{"issue":"1","key":"33_CR30","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1162\/tacl_a_00177","volume":"2","author":"R Socher","year":"2014","unstructured":"Socher, R., Karpathy, A., Le, Q.V., Manning, C.D., Ng, A.Y.: Grounded compositional semantics for finding and describing images with sentences. Trans. Assoc. Comput. Linguist. 2(1), 207\u2013218 (2014)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"33_CR31","unstructured":"Srivastava, N., Salakhutdinov, R.R.: Multimodal learning with deep Boltzmann machines. In: Proceedings of the NIPS, pp. 2639\u20132664 (2012)"},{"issue":"3","key":"33_CR32","first-page":"623","volume":"26","author":"J Su","year":"2018","unstructured":"Su, J., Zeng, J., Xiong, D., Liu, Y., Wang, M., Xie, J.: A hierarchy-to-sequence attentional neural machine translation model. IEEE TASLP 26(3), 623\u2013632 (2018)","journal-title":"IEEE TASLP"},{"key":"33_CR33","doi-asserted-by":"crossref","unstructured":"Tzeng, E., Hoffman, J., Saenko, K., Darrell, T.: Adversarial discriminative domain adaptation. In: Proceedings of the CVPR, pp. 2962\u20132971 (2017)","DOI":"10.1109\/CVPR.2017.316"},{"key":"33_CR34","doi-asserted-by":"crossref","unstructured":"Wang, B., Yang, Y., Xu, X., Hanjalic, A., Shen, H.T.: Adversarial cross-modal retrieval. In: Proceedings of the ACM MM, pp. 154\u2013162 (2017)","DOI":"10.1145\/3123266.3123326"},{"issue":"10","key":"33_CR35","doi-asserted-by":"publisher","first-page":"2010","DOI":"10.1109\/TPAMI.2015.2505311","volume":"38","author":"K Wang","year":"2016","unstructured":"Wang, K., He, R., Wang, L., Wang, W., Tan, T.: Joint feature selection and subspace learning for cross-modal retrieval. IEEE TPAMI 38(10), 2010\u20132023 (2016)","journal-title":"IEEE TPAMI"},{"key":"33_CR36","doi-asserted-by":"crossref","unstructured":"Wang, K., He, R., Wang, W., Wang, L., Tan, T.: Learning coupled feature spaces for cross-modal matching. In: Proceedings of the ICCV, pp. 2088\u20132095 (2013)","DOI":"10.1109\/ICCV.2013.261"},{"key":"33_CR37","unstructured":"Wang, K., Yin, Q., Wang, W., Wu, S., Wang, L.: A comprehensive survey on cross-modal retrieval. arXiv preprint arxiv:1607.06215 (2016)"},{"issue":"1","key":"33_CR38","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1007\/s00778-015-0391-4","volume":"25","author":"W Wang","year":"2016","unstructured":"Wang, W., Yang, X., Ooi, B.C., Zhang, D., Zhuang, Y.: Effective deep learning-based multi-modal retrieval. VLDBJ 25(1), 79\u2013101 (2016)","journal-title":"VLDBJ"},{"key":"33_CR39","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"499","DOI":"10.1007\/978-3-319-46478-7_31","volume-title":"Computer Vision \u2013 ECCV 2016","author":"Y Wen","year":"2016","unstructured":"Wen, Y., Zhang, K., Li, Z., Qiao, Y.: A discriminative feature learning approach for deep face recognition. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9911, pp. 499\u2013515. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46478-7_31"},{"key":"33_CR40","doi-asserted-by":"crossref","unstructured":"Yuan, Z., Sang, J., Liu, Y., Xu, C.: Latent feature learning in social media network. In: Proceedings of the ACM MM, pp. 253\u2013263 (2013)","DOI":"10.1145\/2502081.2502284"},{"issue":"3","key":"33_CR41","doi-asserted-by":"publisher","first-page":"53:1","DOI":"10.1145\/2168752.2168767","volume":"3","author":"D Zhai","year":"2012","unstructured":"Zhai, D., Chang, H., Shan, S., Chen, X., Gao, W.: Multiview metric learning with global consistency and local smoothness. ACM Trans. Intell. Syst. Technol. 3(3), 53:1\u201353:22 (2012)","journal-title":"ACM Trans. Intell. Syst. Technol."},{"issue":"6","key":"33_CR42","first-page":"965","volume":"24","author":"X Zhai","year":"2014","unstructured":"Zhai, X., Peng, Y., Xiao, J.: Learning cross-media joint representation with sparse and semisupervised regularization. IEEE TCSVT 24(6), 965\u2013978 (2014)","journal-title":"IEEE TCSVT"},{"key":"33_CR43","doi-asserted-by":"crossref","unstructured":"Zhai, X., Peng, Y., Xiao, J.: Heterogeneous metric learning with joint graph regularization for cross-media retrieval. In: Proceedings of the AAAI, pp. 1198\u20131204 (2013)","DOI":"10.1609\/aaai.v27i1.8464"},{"issue":"9","key":"33_CR44","first-page":"5046","volume":"56","author":"L Zhu","year":"2018","unstructured":"Zhu, L., Chen, Y., Ghamisi, P., Benediktsson, J.A.: Generative adversarial networks for hyperspectral image classification. IEEE TGARS 56(9), 5046\u20135063 (2018)","journal-title":"IEEE TGARS"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ACCV 2018"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-20873-8_33","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,18]],"date-time":"2022-09-18T17:18:10Z","timestamp":1663521490000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-20873-8_33"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030208721","9783030208738"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-20873-8_33","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"26 May 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Perth, WA","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 December 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 December 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"accv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/accv2018.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"979","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"274","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"2.7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}}]}}