{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T16:13:38Z","timestamp":1761581618328,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,10,19]],"date-time":"2017-10-19T00:00:00Z","timestamp":1508371200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Basic Research Program of China (973 Program)","award":["2015CB351800"],"award-info":[{"award-number":["2015CB351800"]}]},{"name":"ARO grant","award":["W911NF-15-1-0290"],"award-info":[{"award-number":["W911NF-15-1-0290"]}]},{"name":"National Natural Science Foundation of China","award":["61332016 61620106009 61572465 61429201 U1636214 61650202"],"award-info":[{"award-number":["61332016 61620106009 61572465 61429201 U1636214 61650202"]}]},{"name":"Key Research Program of Frontier Sciences","award":["CAS: QYZDJ-SSW-SYS013"],"award-info":[{"award-number":["CAS: QYZDJ-SSW-SYS013"]}]},{"name":"NEC Laboratories of America and Blippar","award":["Faculty Research Gift Awards"],"award-info":[{"award-number":["Faculty Research Gift Awards"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2017,10,19]]},"DOI":"10.1145\/3123266.3123317","type":"proceedings-article","created":{"date-parts":[[2017,10,20]],"date-time":"2017-10-20T13:04:26Z","timestamp":1508504666000},"page":"907-915","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":19,"title":["Multi-Networks Joint Learning for Large-Scale Cross-Modal Retrieval"],"prefix":"10.1145","author":[{"given":"Liang","family":"Zhang","sequence":"first","affiliation":[{"name":"University of Chinese Academy of Sciences &amp; CAS, Beijing, China"}]},{"given":"Bingpeng","family":"Ma","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences &amp; CAS, Beijing, China"}]},{"given":"Guorong","family":"Li","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences &amp; CAS, Beijing, China"}]},{"given":"Qingming","family":"Huang","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences &amp; CAS, Beijing, China"}]},{"given":"Qi","family":"Tian","sequence":"additional","affiliation":[{"name":"University of Texas at San Antonio, San Antonio, TX, USA"}]}],"member":"320","published-online":{"date-parts":[[2017,10,19]]},"reference":[{"volume-title":"Visualizing and Understanding Recurrent Networks. International Conference on Learning Representations workshop.","author":"Andrej K.","key":"e_1_3_2_1_1_1","unstructured":"K. Andrej , J. Justin , and F. Li . 2016 . Visualizing and Understanding Recurrent Networks. International Conference on Learning Representations workshop. K. Andrej, J. Justin, and F. Li. 2016. Visualizing and Understanding Recurrent Networks. International Conference on Learning Representations workshop."},{"volume-title":"Deep Visual-Semantic Alignments for Generating Image Descriptions IEEE Conference on Computer Vision and Pattern Recognition.","author":"Andrej K.","key":"e_1_3_2_1_2_1","unstructured":"K. Andrej and F. Li . 2015 . Deep Visual-Semantic Alignments for Generating Image Descriptions IEEE Conference on Computer Vision and Pattern Recognition. K. Andrej and F. Li. 2015. Deep Visual-Semantic Alignments for Generating Image Descriptions IEEE Conference on Computer Vision and Pattern Recognition."},{"volume-title":"Deep Canonical Correlation Analysis. In International Conference on Machine Learning.","author":"Andrew G.","key":"e_1_3_2_1_3_1","unstructured":"G. Andrew , R. Arora , J. Bilmes , and K. Livescu . 2013 . Deep Canonical Correlation Analysis. In International Conference on Machine Learning. G. Andrew, R. Arora, J. Bilmes, and K. Livescu. 2013. Deep Canonical Correlation Analysis. In International Conference on Machine Learning."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10791-009-9117-9"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"X. Chen and C. Zitnick. 2015. Mind's Eye: A Recurrent Visual Representation for Image Caption Generation IEEE Conference on Computer Vision and Pattern Recognition.  X. Chen and C. Zitnick. 2015. Mind's Eye: A Recurrent Visual Representation for Image Caption Generation IEEE Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2015.7298856"},{"volume-title":"Efficient Learning of Mahalanobis Metrics for Ranking International Conference on Machine Learning.","author":"Daryl L.","key":"e_1_3_2_1_7_1","unstructured":"L. Daryl and L. Gert . 2014 . Efficient Learning of Mahalanobis Metrics for Ranking International Conference on Machine Learning. L. Daryl and L. Gert. 2014. Efficient Learning of Mahalanobis Metrics for Ranking International Conference on Machine Learning."},{"key":"e_1_3_2_1_8_1","first-page":"2579","article-title":"Visualizing Data using t-SNE","volume":"9","author":"der Maaten L. Van","year":"2008","unstructured":"L. Van der Maaten and G. Hinton . 2008 . Visualizing Data using t-SNE . Journal of Machine Learning Research Vol. 9 (2008), 2579 -- 2605 . L. Van der Maaten and G. Hinton. 2008. Visualizing Data using t-SNE. Journal of Machine Learning Research Vol. 9 (2008), 2579--2605.","journal-title":"Journal of Machine Learning Research"},{"volume-title":"Long-term Recurrent Convolutional Networks for Visual Recognition and Description IEEE Conference on Computer Vision and Pattern Recognition.","author":"Donahue J.","key":"e_1_3_2_1_9_1","unstructured":"J. Donahue , L. Hendricks , S. Guadarrama , M. Rohrbach , S. Venugopalan , K. Saenko , and T. Darrell . 2015 . Long-term Recurrent Convolutional Networks for Visual Recognition and Description IEEE Conference on Computer Vision and Pattern Recognition. J. Donahue, L. Hendricks, S. Guadarrama, M. Rohrbach, S. Venugopalan, K. Saenko, and T. Darrell. 2015. Long-term Recurrent Convolutional Networks for Visual Recognition and Description IEEE Conference on Computer Vision and Pattern Recognition."},{"volume-title":"DeCAF: A Deep Convolutional Activation Feature for Generic Visual Recognition International Conference on Machine Learning.","author":"Donahue J.","key":"e_1_3_2_1_10_1","unstructured":"J. Donahue , Y. Jia , O. Vinyals , N. Zhang J. Hoffman , E. Tzeng , and T. Darrell . 2014 . DeCAF: A Deep Convolutional Activation Feature for Generic Visual Recognition International Conference on Machine Learning. J. Donahue, Y. Jia, O. Vinyals, N. Zhang J. Hoffman, E. Tzeng, and T. Darrell. 2014. DeCAF: A Deep Convolutional Activation Feature for Generic Visual Recognition International Conference on Machine Learning."},{"volume-title":"IEEE Conference on Computer Vision and Pattern Recognition.","author":"Fang H.","key":"e_1_3_2_1_11_1","unstructured":"H. Fang , S. Gupta , F. Iandola , R. Srivastava , L. Deng , P. Dollar , J. Gao , X. He , M. Mitchell , J. Platt , C. Zitnick , and G. Zweig . 2015. From Captions to Visual Concepts and Back . In IEEE Conference on Computer Vision and Pattern Recognition. H. Fang, S. Gupta, F. Iandola, R. Srivastava, L. Deng, P.Dollar, J. Gao, X. He, M. Mitchell, J. Platt, C. Zitnick, and G. Zweig. 2015. From Captions to Visual Concepts and Back. In IEEE Conference on Computer Vision and Pattern Recognition."},{"key":"e_1_3_2_1_12_1","volume-title":"Devise: A deep Visual-Semantic Embedding Model. In Advances in Neural Information Processing Systems.","author":"Frome A.","year":"2013","unstructured":"A. Frome , G. Corrado , J. Shlens , S. Bengio , J. Dean , and T. Mikolov . 2013 . Devise: A deep Visual-Semantic Embedding Model. In Advances in Neural Information Processing Systems. A. Frome, G. Corrado, J. Shlens, S. Bengio, J. Dean, and T. Mikolov. 2013. Devise: A deep Visual-Semantic Embedding Model. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_13_1","unstructured":"H. Gao J. Mao J. Zhou Z. Huang and A. Yuille. 2015. Are You Talking to a Machine? Dataset and Methods for Multilingual Image Question Advances in Neural Information Processing Systems.   H. Gao J. Mao J. Zhou Z. Huang and A. Yuille. 2015. Are You Talking to a Machine? Dataset and Methods for Multilingual Image Question Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_14_1","volume-title":"Wealthy: Deep Transfer Learning through Selective Joint Fine-tuning IEEE Conference on Computer Vision and Pattern Recognition.","author":"Ge W.","year":"2017","unstructured":"W. Ge and Y. Yu . 2017 . Borrowing Treasures from the Wealthy: Deep Transfer Learning through Selective Joint Fine-tuning IEEE Conference on Computer Vision and Pattern Recognition. W. Ge and Y. Yu. 2017. Borrowing Treasures from the Wealthy: Deep Transfer Learning through Selective Joint Fine-tuning IEEE Conference on Computer Vision and Pattern Recognition."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-013-0658-4"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2007.70791"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1162\/0899766042321814"},{"key":"e_1_3_2_1_18_1","volume-title":"Deep Residual Learning for Image Recognition. In IEEE Conference on Computer Vision and Pattern Recognition.","author":"He K.","year":"2016","unstructured":"K. He , X. Zhang , S. Ren , and J. sun. 2016 . Deep Residual Learning for Image Recognition. In IEEE Conference on Computer Vision and Pattern Recognition. K. He, X. Zhang, S. Ren, and J. sun. 2016. Deep Residual Learning for Image Recognition. In IEEE Conference on Computer Vision and Pattern Recognition."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-010-5198-3"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2015.2390499"},{"key":"e_1_3_2_1_21_1","volume-title":"lee, D. Kwak, and M. Heo","author":"Kim J.","year":"2016","unstructured":"J. Kim , S. lee, D. Kwak, and M. Heo . 2016 . Multimodal Residual Learning for visual QA. In Advances in Neural Information Processing Systems . J. Kim, S. lee, D. Kwak, and M. Heo. 2016. Multimodal Residual Learning for visual QA. In Advances in Neural Information Processing Systems."},{"volume-title":"Microsoft COCO: Common Objects in Context. In European Conference on Computer Vision.","author":"Lin T.","key":"e_1_3_2_1_22_1","unstructured":"T. Lin , M. Maire , S. Belongie , J. Hays , P. Perona , D. Ramanan , P. Dollar , and C. Zitnick . 2014 . Microsoft COCO: Common Objects in Context. In European Conference on Computer Vision. T. Lin, M. Maire, S. Belongie, J. Hays, P. Perona, D. Ramanan, P. Dollar, and C. Zitnick. 2014. Microsoft COCO: Common Objects in Context. In European Conference on Computer Vision."},{"key":"e_1_3_2_1_23_1","unstructured":"V. Mahadevan C. Wong J. Pereira T. Liu N. Vasconcelos and L. Saul. 2011. Maximum Covariance Unfolding: Manifold Learning for Bimodal Data Advances in Neural Information Processing Systems.   V. Mahadevan C. Wong J. Pereira T. Liu N. Vasconcelos and L. Saul. 2011. Maximum Covariance Unfolding: Manifold Learning for Bimodal Data Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.9"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/2502081.2502087"},{"volume-title":"Efficient Estimation of Word Representations in Vector Space International Conference on Learning Representations Workshop.","author":"Mikolov T.","key":"e_1_3_2_1_26_1","unstructured":"T. Mikolov , K. Chen , G. Corrado , and J. Dean . 2013 a . Efficient Estimation of Word Representations in Vector Space International Conference on Learning Representations Workshop. T. Mikolov, K. Chen, G. Corrado, and J. Dean. 2013 a. Efficient Estimation of Word Representations in Vector Space International Conference on Learning Representations Workshop."},{"key":"e_1_3_2_1_27_1","unstructured":"T. Mikolov I. Sutskever K. Chen G. Corrado and J. Dean. 2013 b. Distributed Representations of Words and Phrases and their Compositionality Advances in Neural Information Processing Systems.   T. Mikolov I. Sutskever K. Chen G. Corrado and J. Dean. 2013 b. Distributed Representations of Words and Phrases and their Compositionality Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.142"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.466"},{"volume-title":"Cluster Canonical Correlation Analysis. In International Conference on Artificial Intelligence and Statistics.","author":"Rasiwasia N.","key":"e_1_3_2_1_30_1","unstructured":"N. Rasiwasia , D. Mahajan , V. Mahadevan , and G. Aggarwal . 2014 . Cluster Canonical Correlation Analysis. In International Conference on Artificial Intelligence and Statistics. N. Rasiwasia, D. Mahajan, V. Mahadevan, and G. Aggarwal. 2014. Cluster Canonical Correlation Analysis. In International Conference on Artificial Intelligence and Statistics."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2007.900138"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1873987"},{"key":"e_1_3_2_1_33_1","unstructured":"M. Ren R. Kiros and R. Zemel. 2015. Exploring Models and Data for Image Question Answering Advances in Neural Information Processing Systems.   M. Ren R. Kiros and R. Zemel. 2015. Exploring Models and Data for Image Question Answering Advances in Neural Information Processing Systems."},{"volume-title":"Generalized Multiview Analysis: A Discriminative Latent Space IEEE Conference on Computer Vision and Pattern Recognition.","author":"Sharma A.","key":"e_1_3_2_1_34_1","unstructured":"A. Sharma , A. Kumar , D. Hal , and D. Jacobs . 2012 . Generalized Multiview Analysis: A Discriminative Latent Space IEEE Conference on Computer Vision and Pattern Recognition. A. Sharma, A. Kumar, D. Hal, and D. Jacobs. 2012. Generalized Multiview Analysis: A Discriminative Latent Space IEEE Conference on Computer Vision and Pattern Recognition."},{"key":"e_1_3_2_1_35_1","unstructured":"N. Srivastava and R. Salakhutdinov. 2012. Multimodal Learning with Deep Boltzmann Machines. Advances in Neural Information Processing Systems.   N. Srivastava and R. Salakhutdinov. 2012. Multimodal Learning with Deep Boltzmann Machines. Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_36_1","unstructured":"I. Sutskever O. Vinyals and Q. Le. 2014. Sequence to Sequence Learning with Neural Networks Advances in Neural Information Processing Systems.   I. Sutskever O. Vinyals and Q. Le. 2014. Sequence to Sequence Learning with Neural Networks Advances in Neural Information Processing Systems."},{"volume-title":"Show and Tell: A Neural Image Caption Generator. IEEE Conference on Computer Vision and Pattern Recognition.","author":"Vinyals O.","key":"e_1_3_2_1_37_1","unstructured":"O. Vinyals , A. Toshev , S. Bengio , and D. Erhan . 2015 . Show and Tell: A Neural Image Caption Generator. IEEE Conference on Computer Vision and Pattern Recognition. O. Vinyals, A. Toshev, S. Bengio, and D. Erhan. 2015. Show and Tell: A Neural Image Caption Generator. IEEE Conference on Computer Vision and Pattern Recognition."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.261"},{"volume-title":"Learning Deep Structure-Preserving Image-Text Enbeddings IEEE Conference on Computer Vision and Pattern Recognition.","author":"Wang L.","key":"e_1_3_2_1_39_1","unstructured":"L. Wang , Y. Li , and S. Lazebnik . 2016 . Learning Deep Structure-Preserving Image-Text Enbeddings IEEE Conference on Computer Vision and Pattern Recognition. L. Wang, Y. Li, and S. Lazebnik. 2016. Learning Deep Structure-Preserving Image-Text Enbeddings IEEE Conference on Computer Vision and Pattern Recognition."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/2502081.2502097"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00166"},{"volume-title":"Heterogeneous Metric Learning with Joint Graph Regularization for Cross-Media Retrieval AAAI Conference on Artificial Intelligence.","author":"Zhai X.","key":"e_1_3_2_1_42_1","unstructured":"X. Zhai , Y. Peng , and J. Xiao . 2013 . Heterogeneous Metric Learning with Joint Graph Regularization for Cross-Media Retrieval AAAI Conference on Artificial Intelligence. X. Zhai, Y. Peng, and J. Xiao. 2013. Heterogeneous Metric Learning with Joint Graph Regularization for Cross-Media Retrieval AAAI Conference on Artificial Intelligence."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2964336"}],"event":{"name":"MM '17: ACM Multimedia Conference","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Mountain View California USA","acronym":"MM '17"},"container-title":["Proceedings of the 25th ACM international conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3123266.3123317","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3123266.3123317","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T03:39:28Z","timestamp":1750217968000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3123266.3123317"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,10,19]]},"references-count":43,"alternative-id":["10.1145\/3123266.3123317","10.1145\/3123266"],"URL":"https:\/\/doi.org\/10.1145\/3123266.3123317","relation":{},"subject":[],"published":{"date-parts":[[2017,10,19]]},"assertion":[{"value":"2017-10-19","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}