{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T19:24:29Z","timestamp":1774121069871,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,10,17]],"date-time":"2021-10-17T00:00:00Z","timestamp":1634428800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"NSFC","award":["#11832001 and #71621002"],"award-info":[{"award-number":["#11832001 and #71621002"]}]},{"name":"Ministry of Science & Technology of China","award":["#2020AAA0108401 and #2020AAA0108405"],"award-info":[{"award-number":["#2020AAA0108401 and #2020AAA0108405"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,10,17]]},"DOI":"10.1145\/3474085.3475670","type":"proceedings-article","created":{"date-parts":[[2021,10,18]],"date-time":"2021-10-18T04:59:18Z","timestamp":1634533158000},"page":"5427-5435","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":13,"title":["MCCN"],"prefix":"10.1145","author":[{"given":"Zhixiong","family":"Zeng","sequence":"first","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}]},{"given":"Ying","family":"Sun","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}]},{"given":"Wenji","family":"Mao","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2021,10,17]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.5555\/3042817.3043076"},{"key":"e_1_3_2_2_2_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654902"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/72.80344"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-013-0658-4"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2941858"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1162\/0899766042321814"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331213"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00921"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2010.01.013"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413676"},{"key":"e_1_3_2_2_12_1","volume-title":"Fahad Shahbaz Khan, and Vineeth N Balasubramanian","author":"Joseph KJ","year":"2021"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2435740"},{"key":"e_1_3_2_2_14_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980(2014).","author":"Kingma Diederik P","year":"2014"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0925-2312(98)00030-7"},{"key":"e_1_3_2_2_16_1","volume-title":"Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:1909.11942","author":"Lan Zhenzhong","year":"2019"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413886"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.5555\/646270.685303"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0031-3203(00)00018-2"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2020.3007143"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.5555\/3104322.3104425"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.5555\/3061053.3061157"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2017.2705068"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3284750"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2852503"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2015.2400779"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.5555\/1866696.1866717"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1873987"},{"key":"e_1_3_2_2_29_1","volume-title":"Proceedings of the 2010 Conference on Data Mining and Data Warehouses. 1--4.","author":"Rupnik Jan","year":"2010"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.5555\/2998828.2998888"},{"key":"e_1_3_2_2_31_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00643"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123326"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2505311"},{"key":"e_1_3_2_2_36_1","volume-title":"A comprehensive survey on cross-modal retrieval. arXiv preprint arXiv:1607.06215","author":"Wang Kaiye","year":"2016"},{"key":"e_1_3_2_2_37_1","volume-title":"Large-scale approximate kernel canonical correlation analysis. arXiv preprint arXiv:1511.04773","author":"Wang Weiran","year":"2015"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3351034"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107335"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080678"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.5555\/3454287.3454290"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.5555\/2892753.2892851"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00366"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.5555\/3454287.3454804"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462867"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2013.2276704"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01064"}],"event":{"name":"MM '21: ACM Multimedia Conference","location":"Virtual Event China","acronym":"MM '21","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 29th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3475670","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3474085.3475670","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:48:25Z","timestamp":1750193305000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3475670"}},"subtitle":["Multimodal Coordinated Clustering Network for Large-Scale Cross-modal Retrieval"],"short-title":[],"issued":{"date-parts":[[2021,10,17]]},"references-count":47,"alternative-id":["10.1145\/3474085.3475670","10.1145\/3474085"],"URL":"https:\/\/doi.org\/10.1145\/3474085.3475670","relation":{},"subject":[],"published":{"date-parts":[[2021,10,17]]},"assertion":[{"value":"2021-10-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}