{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,19]],"date-time":"2026-06-19T23:01:12Z","timestamp":1781910072138,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,10,10]],"date-time":"2022-10-10T00:00:00Z","timestamp":1665360000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100010023","name":"Natural Science Research of Jiangsu Higher Education Institutions of China","doi-asserted-by":"publisher","award":["19KJB520009"],"award-info":[{"award-number":["19KJB520009"]}],"id":[{"id":"10.13039\/501100010023","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Future Network Scientific Research Fund Project","award":["FNSRFP-2021-YB-21"],"award-info":[{"award-number":["FNSRFP-2021-YB-21"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62101245, 61972016"],"award-info":[{"award-number":["62101245, 61972016"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002858","name":"China Postdoctoral Science Foundation","doi-asserted-by":"publisher","award":["2019M661999"],"award-info":[{"award-number":["2019M661999"]}],"id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,10,10]]},"DOI":"10.1145\/3503161.3548166","type":"proceedings-article","created":{"date-parts":[[2022,10,10]],"date-time":"2022-10-10T15:43:12Z","timestamp":1665416592000},"page":"1984-1992","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":114,"title":["Look Before You Leap: Improving Text-based Person Retrieval by Learning A Consistent Cross-modal Common Manifold"],"prefix":"10.1145","author":[{"given":"Zijie","family":"Wang","sequence":"first","affiliation":[{"name":"Nanjing Tech University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Aichun","family":"Zhu","sequence":"additional","affiliation":[{"name":"Nanjing Tech University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jingyi","family":"Xue","sequence":"additional","affiliation":[{"name":"Nanjing Tech University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xili","family":"Wan","sequence":"additional","affiliation":[{"name":"Nanjing Tech University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chao","family":"Liu","sequence":"additional","affiliation":[{"name":"Jinling Institute of Technology, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tian","family":"Wang","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yifeng","family":"Li","sequence":"additional","affiliation":[{"name":"Nanjing Tech University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2022,10,10]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093640"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01270-0_4"},{"key":"e_1_3_2_2_3_1","volume-title":"2018 IEEE Winter Conference on Applications of Computer Vision (WACV). 1879--1887","author":"Chen T.","unstructured":"T. Chen , C. Xu , and J. Luo . 2018b. Improving Text-Based Person Search by Spatial Matching and Adaptive Threshold . In 2018 IEEE Winter Conference on Applications of Computer Vision (WACV). 1879--1887 . T. Chen, C. Xu, and J. Luo. 2018b. Improving Text-Based Person Search by Spatial Matching and Adaptive Threshold. In 2018 IEEE Winter Conference on Applications of Computer Vision (WACV). 1879--1887."},{"key":"e_1_3_2_2_4_1","volume-title":"Semantically Self-Aligned Network for Text-to-Image Part-aware Person Re-identification. arXiv preprint arXiv:2107.12666","author":"Ding Zefeng","year":"2021","unstructured":"Zefeng Ding , Changxing Ding , Zhiyin Shao , and Dacheng Tao . 2021. Semantically Self-Aligned Network for Text-to-Image Part-aware Person Re-identification. arXiv preprint arXiv:2107.12666 ( 2021 ). Zefeng Ding, Changxing Ding, Zhiyin Shao, and Dacheng Tao. 2021. Semantically Self-Aligned Network for Text-to-Image Part-aware Person Re-identification. arXiv preprint arXiv:2107.12666 (2021)."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3243316"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00621"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00954"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58571-6_43"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6777"},{"key":"e_1_3_2_2_11_1","volume-title":"Adam: A Method for Stochastic Optimization. In 3rd International Conference on Learning Representations, ICLR","author":"Diederik","year":"2015","unstructured":"Diederik P. Kingma and Jimmy Ba. 2015 . Adam: A Method for Stochastic Optimization. In 3rd International Conference on Learning Representations, ICLR 2015 . Diederik P. Kingma and Jimmy Ba. 2015. Adam: A Method for Stochastic Optimization. In 3rd International Conference on Learning Representations, ICLR 2015."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.209"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.551"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298832"},{"key":"e_1_3_2_2_15_1","volume-title":"Multi-task mid-level feature alignment network for unsupervised cross-dataset person re-identification. arXiv preprint arXiv:1807.01440","author":"Lin Shan","year":"2018","unstructured":"Shan Lin , Haoliang Li , Chang-Tsun Li , and Alex Chichung Kot . 2018. Multi-task mid-level feature alignment network for unsupervised cross-dataset person re-identification. arXiv preprint arXiv:1807.01440 ( 2018 ). Shan Lin, Haoliang Li, Chang-Tsun Li, and Alex Chichung Kot. 2018. Multi-task mid-level feature alignment network for unsupervised cross-dataset person re-identification. arXiv preprint arXiv:1807.01440 (2018)."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3350991"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.05.106"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2984883"},{"key":"e_1_3_2_2_19_1","volume-title":"Adaptive Label-aware Graph Convolutional Networks for Cross-Modal Retrieval","author":"Qian Shengsheng","year":"2021","unstructured":"Shengsheng Qian , Dizhan Xue , Quan Fang , and Changsheng Xu. 2021. Adaptive Label-aware Graph Convolutional Networks for Cross-Modal Retrieval . IEEE Transactions on Multimedia ( 2021 ). Shengsheng Qian, Dizhan Xue, Quan Fang, and Changsheng Xu. 2021. Adaptive Label-aware Graph Convolutional Networks for Cross-Modal Retrieval. IEEE Transactions on Multimedia (2021)."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.13"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00591"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331229"},{"key":"e_1_3_2_2_23_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N Gomez Lukasz Kaiser and Illia Polosukhin. 2017a. Attention is all you need. In Advances in neural information processing systems. 5998--6008.  Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N Gomez Lukasz Kaiser and Illia Polosukhin. 2017a. Attention is all you need. In Advances in neural information processing systems. 5998--6008."},{"key":"e_1_3_2_2_24_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani , Noam Shazeer , Niki Parmar , Jakob Uszkoreit , Llion Jones , Aidan N Gomez , Lukasz Kaiser , and Illia Polosukhin . 2017b. Attention is all you need. Advances in neural information processing systems , Vol. 30 ( 2017 ). Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017b. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-60636-7_27"},{"key":"e_1_3_2_2_27_1","volume-title":"Cross-modal food retrieval: learning a joint embedding of food images and recipes with semantic consistency and attention mechanism","author":"Wang Hao","year":"2021","unstructured":"Hao Wang , Doyen Sahoo , Chenghao Liu , Ke Shu , Palakorn Achananuparp , Ee-peng Lim, and CH Steven Hoi . 2021a. Cross-modal food retrieval: learning a joint embedding of food images and recipes with semantic consistency and attention mechanism . IEEE Transactions on Multimedia ( 2021 ). Hao Wang, Doyen Sahoo, Chenghao Liu, Ke Shu, Palakorn Achananuparp, Ee-peng Lim, and CH Steven Hoi. 2021a. Cross-modal food retrieval: learning a joint embedding of food images and recipes with semantic consistency and attention mechanism. IEEE Transactions on Multimedia (2021)."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00242"},{"key":"e_1_3_2_2_29_1","volume-title":"AMEN: Adversarial Multi-space Embedding Network for Text-Based Person Re-identification. In Chinese Conference on Pattern Recognition and Computer Vision (PRCV). Springer, 462--473","author":"Wang Zijie","year":"2021","unstructured":"Zijie Wang , Jingyi Xue , Aichun Zhu , Yifeng Li , Mingyi Zhang , and Chongliang Zhong . 2021 b. AMEN: Adversarial Multi-space Embedding Network for Text-Based Person Re-identification. In Chinese Conference on Pattern Recognition and Computer Vision (PRCV). Springer, 462--473 . Zijie Wang, Jingyi Xue, Aichun Zhu, Yifeng Li, Mingyi Zhang, and Chongliang Zhong. 2021b. AMEN: Adversarial Multi-space Embedding Network for Text-Based Person Re-identification. In Chinese Conference on Pattern Recognition and Computer Vision (PRCV). Springer, 462--473."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2022.108891"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1117\/1.JEI.29.4.043028"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00016"},{"key":"e_1_3_2_2_33_1","volume-title":"Proceedings of the IEEE International Conference on Computer Vision. 3760--3769","author":"Xia Bryan Ning","year":"2019","unstructured":"Bryan Ning Xia , Yuan Gong , Yizhe Zhang , and Christian Poellabauer . 2019 . Second-order non-local attention networks for person re-identification . In Proceedings of the IEEE International Conference on Computer Vision. 3760--3769 . Bryan Ning Xia, Yuan Gong, Yizhe Zhang, and Christian Poellabauer. 2019. Second-order non-local attention networks for person re-identification. In Proceedings of the IEEE International Conference on Computer Vision. 3760--3769."},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2014.16"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2019.10.083"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_42"},{"key":"e_1_3_2_2_37_1","volume-title":"A scalable sub-graph regularization for efficient content based image retrieval with long-term relevance feedback enhancement. Knowledge-based systems","author":"Zhao Mingbo","year":"2021","unstructured":"Mingbo Zhao , Jiao Liu , Zhao Zhang , and Jicong Fan . 2021. A scalable sub-graph regularization for efficient content based image retrieval with long-term relevance feedback enhancement. Knowledge-based systems , Vol. 212 ( 2021 ), 106505. Mingbo Zhao, Jiao Liu, Zhao Zhang, and Jicong Fan. 2021. A scalable sub-graph regularization for efficient content based image retrieval with long-term relevance feedback enhancement. Knowledge-based systems, Vol. 212 (2021), 106505."},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413864"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383184"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475369"}],"event":{"name":"MM '22: The 30th ACM International Conference on Multimedia","location":"Lisboa Portugal","acronym":"MM '22","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 30th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3503161.3548166","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3503161.3548166","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:00:19Z","timestamp":1750186819000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3503161.3548166"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,10]]},"references-count":40,"alternative-id":["10.1145\/3503161.3548166","10.1145\/3503161"],"URL":"https:\/\/doi.org\/10.1145\/3503161.3548166","relation":{},"subject":[],"published":{"date-parts":[[2022,10,10]]},"assertion":[{"value":"2022-10-10","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}