{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T22:26:53Z","timestamp":1766269613612,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,1]],"date-time":"2023-11-01T00:00:00Z","timestamp":1698796800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,2]]},"DOI":"10.1145\/3606041.3618058","type":"proceedings-article","created":{"date-parts":[[2023,11,1]],"date-time":"2023-11-01T22:06:27Z","timestamp":1698876387000},"page":"5-14","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Text-based Person Search in Full Images via Semantic-Driven Proposal Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5914-7109","authenticated-orcid":false,"given":"Shizhou","family":"Zhang","sequence":"first","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1932-4390","authenticated-orcid":false,"given":"De","family":"Cheng","sequence":"additional","affiliation":[{"name":"Xidian University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-5439-2749","authenticated-orcid":false,"given":"Wenlong","family":"Luo","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6021-8261","authenticated-orcid":false,"given":"Yinghui","family":"Xing","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0638-4442","authenticated-orcid":false,"given":"Duo","family":"Long","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2419-2700","authenticated-orcid":false,"given":"Hao","family":"Li","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7997-9930","authenticated-orcid":false,"given":"Kai","family":"Niu","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8710-5520","authenticated-orcid":false,"given":"Guoqiang","family":"Liang","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2977-8057","authenticated-orcid":false,"given":"Yanning","family":"Zhang","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]}],"member":"320","published-online":{"date-parts":[[2023,11]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01240-3_6"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01270-0_4"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_45"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01263"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2018.00208"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3414455"},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the 24th International Conference on Artificial Intelligence. 3402--3408","author":"Chen YingCong","year":"2015","unstructured":"YingCong Chen , WeiShi Zheng , and Jianhuang Lai . 2015 . Mirror Representation for Modeling View-Specific Transform in Person Re-Identification . In Proceedings of the 24th International Conference on Artificial Intelligence. 3402--3408 . YingCong Chen, WeiShi Zheng, and Jianhuang Lai. 2015. Mirror Representation for Modeling View-Specific Transform in Person Re-Identification. In Proceedings of the 24th International Conference on Artificial Intelligence. 3402--3408."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2019.107120"},{"key":"e_1_3_2_1_9_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin , Ming-Wei Chang , Kenton Lee , and Kristina Toutanova . 2018 . Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018). Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00375"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00291"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00919"},{"key":"e_1_3_2_1_13_1","volume-title":"Contextual Non-Local Alignment over Full-Scale Representation for Text-Based Person Search. arXiv preprint arXiv:2101.03036","author":"Gao Chenyang","year":"2021","unstructured":"Chenyang Gao , Guanyu Cai , Xinyang Jiang , Feng Zheng , Jun Zhang , Yifei Gong , Pai Peng , Xiaowei Guo , and Xing Sun . 2021. Contextual Non-Local Alignment over Full-Scale Representation for Text-Based Person Search. arXiv preprint arXiv:2101.03036 ( 2021 ). Chenyang Gao, Guanyu Cai, Xinyang Jiang, Feng Zheng, Jun Zhang, Yifei Gong, Pai Peng, Xiaowei Guo, and Xing Sun. 2021. Contextual Non-Local Alignment over Full-Scale Representation for Text-Based Person Search. arXiv preprint arXiv:2101.03036 (2021)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01176"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00991"},{"key":"e_1_3_2_1_16_1","volume-title":"End-to-End Detection and Re-identification Integrated Net for Person Search. In Asian Conference on Computer Vision. 349--364","author":"He Zhenwei","year":"2018","unstructured":"Zhenwei He and Lei Zhang . 2018 . End-to-End Detection and Re-identification Integrated Net for Person Search. In Asian Conference on Computer Vision. 349--364 . Zhenwei He and Lei Zhang. 2018. End-to-End Detection and Re-identification Integrated Net for Person Search. In Asian Conference on Computer Vision. 349--364."},{"key":"e_1_3_2_1_17_1","volume-title":"Squeeze-and-Excitation Networks. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 7132--7141","author":"Hu Jie","year":"2018","unstructured":"Jie Hu , Li Shen , and Gang Sun . 2018 . Squeeze-and-Excitation Networks. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 7132--7141 . Jie Hu, Li Shen, and Gang Sun. 2018. Squeeze-and-Excitation Networks. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 7132--7141."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2020.103970"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2018.10.020"},{"key":"e_1_3_2_1_20_1","volume-title":"Instance and Pair-Aware Dynamic Networks for Re-Identification. arXiv preprint arXiv:2103.05395","author":"Jiao Bingliang","year":"2021","unstructured":"Bingliang Jiao , Xin Tan , Lu Yang , Yunlong Wang , and Peng Wang . 2021. Instance and Pair-Aware Dynamic Networks for Re-Identification. arXiv preprint arXiv:2103.05395 ( 2021 ). Bingliang Jiao, Xin Tan, Lu Yang, Yunlong Wang, and Peng Wang. 2021. Instance and Pair-Aware Dynamic Networks for Re-Identification. arXiv preprint arXiv:2103.05395 (2021)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6777"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01069"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0981-7"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_33"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.209"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.551"},{"key":"e_1_3_2_1_27_1","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 1239--1248","author":"Li Zhang","year":"2016","unstructured":"Zhang Li , Tao Xiang , and Shaogang Gong . 2016 . Learning a Discriminative Null Space for Person Re-Identification . In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 1239--1248 . Zhang Li, Tao Xiang, and Shaogang Gong. 2016. Learning a Discriminative Null Space for Person Re-Identification. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 1239--1248."},{"volume-title":"Microsoft COCO: Common Objects in Context. In European Conference on Computer Vision. 740--755","author":"Lin Tsung-Yi","key":"e_1_3_2_1_28_1","unstructured":"Tsung-Yi Lin , Michael Maire , Serge J. Belongie , James Hays , Pietro Perona , Deva Ramanan , Piotr Doll\u00e1r , and C. Lawrence Zitnick . 2014 . Microsoft COCO: Common Objects in Context. In European Conference on Computer Vision. 740--755 . Tsung-Yi Lin, Michael Maire, Serge J. Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C. Lawrence Zitnick. 2014. Microsoft COCO: Common Objects in Context. In European Conference on Computer Vision. 740--755."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.61"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00431"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3350991"},{"volume-title":"Proceedings of the European Conference on Computer Vision (ECCV). 858--877","author":"Das Abir","key":"e_1_3_2_1_32_1","unstructured":"Martinel, Niki, Abir Das , Christian Micheloni , and Amit K . Roy-Chowdhury. 2016. Temporal Model Adaptation for Person Re-Identification . In Proceedings of the European Conference on Computer Vision (ECCV). 858--877 . Martinel, Niki, Abir Das, Christian Micheloni, and Amit K. Roy-Chowdhury. 2016. Temporal Model Adaptation for Person Re-Identification. In Proceedings of the European Conference on Computer Vision (ECCV). 858--877."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00090"},{"key":"e_1_3_2_1_34_1","unstructured":"Bharti Munjal Fabio Galasso and Sikandar Amin. 2019. Knowledge Distillation for End-to-End Person Search.. In BMVC. 216.  Bharti Munjal Fabio Galasso and Sikandar Amin. 2019. Knowledge Distillation for End-to-End Person Search.. In BMVC. 216."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2984883"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00591"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00051"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_25"},{"key":"e_1_3_2_1_39_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS). 1432--1442","author":"Vu Thang","year":"2019","unstructured":"Thang Vu , Hyunjun Jang , Trung X Pham , and Chang Dong Yoo . 2019 . Cascade RPN: Delving into High-Quality Region Proposal Network with Adaptive Convolution . In Conference on Neural Information Processing Systems (NeurIPS). 1432--1442 . Thang Vu, Hyunjun Jang, Trung X Pham, and Chang Dong Yoo. 2019. Cascade RPN: Delving into High-Quality Region Proposal Network with Adaptive Convolution. In Conference on Neural Information Processing Systems (NeurIPS). 1432--1442."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01197"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00308"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00055"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.360"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654965"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00226"},{"key":"e_1_3_2_1_46_1","volume-title":"Hoi","author":"Ye Mang","year":"2021","unstructured":"Mang Ye , Jianbing Shen , Gaojie Lin , Tao Xiang , Ling Shao , and Steven C.H . Hoi . 2021 . Deep Learning for Person Re-identification: A Survey and Outlook. IEEE Transactions on Pattern Analysis and Machine Intelligence ( 2021), 1--1. Mang Ye, Jianbing Shen, Gaojie Lin, Tao Xiang, Ling Shao, and Steven C.H. Hoi. 2021. Deep Learning for Person Re-identification: A Survey and Outlook. IEEE Transactions on Pattern Analysis and Machine Intelligence (2021), 1--1."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-019-07939-w"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.2977528"},{"key":"e_1_3_2_1_49_1","volume-title":"Diverse Knowledge Distillation for End-to-End Person Search. arXiv preprint arXiv:2012.11187","author":"Zhang Xinyu","year":"2020","unstructured":"Xinyu Zhang , Xinlong Wang , JiaWang Bian , Chunhua Shen , and Mingyu You . 2020. Diverse Knowledge Distillation for End-to-End Person Search. arXiv preprint arXiv:2012.11187 ( 2020 ). Xinyu Zhang, Xinlong Wang, JiaWang Bian, Chunhua Shen, and Mingyu You. 2020. Diverse Knowledge Distillation for End-to-End Person Search. arXiv preprint arXiv:2012.11187 (2020)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_42"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.image.2020.115876"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.5555\/2919332.2919877"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.357"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995598"},{"key":"e_1_3_2_1_55_1","volume-title":"Dual-path convolutional image-text embedding with instance loss. arXiv preprint arXiv:1711.05535","author":"Zheng Zhedong","year":"2017","unstructured":"Zhedong Zheng , Liang Zheng , Michael Garrett , Yi Yang , and YiDong Shen . 2017b. Dual-path convolutional image-text embedding with instance loss. arXiv preprint arXiv:1711.05535 ( 2017 ). Zhedong Zheng, Liang Zheng, Michael Garrett, Yi Yang, and YiDong Shen. 2017b. Dual-path convolutional image-text embedding with instance loss. arXiv preprint arXiv:1711.05535 (2017)."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00686"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12241"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Ottawa ON Canada","acronym":"MM '23"},"container-title":["Proceedings of the 4th International Workshop on Human-centric Multimedia Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3606041.3618058","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3606041.3618058","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:36:20Z","timestamp":1750178180000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3606041.3618058"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11]]},"references-count":57,"alternative-id":["10.1145\/3606041.3618058","10.1145\/3606041"],"URL":"https:\/\/doi.org\/10.1145\/3606041.3618058","relation":{},"subject":[],"published":{"date-parts":[[2023,11]]},"assertion":[{"value":"2023-11-01","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}