{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T10:05:05Z","timestamp":1775815505941,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":79,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3611709","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:40Z","timestamp":1698391660000},"page":"4492-4501","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":126,"title":["Towards Unified Text-based Person Retrieval: A Large-scale Multi-Attribute and Language Search Benchmark"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0409-1467","authenticated-orcid":false,"given":"Shuyu","family":"Yang","sequence":"first","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2211-6688","authenticated-orcid":false,"given":"Yinan","family":"Zhou","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2434-9050","authenticated-orcid":false,"given":"Zhedong","family":"Zheng","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6596-8117","authenticated-orcid":false,"given":"Yaxiong","family":"Wang","sequence":"additional","affiliation":[{"name":"Hefei University of Technology, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2136-3196","authenticated-orcid":false,"given":"Li","family":"Zhu","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6366-9834","authenticated-orcid":false,"given":"Yujiao","family":"Wu","sequence":"additional","affiliation":[{"name":"Peng Cheng Laboratory, Shenzhen, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Venkatesh Babu Radhakrishnan, and Anirban Chakraborty","author":"Aggarwal Surbhi","year":"2020","unstructured":"Surbhi Aggarwal, Venkatesh Babu Radhakrishnan, and Anirban Chakraborty. 2020. Text-based person search via attribute-aided matching. In WACV. 2617--2625."},{"key":"e_1_3_2_2_2_1","volume-title":"Synthetic Data from Diffusion Models Improves ImageNet Classification. arXiv preprint arXiv:2304.08466","author":"Azizi Shekoofeh","year":"2023","unstructured":"Shekoofeh Azizi, Simon Kornblith, Chitwan Saharia, Mohammad Norouzi, and David J Fleet. 2023. Synthetic Data from Diffusion Models Improves ImageNet Classification. arXiv preprint arXiv:2304.08466 (2023)."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"crossref","unstructured":"Tim Brooks Aleksander Holynski and Alexei A Efros. 2023. Instructpix2pix: Learning to follow image editing instructions. In CVPR. 18392--18402.","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"e_1_3_2_2_4_1","unstructured":"Z. Cao G. Hidalgo Martinez T. Simon S. Wei and Y. A. Sheikh. 2019. OpenPose: Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields. IEEE Transactions on Pattern Analysis and Machine Intelligence (2019)."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"crossref","unstructured":"Zhe Cao Tomas Simon Shih-En Wei and Yaser Sheikh. 2017. Realtime multi-person 2d pose estimation using part affinity fields. In CVPR. 7291--7299.","DOI":"10.1109\/CVPR.2017.143"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"crossref","unstructured":"Dapeng Chen Hongsheng Li Xihui Liu Yantao Shen Jing Shao Zejian Yuan and Xiaogang Wang. 2018a. Improving deep visual representation for person re-identification by global and local image-language association. In ECCV. 54--70.","DOI":"10.1007\/978-3-030-01270-0_4"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"crossref","unstructured":"Tianlang Chen Chenliang Xu and Jiebo Luo. 2018b. Improving text-based person search by spatial matching and adaptive threshold. In WACV. 1879--1887.","DOI":"10.1109\/WACV.2018.00208"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"crossref","unstructured":"Weihua Chen Xianzhe Xu Jian Jia Hao Luo Yaohua Wang Fan Wang Rong Jin and Xiuyu Sun. 2023. Beyond Appearance: a Semantic Controllable Self-Supervised Learning Framework for Human-Centric Visual Tasks. In CVPR. 15050--15061.","DOI":"10.1109\/CVPR52729.2023.01445"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.04.081"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00359"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","unstructured":"Jia Deng Wei Dong Richard Socher Li-Jia Li Kai Li and Li Fei-Fei. 2009. ImageNet: A large-scale hierarchical image database. In CVPR. 248--255. https:\/\/doi.org\/10.1109\/CVPR.2009.5206848","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_2_12_1","volume-title":"Semantically Self-Aligned Network for Text-to-Image Part-aware Person Re-identification. arXiv preprint arXiv:2107.12666","author":"Ding Zefeng","year":"2021","unstructured":"Zefeng Ding, Changxing Ding, Zhiyin Shao, and Dacheng Tao. 2021. Semantically Self-Aligned Network for Text-to-Image Part-aware Person Re-identification. arXiv preprint arXiv:2107.12666 (2021)."},{"key":"e_1_3_2_2_13_1","unstructured":"Bryce Drennan. 2022. imaginAIry. https:\/\/github.com\/brycedrennan\/imaginAIry. Accessed: 2022-05-04."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i4.20370"},{"key":"e_1_3_2_2_15_1","volume-title":"Contextual non-local alignment over full-scale representation for text-based person search. arXiv preprint arXiv:2101.03036","author":"Gao Chenyang","year":"2021","unstructured":"Chenyang Gao, Guanyu Cai, Xinyang Jiang, Feng Zheng, Jun Zhang, Yifei Gong, Pai Peng, Xiaowei Guo, and Xing Sun. 2021. Contextual non-local alignment over full-scale representation for text-based person search. arXiv preprint arXiv:2101.03036 (2021)."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"crossref","unstructured":"Kai Han Jianyuan Guo Chao Zhang and Mingjian Zhu. 2018. Attribute-aware attention model for fine-grained representation learning. In ACM MM. 2040--2048.","DOI":"10.1145\/3240508.3240550"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"crossref","unstructured":"Xiao Han Sen He Li Zhang and Tao Xiang. 2021. Text-Based Person Search with Limited Data. In BMVC.","DOI":"10.5244\/C.35.10"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"crossref","unstructured":"Keke He Zhanxiong Wang Yanwei Fu Rui Feng Yu-Gang Jiang and Xiangyang Xue. 2017. Adaptively weighted multi-task deep network for person attribute classification. In ACM MM. 1636--1644.","DOI":"10.1145\/3123266.3123424"},{"key":"e_1_3_2_2_19_1","volume-title":"Eurosat: A novel dataset and deep learning benchmark for land use and land cover classification","author":"Helber Patrick","year":"2019","unstructured":"Patrick Helber, Benjamin Bischke, Andreas Dengel, and Damian Borth. 2019. Eurosat: A novel dataset and deep learning benchmark for land use and land cover classification. IEEE Journal of Selected Topics in Applied Earth Observations and Remote Sensing (2019)."},{"key":"e_1_3_2_2_20_1","volume-title":"Prompt-to-prompt image editing with cross attention control. arXiv preprint arXiv:2208.01626","author":"Hertz Amir","year":"2022","unstructured":"Amir Hertz, Ron Mokady, Jay Tenenbaum, Kfir Aberman, Yael Pritch, and Daniel Cohen-Or. 2022. Prompt-to-prompt image editing with cross attention control. arXiv preprint arXiv:2208.01626 (2022)."},{"key":"e_1_3_2_2_21_1","volume-title":"Rethinking of pedestrian attribute recognition: A reliable evaluation under zero-shot pedestrian identity setting. arXiv preprint arXiv:2107.03576","author":"Jia Jian","year":"2021","unstructured":"Jian Jia, Houjing Huang, Xiaotang Chen, and Kaiqi Huang. 2021. Rethinking of pedestrian attribute recognition: A reliable evaluation under zero-shot pedestrian identity setting. arXiv preprint arXiv:2107.03576 (2021)."},{"key":"e_1_3_2_2_22_1","volume-title":"Rethinking of pedestrian attribute recognition: Realistic datasets with efficient method. arXiv preprint arXiv:2005.11909","author":"Jia Jian","year":"2020","unstructured":"Jian Jia, Houjing Huang, Wenjie Yang, Xiaotang Chen, and Kaiqi Huang. 2020. Rethinking of pedestrian attribute recognition: Realistic datasets with efficient method. arXiv preprint arXiv:2005.11909 (2020)."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"crossref","unstructured":"Yiqi Jiang Weihua Chen Xiuyu Sun Xiaoyu Shi Fan Wang and Hao Li. 2021. Exploring the quality of gan generated images for person re-identification. In ACM MM. 4146--4155.","DOI":"10.1145\/3474085.3475547"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6777"},{"key":"e_1_3_2_2_25_1","first-page":"2","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","volume":"1","author":"Ming-Wei Chang Jacob Devlin","year":"2019","unstructured":"Jacob Devlin Ming-Wei Chang Kenton and Lee Kristina Toutanova. 2019. Bert: Pre-training of deep bidirectional transformers for language understanding. In NAACL-HLT, Vol. 1. 2.","journal-title":"NAACL-HLT"},{"key":"e_1_3_2_2_26_1","unstructured":"Kuang-Huei Lee Xi Chen Gang Hua Houdong Hu and Xiaodong He. 2018. Stacked cross attention for image-text matching. In ECCV. 201--216."},{"key":"e_1_3_2_2_27_1","volume-title":"BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. In ICML.","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022. BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. In ICML."},{"key":"e_1_3_2_2_28_1","unstructured":"Shuang Li Tong Xiao Hongsheng Li Wei Yang and Xiaogang Wang. 2017a. Identity-aware textual-visual matching with latent co-attention. In ICCV. 1890--1899."},{"key":"e_1_3_2_2_29_1","unstructured":"Shuang Li Tong Xiao Hongsheng Li Bolei Zhou Dayu Yue and Xiaogang Wang. 2017b. Person search with natural language description. In CVPR. 1970--1979."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2019.107016"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"crossref","unstructured":"Shengcai Liao Yang Hu Xiangyu Zhu and Stan Z Li. 2015. Person re-identification by local maximal occurrence representation and metric learning. In CVPR. 2197--2206.","DOI":"10.1109\/CVPR.2015.7298832"},{"key":"e_1_3_2_2_32_1","volume-title":"Improving person re-identification by attribute and identity learning. Pattern recognition","author":"Lin Yutian","year":"2019","unstructured":"Yutian Lin, Liang Zheng, Zhedong Zheng, Yu Wu, Zhilan Hu, Chenggang Yan, and Yi Yang. 2019. Improving person re-identification by attribute and identity learning. Pattern recognition, Vol. 95 (2019), 151--161."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2019.01.027"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"crossref","unstructured":"Jiawei Liu Zheng-Jun Zha Richang Hong Meng Wang and Yongdong Zhang. 2019. Deep adversarial graph attention convolution network for text-based person search. In ACM MM. 665--673.","DOI":"10.1145\/3343031.3350991"},{"key":"e_1_3_2_2_35_1","volume-title":"Hydraplus-net: Attentive deep features for pedestrian analysis. In ICCV. 350--359.","author":"Liu Xihui","year":"2017","unstructured":"Xihui Liu, Haiyu Zhao, Maoqing Tian, Lu Sheng, Jing Shao, Shuai Yi, Junjie Yan, and Xiaogang Wang. 2017. Hydraplus-net: Attentive deep features for pedestrian analysis. In ICCV. 350--359."},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"crossref","unstructured":"Ze Liu Yutong Lin Yue Cao Han Hu Yixuan Wei Zheng Zhang Stephen Lin and Baining Guo. 2021a. Swin transformer: Hierarchical vision transformer using shifted windows. In ICCV. 10012--10022.","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_2_37_1","volume-title":"Swin Transformer: Hierarchical Vision Transformer using Shifted Windows. In ICCV.","author":"Liu Ze","year":"2021","unstructured":"Ze Liu, Yutong Lin, Yue Cao, Han Hu, Yixuan Wei, Zheng Zhang, Stephen Lin, and Baining Guo. 2021b. Swin Transformer: Hierarchical Vision Transformer using Shifted Windows. In ICCV."},{"key":"e_1_3_2_2_38_1","volume-title":"Decoupled Weight Decay Regularization. In International Conference on Learning Representations (ICLR).","author":"Loshchilov Ilya","year":"2018","unstructured":"Ilya Loshchilov and Frank Hutter. 2018. Decoupled Weight Decay Regularization. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_2_39_1","volume-title":"Learning what and where from attributes to improve person re-identification","author":"Luo Jinghao","unstructured":"Jinghao Luo, Yaohua Liu, Changxin Gao, and Nong Sang. 2019. Learning what and where from attributes to improve person re-identification. In ICIP. IEEE, 165--169."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"crossref","unstructured":"Binh X Nguyen Binh D Nguyen Tuong Do Erman Tjiputra Quang D Tran and Anh Nguyen. 2021. Graph-based person signature for person re-identifications. In CVPR. 3492--3501.","DOI":"10.1109\/CVPRW53098.2021.00388"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2984883"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"crossref","unstructured":"Scott Reed Zeynep Akata Honglak Lee and Bernt Schiele. 2016. Learning deep representations of fine-grained visual descriptions. In CVPR. 49--58.","DOI":"10.1109\/CVPR.2016.13"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-48881-3_2"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"crossref","unstructured":"Robin Rombach Andreas Blattmann Dominik Lorenz Patrick Esser and Bj\u00f6rn Ommer. 2022. High-resolution image synthesis with latent diffusion models. In CVPR. 10684--10695.","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"crossref","unstructured":"Mert Bulent Sariyildiz Karteek Alahari Diane Larlus and Yannis Kalantidis. 2023. Fake it till you make it: Learning transferable representations from synthetic ImageNet clones. In CVPR.","DOI":"10.1109\/CVPR52729.2023.00774"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"crossref","unstructured":"Zhiyin Shao Xinyu Zhang Meng Fang Zhifeng Lin Jian Wang and Changxing Ding. 2022. Learning Granularity-Unified Representations for Text-to-Image Person Re-identification. In ACM MM. 5566--5574.","DOI":"10.1145\/3503161.3548028"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.3042068"},{"key":"e_1_3_2_2_48_1","volume-title":"Wei Xiang, and Clinton Fookes.","author":"Shipard Jordan","year":"2023","unstructured":"Jordan Shipard, Arnold Wiliem, Kien Nguyen Thanh, Wei Xiang, and Clinton Fookes. 2023. Diversity is Definitely Needed: Improving Model-Agnostic Zero-shot Classification via Stable Diffusion. arxiv: 2302.03298 [cs.CV]"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-25072-9_42"},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"crossref","unstructured":"Tomas Simon Hanbyul Joo Iain Matthews and Yaser Sheikh. 2017. Hand Keypoint Detection in Single Images using Multiview Bootstrapping. In CVPR.","DOI":"10.1109\/CVPR.2017.494"},{"key":"e_1_3_2_2_51_1","unstructured":"Xiaoxiao Sun and Liang Zheng. 2019. Dissecting person re-identification from the viewpoint of viewpoint. In CVPR. 608--617."},{"key":"e_1_3_2_2_52_1","volume-title":"A Simple and Robust Correlation Filtering Method for Text-Based Person Search","author":"Suo Wei","unstructured":"Wei Suo, Mengyang Sun, Kai Niu, Yiqi Gao, Peng Wang, Yanning Zhang, and Qi Wu. 2022. A Simple and Robust Correlation Filtering Method for Text-Based Person Search. In ECCV. Springer, 726--742."},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"crossref","unstructured":"Chufeng Tang Lu Sheng Zhaoxiang Zhang and Xiaolin Hu. 2019a. Improving pedestrian attribute recognition with weakly-supervised multi-scale attribute-specific localization. In ICCV. 4997--5006.","DOI":"10.1109\/ICCV.2019.00510"},{"key":"e_1_3_2_2_54_1","volume-title":"Learning semantic representation on visual attribute graph for person re-identification and beyond. ACM Transactions on Multimedia Computing, Communications and Applications","author":"Tang Geyu","year":"2022","unstructured":"Geyu Tang, Xingyu Gao, and Zhenyu Chen. 2022. Learning semantic representation on visual attribute graph for person re-identification and beyond. ACM Transactions on Multimedia Computing, Communications and Applications (2022)."},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"crossref","unstructured":"Hao Tang Dan Xu Gaowen Liu Wei Wang Nicu Sebe and Yan Yan. 2019b. Cycle in cycle generative adversarial networks for keypoint-guided image generation. In ACM MM. 2052--2060.","DOI":"10.1145\/3343031.3350980"},{"key":"e_1_3_2_2_56_1","volume-title":"Aanet: Attribute attention network for person re-identifications. In CVPR. 7134--7143.","author":"Tay Chiat-Pin","year":"2019","unstructured":"Chiat-Pin Tay, Sharmili Roy, and Kim-Hui Yap. 2019. Aanet: Attribute attention network for person re-identifications. In CVPR. 7134--7143."},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"crossref","unstructured":"Chengji Wang Zhiming Luo Yaojin Lin and Shaozi Li. 2021. Text-based person search via multi-granularity embedding learning. In IJCAI. 1068--1074.","DOI":"10.24963\/ijcai.2021\/148"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"crossref","unstructured":"Jingya Wang Xiatian Zhu Shaogang Gong and Wei Li. 2018. Transferable joint attribute-identity deep learning for unsupervised person re-identification. In CVPR. 2275--2284.","DOI":"10.1109\/CVPR.2018.00242"},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"crossref","unstructured":"Yanan Wang Shengcai Liao and Ling Shao. 2020b. Surpassing real-world source training data: Random 3d characters for generalizable person re-identification. In ACM MM. 3422--3430.","DOI":"10.1145\/3394171.3413815"},{"key":"e_1_3_2_2_60_1","volume-title":"Vitaa: Visual-textual attributes alignment in person search by natural language. In ECCV. 402--420.","author":"Wang Zhe","year":"2020","unstructured":"Zhe Wang, Zhiyuan Fang, Jun Wang, and Yezhou Yang. 2020a. Vitaa: Visual-textual attributes alignment in person search by natural language. In ECCV. 402--420."},{"key":"e_1_3_2_2_61_1","volume-title":"CAIBC: Capturing All-round Information Beyond Color for Text-based Person Retrieval. In ACM MM. 5314--5322.","author":"Wang Zijie","year":"2022","unstructured":"Zijie Wang, Aichun Zhu, Jingyi Xue, Xili Wan, Chao Liu, Tian Wang, and Yifeng Li. 2022a. CAIBC: Capturing All-round Information Beyond Color for Text-based Person Retrieval. In ACM MM. 5314--5322."},{"key":"e_1_3_2_2_62_1","doi-asserted-by":"crossref","unstructured":"Zijie Wang Aichun Zhu Jingyi Xue Xili Wan Chao Liu Tian Wang and Yifeng Li. 2022b. Look Before You Leap: Improving Text-based Person Retrieval by Learning A Consistent Cross-modal Common Manifold. In ACM MM. 1984--1992.","DOI":"10.1145\/3503161.3548166"},{"key":"e_1_3_2_2_63_1","doi-asserted-by":"publisher","DOI":"10.1117\/1.JEI.29.4.043028"},{"key":"e_1_3_2_2_64_1","volume-title":"EDA: Easy Data Augmentation Techniques for Boosting Performance on Text Classification Tasks. In EMNLP-IJCNLP. 6382--6388.","author":"Wei Jason","year":"2019","unstructured":"Jason Wei and Kai Zou. 2019. EDA: Easy Data Augmentation Techniques for Boosting Performance on Text Classification Tasks. In EMNLP-IJCNLP. 6382--6388."},{"key":"e_1_3_2_2_65_1","unstructured":"Longhui Wei Shiliang Zhang Wen Gao and Qi Tian. 2018. Person transfer gan to bridge domain gap for person re-identification. In CVPR. 79--88."},{"key":"e_1_3_2_2_66_1","unstructured":"Shih-En Wei Varun Ramakrishna Takeo Kanade and Yaser Sheikh. 2016. Convolutional pose machines. In CVPR."},{"key":"e_1_3_2_2_67_1","volume-title":"Less is more: Learning from synthetic data with fine-grained attributes for person re-identification. ACM Transactions on Multimedia Computing, Communications and Applications","author":"Xiang Suncheng","year":"2021","unstructured":"Suncheng Xiang, Dahong Qian, Mengyuan Guan, Binjie Yan, Ting Liu, Yuzhuo Fu, and Guanjie You. 2021. Less is more: Learning from synthetic data with fine-grained attributes for person re-identification. ACM Transactions on Multimedia Computing, Communications and Applications (2021)."},{"key":"e_1_3_2_2_68_1","volume-title":"End-to-end deep learning for person search. arXiv preprint arXiv:1604.01850","author":"Xiao Tong","year":"2016","unstructured":"Tong Xiao, Shuang Li, Bochao Wang, Liang Lin, and Xiaogang Wang. 2016. End-to-end deep learning for person search. arXiv preprint arXiv:1604.01850, Vol. 2, 2 (2016), 4."},{"key":"e_1_3_2_2_69_1","volume-title":"CLIP-Driven Fine-grained Text-Image Person Re-identification. arXiv preprint arXiv:2210.10276","author":"Yan Shuanglin","year":"2022","unstructured":"Shuanglin Yan, Neng Dong, Liyan Zhang, and Jinhui Tang. 2022. CLIP-Driven Fine-grained Text-Image Person Re-identification. arXiv preprint arXiv:2210.10276 (2022)."},{"key":"e_1_3_2_2_70_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2912844"},{"key":"e_1_3_2_2_71_1","doi-asserted-by":"crossref","unstructured":"Ying Zhang and Huchuan Lu. 2018. Deep cross-modal projection learning for image-text matching. In ECCV. 686--701.","DOI":"10.1007\/978-3-030-01246-5_42"},{"key":"e_1_3_2_2_72_1","doi-asserted-by":"crossref","unstructured":"Kecheng Zheng Wu Liu Jiawei Liu Zheng-Jun Zha and Tao Mei. 2020a. Hierarchical Gumbel Attention Network for Text-based Person Search. In ACM MM.","DOI":"10.1145\/3394171.3413864"},{"key":"e_1_3_2_2_73_1","doi-asserted-by":"crossref","unstructured":"Liang Zheng Liyue Shen Lu Tian Shengjin Wang Jingdong Wang and Qi Tian. 2015. Scalable Person Re-Identification: A Benchmark. In ICCV.","DOI":"10.1109\/ICCV.2015.133"},{"key":"e_1_3_2_2_74_1","volume-title":"Person re-identification by probabilistic relative distance comparison","author":"Zheng Wei-Shi","unstructured":"Wei-Shi Zheng, Shaogang Gong, and Tao Xiang. 2011. Person re-identification by probabilistic relative distance comparison. In CVPR. IEEE, 649--656."},{"key":"e_1_3_2_2_75_1","unstructured":"Zhedong Zheng Xiaodong Yang Zhiding Yu Liang Zheng Yi Yang and Jan Kautz. 2019. Joint discriminative and generative learning for person re-identification. In CVPR."},{"key":"e_1_3_2_2_76_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383184"},{"key":"e_1_3_2_2_77_1","unstructured":"Zhedong Zheng Liang Zheng and Yi Yang. 2017. Unlabeled samples generated by gan improve the person re-identification baseline in vitro. In ICCV. 3754--3762."},{"key":"e_1_3_2_2_78_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.7000"},{"key":"e_1_3_2_2_79_1","volume-title":"DSSL: Deep Surroundings-person Separation Learning for Text-based Person Retrieval. In ACM MM. 209--217.","author":"Zhu Aichun","year":"2021","unstructured":"Aichun Zhu, Zijie Wang, Yifeng Li, Xili Wan, Jing Jin, Tian Wang, Fangqiang Hu, and Gang Hua. 2021. DSSL: Deep Surroundings-person Separation Learning for Text-based Person Retrieval. In ACM MM. 209--217."}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611709","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3611709","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:05:53Z","timestamp":1755821153000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611709"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":79,"alternative-id":["10.1145\/3581783.3611709","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3611709","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}