{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,22]],"date-time":"2026-07-22T16:43:14Z","timestamp":1784738594609,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":62,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,10,17]],"date-time":"2021-10-17T00:00:00Z","timestamp":1634428800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2018AAA0102001"],"award-info":[{"award-number":["2018AAA0102001"]}]},{"name":"National Natural Science Foundation of China","award":["U1903215; 61725202"],"award-info":[{"award-number":["U1903215; 61725202"]}]},{"name":"Fundamental Research Funds for the Central Universities","award":["DUT20RC(3)083"],"award-info":[{"award-number":["DUT20RC(3)083"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,10,17]]},"DOI":"10.1145\/3474085.3475202","type":"proceedings-article","created":{"date-parts":[[2021,10,18]],"date-time":"2021-10-18T05:04:15Z","timestamp":1634533455000},"page":"516-525","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":142,"title":["HAT: Hierarchical Aggregation Transformers for Person Re-identification"],"prefix":"10.1145","author":[{"given":"Guowen","family":"Zhang","sequence":"first","affiliation":[{"name":"Dalian University of Technology, Dalian, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Pingping","family":"Zhang","sequence":"additional","affiliation":[{"name":"Dalian University of Technology, Dalian, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jinqing","family":"Qi","sequence":"additional","affiliation":[{"name":"Dalian University of Technology, Dalian, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Huchuan","family":"Lu","sequence":"additional","affiliation":[{"name":"Dalian University of Technology &amp; Pengcheng Lab, Dalian, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2021,10,17]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Binghui Chen Weihong Deng and Jiani Hu. 2019 a. Mixed high-order attention network for person re-identification. In ICCV. 371--381.  Binghui Chen Weihong Deng and Jiani Hu. 2019 a. Mixed high-order attention network for person re-identification. In ICCV. 371--381.","DOI":"10.1109\/ICCV.2019.00046"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Tianlong Chen Shaojin Ding Jingyi Xie Ye Yuan Wuyang Chen Yang Yang Zhou Ren and Zhangyang Wang. 2019 b. Abd-net: Attentive but diverse person re-identification. In ICCV. 8351--8361.  Tianlong Chen Shaojin Ding Jingyi Xie Ye Yuan Wuyang Chen Yang Yang Zhou Ren and Zhangyang Wang. 2019 b. Abd-net: Attentive but diverse person re-identification. In ICCV. 8351--8361.","DOI":"10.1109\/ICCV.2019.00844"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Xuesong Chen Canmiao Fu Yong Zhao Feng Zheng Jingkuan Song Rongrong Ji and Yi Yang. 2020. Salience-Guided Cascaded Suppression Network for Person Re-Identification. In CVPR. 3300--3310.  Xuesong Chen Canmiao Fu Yong Zhao Feng Zheng Jingkuan Song Rongrong Ji and Yi Yang. 2020. Salience-Guided Cascaded Suppression Network for Person Re-Identification. In CVPR. 3300--3310.","DOI":"10.1109\/CVPR42600.2020.00336"},{"key":"e_1_3_2_1_5_1","volume-title":"arXiv preprint arXiv:2103.15436","author":"Chen Xin","year":"2021","unstructured":"Xin Chen , Bin Yan , Jiawen Zhu , Dong Wang , Xiaoyun Yang , and Huchuan Lu. 2021. Transformer Tracking . arXiv preprint arXiv:2103.15436 ( 2021 ). Xin Chen, Bin Yan, Jiawen Zhu, Dong Wang, Xiaoyun Yang, and Huchuan Lu. 2021. Transformer Tracking. arXiv preprint arXiv:2103.15436 (2021)."},{"key":"e_1_3_2_1_6_1","unstructured":"Zuozhuo Dai Mingqiang Chen Xiaodong Gu Siyu Zhu and Ping Tan. 2019. Batch DropBlock network for person re-identification and beyond. In CVPR. 3691--3701.  Zuozhuo Dai Mingqiang Chen Xiaodong Gu Siyu Zhu and Ping Tan. 2019. Batch DropBlock network for person re-identification and beyond. In CVPR. 3691--3701."},{"key":"e_1_3_2_1_7_1","volume-title":"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. arxiv","author":"Dosovitskiy Alexey","year":"2010","unstructured":"Alexey Dosovitskiy , Lucas Beyer , Alexander Kolesnikov , Dirk Weissenborn , Xiaohua Zhai , Thomas Unterthiner , Mostafa Dehghani , Matthias Minderer , Georg Heigold , Sylvain Gelly , Jakob Uszkoreit , and Neil Houlsby . 2020. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. arxiv : 2010 .11929 [cs.CV] Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2020. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. arxiv: 2010.11929 [cs.CV]"},{"key":"e_1_3_2_1_8_1","volume-title":"Lars Petersson, and Mehrtash Harandi.","author":"Fang Pengfei","year":"2019","unstructured":"Pengfei Fang , Jieming Zhou , Soumava Kumar Roy , Lars Petersson, and Mehrtash Harandi. 2019 . Bilinear attention networks for person retrieval. In ICCV. 8030--8039. Pengfei Fang, Jieming Zhou, Soumava Kumar Roy, Lars Petersson, and Mehrtash Harandi. 2019. Bilinear attention networks for person retrieval. In ICCV. 8030--8039."},{"key":"e_1_3_2_1_9_1","volume-title":"A discriminatively trained, multiscale, deformable part model","author":"Felzenszwalb Pedro","unstructured":"Pedro Felzenszwalb , David McAllester , and Deva Ramanan . 2008. A discriminatively trained, multiscale, deformable part model . In CVPR. IEEE , 1--8. Pedro Felzenszwalb, David McAllester, and Deva Ramanan. 2008. A discriminatively trained, multiscale, deformable part model. In CVPR. IEEE, 1--8."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2009.167"},{"key":"e_1_3_2_1_11_1","volume-title":"ImageNet-trained CNNs are biased towards texture","author":"Geirhos Robert","year":"1811","unstructured":"Robert Geirhos , Patricia Rubisch , Claudio Michaelis , Matthias Bethge , Felix A Wichmann , and Wieland Brendel . 2018. ImageNet-trained CNNs are biased towards texture ; increasing shape bias improves accuracy and robustness. arXiv preprint arXiv: 1811 .12231 (2018). Robert Geirhos, Patricia Rubisch, Claudio Michaelis, Matthias Bethge, Felix A Wichmann, and Wieland Brendel. 2018. ImageNet-trained CNNs are biased towards texture; increasing shape bias improves accuracy and robustness. arXiv preprint arXiv:1811.12231 (2018)."},{"key":"e_1_3_2_1_12_1","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2016. Deep residual learning for image recognition. In CVPR. 770--778.  Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2016. Deep residual learning for image recognition. In CVPR. 770--778."},{"key":"e_1_3_2_1_13_1","volume-title":"TransReID: Transformer-based Object Re-Identification. arXiv preprint arXiv:2102.04378","author":"He Shuting","year":"2021","unstructured":"Shuting He , Hao Luo , Pichao Wang , Fan Wang , Hao Li , and Wei Jiang . 2021. TransReID: Transformer-based Object Re-Identification. arXiv preprint arXiv:2102.04378 ( 2021 ). Shuting He, Hao Luo, Pichao Wang, Fan Wang, Hao Li, and Wei Jiang. 2021. TransReID: Transformer-based Object Re-Identification. arXiv preprint arXiv:2102.04378 (2021)."},{"key":"e_1_3_2_1_14_1","volume-title":"Rethinking Spatial Dimensions of Vision Transformers. arXiv preprint arXiv:2103.16302","author":"Heo Byeongho","year":"2021","unstructured":"Byeongho Heo , Sangdoo Yun , Dongyoon Han , Sanghyuk Chun , Junsuk Choe , and Seong Joon Oh. 2021. Rethinking Spatial Dimensions of Vision Transformers. arXiv preprint arXiv:2103.16302 ( 2021 ). Byeongho Heo, Sangdoo Yun, Dongyoon Han, Sanghyuk Chun, Junsuk Choe, and Seong Joon Oh. 2021. Rethinking Spatial Dimensions of Vision Transformers. arXiv preprint arXiv:2103.16302 (2021)."},{"key":"e_1_3_2_1_15_1","unstructured":"Alexander Hermans Lucas Beyer and Bastian Leibe. 2017. In defense of the triplet loss for person re-identification. arXiv preprint arXiv:1703.07737 (2017).  Alexander Hermans Lucas Beyer and Bastian Leibe. 2017. In defense of the triplet loss for person re-identification. arXiv preprint arXiv:1703.07737 (2017)."},{"key":"e_1_3_2_1_16_1","unstructured":"Ruibing Hou Bingpeng Ma Hong Chang Xinqian Gu Shiguang Shan and Xilin Chen. 2019. Interaction-and-aggregation network for person re-identification. In CVPR. 9317--9326.  Ruibing Hou Bingpeng Ma Hong Chang Xinqian Gu Shiguang Shan and Xilin Chen. 2019. Interaction-and-aggregation network for person re-identification. In CVPR. 9317--9326."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"Xin Jin Cuiling Lan Wenjun Zeng Zhibo Chen and Li Zhang. 2020. Style normalization and restitution for generalizable person re-identification. In CVPR. 3143--3152.  Xin Jin Cuiling Lan Wenjun Zeng Zhibo Chen and Li Zhang. 2020. Style normalization and restitution for generalizable person re-identification. In CVPR. 3143--3152.","DOI":"10.1109\/CVPR42600.2020.00321"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Mahdi M Kalayeh Emrah Basaran Muhittin G\u00f6kmen Mustafa E Kamasak and Mubarak Shah. 2018. Human semantic parsing for person re-identification. In CVPR. 1062--1071.  Mahdi M Kalayeh Emrah Basaran Muhittin G\u00f6kmen Mustafa E Kamasak and Mubarak Shah. 2018. Human semantic parsing for person re-identification. In CVPR. 1062--1071.","DOI":"10.1109\/CVPR.2018.00117"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.223"},{"key":"e_1_3_2_1_20_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba . 2014 . Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014). Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.5555\/2999134.2999257"},{"key":"e_1_3_2_1_22_1","volume-title":"Combined Depth Space based Architecture Search For Person Re-identification. arXiv preprint arXiv:2104.04163","author":"Li Hanjun","year":"2021","unstructured":"Hanjun Li , Gaojie Wu , and Wei-Shi Zheng . 2021. Combined Depth Space based Architecture Search For Person Re-identification. arXiv preprint arXiv:2104.04163 ( 2021 ). Hanjun Li, Gaojie Wu, and Wei-Shi Zheng. 2021. Combined Depth Space based Architecture Search For Person Re-identification. arXiv preprint arXiv:2104.04163 (2021)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.27"},{"key":"e_1_3_2_1_24_1","unstructured":"Tsung-Yi Lin Piotr Doll\u00e1r Ross Girshick Kaiming He Bharath Hariharan and Serge Belongie. 2017. Feature pyramid networks for object detection. In CVPR. 2117--2125.  Tsung-Yi Lin Piotr Doll\u00e1r Ross Girshick Kaiming He Bharath Hariharan and Serge Belongie. 2017. Feature pyramid networks for object detection. In CVPR. 2117--2125."},{"key":"e_1_3_2_1_25_1","volume-title":"A Video Is Worth Three Views: Trigeminal Transformers for Video-based Person Re-identification. arXiv preprint arXiv:2104.01745","author":"Liu Xuehu","year":"2021","unstructured":"Xuehu Liu , Pingping Zhang , Chenyang Yu , Huchuan Lu , Xuesheng Qian , and Xiaoyun Yang . 2021. A Video Is Worth Three Views: Trigeminal Transformers for Video-based Person Re-identification. arXiv preprint arXiv:2104.01745 ( 2021 ). Xuehu Liu, Pingping Zhang, Chenyang Yu, Huchuan Lu, Xuesheng Qian, and Xiaoyun Yang. 2021. A Video Is Worth Three Views: Trigeminal Transformers for Video-based Person Re-identification. arXiv preprint arXiv:2104.01745 (2021)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413689"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Hao Luo Youzhi Gu Xingyu Liao Shenqi Lai and Wei Jiang. 2019. Bag of Tricks and a Strong Baseline for Deep Person Re-Identification. In CVPRW .  Hao Luo Youzhi Gu Xingyu Liao Shenqi Lai and Wei Jiang. 2019. Bag of Tricks and a Strong Baseline for Deep Person Re-Identification. In CVPRW .","DOI":"10.1109\/CVPRW.2019.00190"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_1_30_1","unstructured":"Jianlou Si Honggang Zhang Chun-Guang Li Jason Kuen Xiangfei Kong Alex C Kot and Gang Wang. 2018. Dual attention matching network for context-aware feature sequence based person re-identification. In CVPR. 5363--5372.  Jianlou Si Honggang Zhang Chun-Guang Li Jason Kuen Xiangfei Kong Alex C Kot and Gang Wang. 2018. Dual attention matching network for context-aware feature sequence based person re-identification. In CVPR. 5363--5372."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Chunfeng Song Yan Huang Wanli Ouyang and Liang Wang. 2018. Mask-guided contrastive attention model for person re-identification. In CVPR. 1179--1188.  Chunfeng Song Yan Huang Wanli Ouyang and Liang Wang. 2018. Mask-guided contrastive attention model for person re-identification. In CVPR. 1179--1188.","DOI":"10.1109\/CVPR.2018.00129"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"Chi Su Jianing Li Shiliang Zhang Junliang Xing Wen Gao and Qi Tian. 2017. Pose-driven deep convolutional model for person re-identification. In ICCV. 3960--3969.  Chi Su Jianing Li Shiliang Zhang Junliang Xing Wen Gao and Qi Tian. 2017. Pose-driven deep convolutional model for person re-identification. In ICCV. 3960--3969.","DOI":"10.1109\/ICCV.2017.427"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Ke Sun Bin Xiao Dong Liu and Jingdong Wang. 2019. Deep high-resolution representation learning for human pose estimation. In CVPR. 5693--5703.  Ke Sun Bin Xiao Dong Liu and Jingdong Wang. 2019. Deep high-resolution representation learning for human pose estimation. In CVPR. 5693--5703.","DOI":"10.1109\/CVPR.2019.00584"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Yifan Sun Liang Zheng Yi Yang Qi Tian and Shengjin Wang. 2018. Beyond part models: Person retrieval with refined part pooling (and a strong convolutional baseline). In ECCV. 480--496.  Yifan Sun Liang Zheng Yi Yang Qi Tian and Shengjin Wang. 2018. Beyond part models: Person retrieval with refined part pooling (and a strong convolutional baseline). In ECCV. 480--496.","DOI":"10.1007\/978-3-030-01225-0_30"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Christian Szegedy Wei Liu Yangqing Jia Pierre Sermanet Scott Reed Dragomir Anguelov Dumitru Erhan Vincent Vanhoucke and Andrew Rabinovich. 2015. Going deeper with convolutions. In CVPR. 1--9.  Christian Szegedy Wei Liu Yangqing Jia Pierre Sermanet Scott Reed Dragomir Anguelov Dumitru Erhan Vincent Vanhoucke and Andrew Rabinovich. 2015. Going deeper with convolutions. In CVPR. 1--9.","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"Christian Szegedy Vincent Vanhoucke Sergey Ioffe Jon Shlens and Zbigniew Wojna. 2016. Rethinking the inception architecture for computer vision. In CVPR. 2818--2826.  Christian Szegedy Vincent Vanhoucke Sergey Ioffe Jon Shlens and Zbigniew Wojna. 2016. Rethinking the inception architecture for computer vision. In CVPR. 2818--2826.","DOI":"10.1109\/CVPR.2016.308"},{"key":"e_1_3_2_1_37_1","volume-title":"Aanet: Attribute attention network for person re-identifications. In CVPR. 7134--7143.","author":"Tay Chiat-Pin","year":"2019","unstructured":"Chiat-Pin Tay , Sharmili Roy , and Kim-Hui Yap . 2019 . Aanet: Attribute attention network for person re-identifications. In CVPR. 7134--7143. Chiat-Pin Tay, Sharmili Roy, and Kim-Hui Yap. 2019. Aanet: Attribute attention network for person re-identifications. In CVPR. 7134--7143."},{"key":"e_1_3_2_1_38_1","volume-title":"Training data-efficient image transformers & distillation through attention. arXiv preprint arXiv:2012.12877","author":"Touvron Hugo","year":"2020","unstructured":"Hugo Touvron , Matthieu Cord , Matthijs Douze , Francisco Massa , Alexandre Sablayrolles , and Herv\u00e9 J\u00e9gou . 2020. Training data-efficient image transformers & distillation through attention. arXiv preprint arXiv:2012.12877 ( 2020 ). Hugo Touvron, Matthieu Cord, Matthijs Douze, Francisco Massa, Alexandre Sablayrolles, and Herv\u00e9 J\u00e9gou. 2020. Training data-efficient image transformers & distillation through attention. arXiv preprint arXiv:2012.12877 (2020)."},{"key":"e_1_3_2_1_39_1","volume-title":"Attention is all you need. arXiv preprint arXiv:1706.03762","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani , Noam Shazeer , Niki Parmar , Jakob Uszkoreit , Llion Jones , Aidan N Gomez , Lukasz Kaiser , and Illia Polosukhin . 2017. Attention is all you need. arXiv preprint arXiv:1706.03762 ( 2017 ). Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. arXiv preprint arXiv:1706.03762 (2017)."},{"key":"e_1_3_2_1_40_1","volume-title":"Mancs: A multi-task attentional network with curriculum sampling for person re-identification. In ECCV. 365--381.","author":"Wang Cheng","year":"2018","unstructured":"Cheng Wang , Qian Zhang , Chang Huang , Wenyu Liu , and Xinggang Wang . 2018 c. Mancs: A multi-task attentional network with curriculum sampling for person re-identification. In ECCV. 365--381. Cheng Wang, Qian Zhang, Chang Huang, Wenyu Liu, and Xinggang Wang. 2018c. Mancs: A multi-task attentional network with curriculum sampling for person re-identification. In ECCV. 365--381."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240552"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Xiaolong Wang Ross Girshick Abhinav Gupta and Kaiming He. 2018a. Non-local neural networks. In CVPR. 7794--7803.  Xiaolong Wang Ross Girshick Abhinav Gupta and Kaiming He. 2018a. Non-local neural networks. In CVPR. 7794--7803.","DOI":"10.1109\/CVPR.2018.00813"},{"key":"e_1_3_2_1_43_1","unstructured":"Longhui Wei Shiliang Zhang Wen Gao and Qi Tian. 2018. Person transfer gan to bridge domain gap for person re-identification. In CVPR. 79--88.  Longhui Wei Shiliang Zhang Wen Gao and Qi Tian. 2018. Person transfer gan to bridge domain gap for person re-identification. In CVPR. 79--88."},{"key":"e_1_3_2_1_44_1","unstructured":"Saining Xie Ross Girshick Piotr Doll\u00e1r Zhuowen Tu and Kaiming He. 2017. Aggregated residual transformations for deep neural networks. In CVPR. 1492--1500.  Saining Xie Ross Girshick Piotr Doll\u00e1r Zhuowen Tu and Kaiming He. 2017. Aggregated residual transformations for deep neural networks. In CVPR. 1492--1500."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"crossref","unstructured":"Wenjie Yang Houjing Huang Zhang Zhang Xiaotang Chen Kaiqi Huang and Shu Zhang. 2019. Towards rich feature discovery with class activation maps augmentation for person re-identification. In CVPR. 1389--1398.  Wenjie Yang Houjing Huang Zhang Zhang Xiaotang Chen Kaiqi Huang and Shu Zhang. 2019. Towards rich feature discovery with class activation maps augmentation for person re-identification. In CVPR. 1389--1398.","DOI":"10.1109\/CVPR.2019.00148"},{"key":"e_1_3_2_1_46_1","unstructured":"Fisher Yu Dequan Wang Evan Shelhamer and Trevor Darrell. 2018. Deep layer aggregation. In CVPR. 2403--2412.  Fisher Yu Dequan Wang Evan Shelhamer and Trevor Darrell. 2018. Deep layer aggregation. In CVPR. 2403--2412."},{"key":"e_1_3_2_1_47_1","volume-title":"The devil is in the middle: Exploiting mid-level representations for cross-domain instance matching. arXiv preprint arXiv:1711.08106","author":"Yu Qian","year":"2017","unstructured":"Qian Yu , Xiaobin Chang , Yi-Zhe Song , Tao Xiang , and Timothy M Hospedales . 2017. The devil is in the middle: Exploiting mid-level representations for cross-domain instance matching. arXiv preprint arXiv:1711.08106 ( 2017 ). Qian Yu, Xiaobin Chang, Yi-Zhe Song, Tao Xiang, and Timothy M Hospedales. 2017. The devil is in the middle: Exploiting mid-level representations for cross-domain instance matching. arXiv preprint arXiv:1711.08106 (2017)."},{"key":"e_1_3_2_1_48_1","volume-title":"Spatiotemporal Transformer for Video-based Person Re-identification. arXiv preprint arXiv:2103.16469","author":"Zhang Tianyu","year":"2021","unstructured":"Tianyu Zhang , Longhui Wei , Lingxi Xie , Zijie Zhuang , Yongfei Zhang , Bo Li , and Qi Tian . 2021. Spatiotemporal Transformer for Video-based Person Re-identification. arXiv preprint arXiv:2103.16469 ( 2021 ). Tianyu Zhang, Longhui Wei, Lingxi Xie, Zijie Zhuang, Yongfei Zhang, Bo Li, and Qi Tian. 2021. Spatiotemporal Transformer for Video-based Person Re-identification. arXiv preprint arXiv:2103.16469 (2021)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"crossref","unstructured":"Zhizheng Zhang Cuiling Lan Wenjun Zeng Xin Jin and Zhibo Chen. 2020. Relation-Aware Global Attention for Person Re-identification. In CVPR. 3186--3195.  Zhizheng Zhang Cuiling Lan Wenjun Zeng Xin Jin and Zhibo Chen. 2020. Relation-Aware Global Attention for Person Re-identification. In CVPR. 3186--3195.","DOI":"10.1109\/CVPR42600.2020.00325"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"crossref","unstructured":"Xiaoqi Zhao Youwei Pang Lihe Zhang Huchuan Lu and Lei Zhang. 2020. Suppress and Balance: A Simple Gated Network for Salient Object Detection. In ECCV .  Xiaoqi Zhao Youwei Pang Lihe Zhang Huchuan Lu and Lei Zhang. 2020. Suppress and Balance: A Simple Gated Network for Salient Object Detection. In ECCV .","DOI":"10.1007\/978-3-030-58536-5_3"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"crossref","unstructured":"Feng Zheng Cheng Deng Xing Sun Xinyang Jiang Xiaowei Guo Zongqiao Yu Feiyue Huang and Rongrong Ji. 2019 a. Pyramidal person re-identification via multi-loss dynamic training. In CVPR. 8514--8522.  Feng Zheng Cheng Deng Xing Sun Xinyang Jiang Xiaowei Guo Zongqiao Yu Feiyue Huang and Rongrong Ji. 2019 a. Pyramidal person re-identification via multi-loss dynamic training. In CVPR. 8514--8522.","DOI":"10.1109\/CVPR.2019.00871"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.5555\/2919332.2919877"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"crossref","unstructured":"Meng Zheng Srikrishna Karanam Ziyan Wu and Richard J Radke. 2019 b. Re-identification with consistent attentive siamese networks. In CVPR. 5735--5744.  Meng Zheng Srikrishna Karanam Ziyan Wu and Richard J Radke. 2019 b. Re-identification with consistent attentive siamese networks. In CVPR. 5735--5744.","DOI":"10.1109\/CVPR.2019.00588"},{"key":"e_1_3_2_1_54_1","volume-title":"Philip HS Torr, et al","author":"Zheng Sixiao","year":"2020","unstructured":"Sixiao Zheng , Jiachen Lu , Hengshuang Zhao , Xiatian Zhu , Zekun Luo , Yabiao Wang , Yanwei Fu , Jianfeng Feng , Tao Xiang , Philip HS Torr, et al . 2020 . Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective with Transformers . arXiv preprint arXiv:2012.15840 (2020). Sixiao Zheng, Jiachen Lu, Hengshuang Zhao, Xiatian Zhu, Zekun Luo, Yabiao Wang, Yanwei Fu, Jianfeng Feng, Tao Xiang, Philip HS Torr, et al. 2020. Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective with Transformers. arXiv preprint arXiv:2012.15840 (2020)."},{"key":"e_1_3_2_1_55_1","unstructured":"Zhedong Zheng Xiaodong Yang Zhiding Yu Liang Zheng Yi Yang and Jan Kautz. 2019 c. Joint discriminative and generative learning for person re-identification. In CVPR. 2138--2147.  Zhedong Zheng Xiaodong Yang Zhiding Yu Liang Zheng Yi Yang and Jan Kautz. 2019 c. Joint discriminative and generative learning for person re-identification. In CVPR. 2138--2147."},{"key":"e_1_3_2_1_56_1","unstructured":"Zhedong Zheng Liang Zheng and Yi Yang. 2017. Unlabeled samples generated by gan improve the person re-identification baseline in vitro. In ICCV. 3754--3762.  Zhedong Zheng Liang Zheng and Yi Yang. 2017. Unlabeled samples generated by gan improve the person re-identification baseline in vitro. In ICCV. 3754--3762."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"crossref","unstructured":"Zhun Zhong Liang Zheng Donglin Cao and Shaozi Li. 2017. Re-ranking person re-identification with k-reciprocal encoding. In CVPR. 1318--1327.  Zhun Zhong Liang Zheng Donglin Cao and Shaozi Li. 2017. Re-ranking person re-identification with k-reciprocal encoding. In CVPR. 1318--1327.","DOI":"10.1109\/CVPR.2017.389"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.319"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"crossref","unstructured":"Kaiyang Zhou Yongxin Yang Andrea Cavallaro and Tao Xiang. 2019 b. Omni-scale feature learning for person re-identification. In ICCV. 3702--3712.  Kaiyang Zhou Yongxin Yang Andrea Cavallaro and Tao Xiang. 2019 b. Omni-scale feature learning for person re-identification. In ICCV. 3702--3712.","DOI":"10.1109\/ICCV.2019.00380"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00813"},{"key":"e_1_3_2_1_61_1","volume-title":"Identity-Guided Human Semantic Parsing for Person Re-Identification. arXiv preprint arXiv:2007.13467","author":"Zhu Kuan","year":"2020","unstructured":"Kuan Zhu , Haiyun Guo , Zhiwei Liu , Ming Tang , and Jinqiao Wang . 2020. Identity-Guided Human Semantic Parsing for Person Re-Identification. arXiv preprint arXiv:2007.13467 ( 2020 ). Kuan Zhu, Haiyun Guo, Zhiwei Liu, Ming Tang, and Jinqiao Wang. 2020. Identity-Guided Human Semantic Parsing for Person Re-Identification. arXiv preprint arXiv:2007.13467 (2020)."},{"key":"e_1_3_2_1_62_1","volume-title":"AAformer: Auto-Aligned Transformer for Person Re-Identification. arXiv preprint arXiv:2104.00921","author":"Zhu Kuan","year":"2021","unstructured":"Kuan Zhu , Haiyun Guo , Shiliang Zhang , Yaowei Wang , Gaopan Huang , Honglin Qiao , Jing Liu , Jinqiao Wang , and Ming Tang . 2021. AAformer: Auto-Aligned Transformer for Person Re-Identification. arXiv preprint arXiv:2104.00921 ( 2021 ). Kuan Zhu, Haiyun Guo, Shiliang Zhang, Yaowei Wang, Gaopan Huang, Honglin Qiao, Jing Liu, Jinqiao Wang, and Ming Tang. 2021. AAformer: Auto-Aligned Transformer for Person Re-Identification. arXiv preprint arXiv:2104.00921 (2021)."}],"event":{"name":"MM '21: ACM Multimedia Conference","location":"Virtual Event China","acronym":"MM '21","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 29th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3475202","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3474085.3475202","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:48:16Z","timestamp":1750193296000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3475202"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,17]]},"references-count":62,"alternative-id":["10.1145\/3474085.3475202","10.1145\/3474085"],"URL":"https:\/\/doi.org\/10.1145\/3474085.3475202","relation":{},"subject":[],"published":{"date-parts":[[2021,10,17]]},"assertion":[{"value":"2021-10-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}