{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T21:01:16Z","timestamp":1767992476083,"version":"3.49.0"},"reference-count":88,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2022,2,2]],"date-time":"2022-02-02T00:00:00Z","timestamp":1643760000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,2,2]],"date-time":"2022-02-02T00:00:00Z","timestamp":1643760000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"national natural science foundation of china","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2022,3]]},"DOI":"10.1007\/s11263-021-01569-2","type":"journal-article","created":{"date-parts":[[2022,2,2]],"date-time":"2022-02-02T04:43:13Z","timestamp":1643776993000},"page":"800-819","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["From Individual to Whole: Reducing Intra-class Variance by Feature Aggregation"],"prefix":"10.1007","volume":"130","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2648-3875","authenticated-orcid":false,"given":"Zhaoxiang","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Chuanchen","family":"Luo","sequence":"additional","affiliation":[]},{"given":"Haiping","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Yuntao","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Naiyan","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Chunfeng","family":"Song","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,2,2]]},"reference":[{"key":"1569_CR1","doi-asserted-by":"crossref","unstructured":"Carreira-Perpi\u00f1\u00e1n, M. \u00c1. (2006). Fast nonparametric clustering with gaussian blurring mean-shift. In ICML.","DOI":"10.1145\/1143844.1143864"},{"key":"1569_CR2","doi-asserted-by":"crossref","unstructured":"Chang, X., Hospedales, T. M., & Xiang, T. (2018). Multi-level factorisation net for person re-identification. In CVPR.","DOI":"10.1109\/CVPR.2018.00225"},{"key":"1569_CR3","doi-asserted-by":"crossref","unstructured":"Chen, D., Li, H., Xiao, T., Yi, S., & Wang, X. (2018a). Video person re-identification with competitive snippet-similarity aggregation and co-attentive snippet embedding. In CVPR.","DOI":"10.1109\/CVPR.2018.00128"},{"key":"1569_CR4","doi-asserted-by":"crossref","unstructured":"Chen, G., Zhang, T., Lu, J., & Zhou, J. (2019). Deep meta metric learning. In ICCV.","DOI":"10.1109\/ICCV.2019.00964"},{"key":"1569_CR5","doi-asserted-by":"crossref","unstructured":"Chen, K., Wang, J., Yang, S., Zhang, X., Xiong, Y., Loy, C. C., & Lin, D. (2018b). Optimizing video object detection via a scale-time lattice. In CVPR.","DOI":"10.1109\/CVPR.2018.00815"},{"key":"1569_CR6","doi-asserted-by":"crossref","unstructured":"Chen, Y., Zhu, X., & Gong, S. (2017). Person re-identification by deep learning multi-scale representations. In ICCV.","DOI":"10.1109\/ICCVW.2017.304"},{"key":"1569_CR7","doi-asserted-by":"crossref","unstructured":"Chen, Z., Huang, S., & Tao, D. (2018c). Context refinement for object detection. In ECCV.","DOI":"10.1007\/978-3-030-01237-3_5"},{"key":"1569_CR8","doi-asserted-by":"crossref","unstructured":"Cubuk, E. D., Zoph, B., Mane, D., Vasudevan, V., & Le, Q. V. (2019). Autoaugment: Learning augmentation strategies from data. In CVPR.","DOI":"10.1109\/CVPR.2019.00020"},{"key":"1569_CR9","doi-asserted-by":"crossref","unstructured":"Cubuk, E. D., Zoph, B., Shlens, J., & Le, Q. V. (2020). Randaugment: Practical automated data augmentation with a reduced search space. In CVPRW.","DOI":"10.1109\/CVPRW50498.2020.00359"},{"key":"1569_CR10","doi-asserted-by":"crossref","unstructured":"Damen, D., Doughty, H., Maria\u00a0Farinella, G., Fidler, S., Furnari, A., Kazakos, E., Moltisanti, D., Munro, J., Perrett, T., Price, W., et\u00a0al. (2018). Scaling egocentric vision: The epic-kitchens dataset. In Proceedings of the European conference on computer vision (ECCV) (pp. 720\u2013736).","DOI":"10.1007\/978-3-030-01225-0_44"},{"key":"1569_CR11","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., & Fei-Fei, L. (2009). ImageNet: A large-scale hierarchical image database. In CVPR.","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1569_CR12","unstructured":"DeVries, T. & Taylor, G. W. (2017). Improved regularization of convolutional neural networks with cutout. arXiv:1708.04552."},{"key":"1569_CR13","doi-asserted-by":"crossref","unstructured":"Dosovitskiy, A., Fischer, P., Ilg, E., Hausser, P., Hazirbas, C., Golkov, V., Van Der\u00a0Smagt, P., Cremers, D., & Brox, T. (2015). FlowNet: Learning optical flow with convolutional networks. In CVPR.","DOI":"10.1109\/ICCV.2015.316"},{"key":"1569_CR14","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., & Zisserman, A. (2017). Detect to track and track to detect. In ICCV.","DOI":"10.1109\/ICCV.2017.330"},{"key":"1569_CR15","doi-asserted-by":"crossref","unstructured":"Fu, Y., Wang, X., Wei, Y., & Huang, T. (2019a). Sta: Spatial-temporal attention for large-scale video-based person re-identification. In AAAI.","DOI":"10.1109\/ICIP.2019.8803321"},{"key":"1569_CR16","doi-asserted-by":"crossref","unstructured":"Fu, Y., Wei, Y., Zhou, Y., Shi, H., Huang, G., Wang, X., Yao, Z., & Huang, T. (2019b). Horizontal pyramid matching for person re-identification. In AAAI.","DOI":"10.1609\/aaai.v33i01.33018295"},{"key":"1569_CR17","doi-asserted-by":"crossref","unstructured":"Gu, X., Ma, B., Chang, H., Shan, S., & Chen, X. (2019). Temporal knowledge propagation for image-to-video person re-identification. In ICCV.","DOI":"10.1109\/ICCV.2019.00974"},{"key":"1569_CR18","doi-asserted-by":"crossref","unstructured":"Hadsell, R., Chopra, S., & LeCun, Y. (2006). Dimensionality reduction by learning an invariant mapping. In CVPR.","DOI":"10.1109\/CVPR.2006.100"},{"key":"1569_CR19","unstructured":"Han, W., Khorrami, P., Paine, T. L., Ramachandran, P., Babaeizadeh, M., Shi, H., Li, J., Yan, S., & Huang, T. S. (2016). Seq-NMS for video object detection. arXiv:1602.08465."},{"key":"1569_CR20","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2015). Delving deep into rectifiers: Surpassing human-level performance on imagenet classification. In ICCV.","DOI":"10.1109\/ICCV.2015.123"},{"key":"1569_CR21","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In CVPR.","DOI":"10.1109\/CVPR.2016.90"},{"key":"1569_CR22","unstructured":"Hermans, A., Beyer, L., & Leibe, B. (2017). In defense of the triplet loss for person re-identification. arXiv:1703.07737."},{"key":"1569_CR23","doi-asserted-by":"crossref","unstructured":"Hou, R., Ma, B., Chang, H., Gu, X., Shan, S., & Chen, X. (2019). VRSTC: Occlusion-free video person re-identification. In CVPR.","DOI":"10.1109\/CVPR.2019.00735"},{"key":"1569_CR24","doi-asserted-by":"crossref","unstructured":"Hu, H., Gu, J., Zhang, Z., Dai, J., & Wei, Y. (2018). Relation networks for object detection. In CVPR.","DOI":"10.1109\/CVPR.2018.00378"},{"key":"1569_CR25","unstructured":"Ioffe, S. & Szegedy, C. (2015). Batch normalization: Accelerating deep network training by reducing internal covariate shift. In ICML."},{"key":"1569_CR26","doi-asserted-by":"crossref","unstructured":"Jegou, H., Harzallah, H., & Schmid, C. (2007). A contextual dissimilarity measure for accurate and efficient image search. In CVPR.","DOI":"10.1109\/CVPR.2007.382970"},{"key":"1569_CR27","doi-asserted-by":"crossref","unstructured":"Kang, K., Li, H., Yan, J., Zeng, X., Yang, B., Xiao, T., Zhang, C., Wang, Z., Wang, R., Wang, X., et\u00a0al. (2017). T-CNN: Tubelets with convolutional neural networks for object detection from videos. In TCSVT.","DOI":"10.1109\/TCSVT.2017.2736553"},{"key":"1569_CR28","doi-asserted-by":"crossref","unstructured":"Kang, K., Ouyang, W., Li, H., & Wang, X. (2016). Object detection from video tubelets with convolutional neural networks. In CVPR.","DOI":"10.1109\/CVPR.2016.95"},{"key":"1569_CR29","doi-asserted-by":"crossref","unstructured":"Li, J., Wang, J., Tian, Q., Gao, W., & Zhang, S. (2019a). Global-local temporal representations for video person re-identification. In ICCV.","DOI":"10.1109\/ICCV.2019.00406"},{"key":"1569_CR30","doi-asserted-by":"crossref","unstructured":"Li, J., Zhang, S., & Huang, T. (2019b). Multi-scale 3d convolution network for video based person re-identification. In AAAI.","DOI":"10.1609\/aaai.v33i01.33018618"},{"key":"1569_CR31","doi-asserted-by":"crossref","unstructured":"Li, S., Bak, S., Carr, P., & Wang, X. (2018a). Diversity regularized spatiotemporal attention for video-based person re-identification. In CVPR.","DOI":"10.1109\/CVPR.2018.00046"},{"key":"1569_CR32","doi-asserted-by":"crossref","unstructured":"Li, W., Zhao, R., Xiao, T., & Wang, X. (2014). DeepReID: Deep filter pairing neural network for person re-identification. In ICCV.","DOI":"10.1109\/CVPR.2014.27"},{"key":"1569_CR33","doi-asserted-by":"crossref","unstructured":"Li, W., Zhu, X., & Gong, S. (2018b). Harmonious attention network for person re-identification. In CVPR.","DOI":"10.1109\/CVPR.2018.00243"},{"key":"1569_CR34","unstructured":"Lin, Y., Zheng, L., Zheng, Z., Wu, Y., & Yang, Y. (2017). Improving person re-identification by attribute and identity learning. arXiv:1703.07220."},{"key":"1569_CR35","unstructured":"Liu, C.-T., Wu, C.-W., Wang, Y.-C. F., & Chien, S.-Y. (2019). Spatially and temporally efficient non-local attention network for video-based person re-identification. In BMVC."},{"key":"1569_CR36","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C.-Y., & Berg, A. C. (2016a). SSD: Single shot multibox detector. In ECCV.","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"1569_CR37","doi-asserted-by":"crossref","unstructured":"Liu, W., Wen, Y., Yu, Z., Li, M., Raj, B., & Song, L. (2017). Sphereface: Deep hypersphere embedding for face recognition. In CVPR.","DOI":"10.1109\/CVPR.2017.713"},{"key":"1569_CR38","unstructured":"Liu, W., Wen, Y., Yu, Z., & Yang, M. (2016b). Large-margin softmax loss for convolutional neural networks. In ICML."},{"key":"1569_CR39","doi-asserted-by":"crossref","unstructured":"Lu, Y., Lu, C., & Tang, C.-K. (2017). Online video object detection using association LSTM. In ICCV.","DOI":"10.1109\/ICCV.2017.257"},{"key":"1569_CR40","doi-asserted-by":"crossref","unstructured":"Luo, C., Chen, Y., Wang, N., & Zhang, Z. (2019a). Spectral feature transformation for person re-identification. In ICCV.","DOI":"10.1109\/ICCV.2019.00508"},{"key":"1569_CR41","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1016\/j.patcog.2019.05.028","volume":"94","author":"H Luo","year":"2019","unstructured":"Luo, H., Jiang, W., Zhang, X., Fan, X., Qian, J., & Zhang, C. (2019b). Alignedreid++: Dynamically matching local information for person re-identification. Pattern Recognition, 94, 53\u201361.","journal-title":"Pattern Recognition"},{"key":"1569_CR42","unstructured":"Meila, M. & Shi, J. (2001). A random walks view of spectral segmentation. In AISTATS."},{"key":"1569_CR43","doi-asserted-by":"crossref","unstructured":"Movshovitzattias, Y., Toshev, A., Leung, T. K., Ioffe, S., & Singh, S. (2017). No fuss distance metric learning using proxies. In ICCV.","DOI":"10.1109\/ICCV.2017.47"},{"key":"1569_CR44","doi-asserted-by":"crossref","unstructured":"Oh\u00a0Song, H., Xiang, Y., Jegelka, S., & Savarese, S. (2016). Deep metric learning via lifted structured feature embedding. In CVPR.","DOI":"10.1109\/CVPR.2016.434"},{"key":"1569_CR45","doi-asserted-by":"crossref","unstructured":"Qian, X., Fu, Y., Jiang, Y.-G., Xiang, T., & Xue, X. (2017). Multi-scale deep learning architectures for person re-identification. In ICCV.","DOI":"10.1109\/ICCV.2017.577"},{"key":"1569_CR46","doi-asserted-by":"crossref","unstructured":"Qin, D., Gammeter, S., Bossard, L., Quack, T., & Van\u00a0Gool, L. (2011). Hello neighbor: Accurate object retrieval with k-reciprocal nearest neighbors. In CVPR.","DOI":"10.1109\/CVPR.2011.5995373"},{"key":"1569_CR47","doi-asserted-by":"crossref","unstructured":"Ristani, E., Solera, F., Zou, R., Cucchiara, R., & Tomasi, C. (2016). Performance measures and a data set for multi-target, multi-camera tracking. In ECCV workshop.","DOI":"10.1007\/978-3-319-48881-3_2"},{"key":"1569_CR48","doi-asserted-by":"crossref","unstructured":"Russakovsky, O., Deng, J., Su, H., Krause, J., Satheesh, S., Ma, S., Huang, Z., Karpathy, A., Khosla, A., Bernstein, M., Berg, A. C., & Fei-Fei, L. (2015). ImageNet large scale visual recognition challenge. In IJCV.","DOI":"10.1007\/s11263-015-0816-y"},{"key":"1569_CR49","doi-asserted-by":"crossref","unstructured":"Sarfraz, M. S., Schumann, A., Eberle, A., & Stiefelhagen, R. (2018). A pose-sensitive embedding for person re-identification with expanded cross neighborhood re-ranking. In CVPR.","DOI":"10.1109\/CVPR.2018.00051"},{"key":"1569_CR50","doi-asserted-by":"crossref","unstructured":"Schroff, F., Kalenichenko, D., & Philbin, J. (2015). FaceNet: A unified embedding for face recognition and clustering. In CVPR.","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"1569_CR51","doi-asserted-by":"crossref","unstructured":"Shen, Y., Li, H., Xiao, T., Yi, S., Chen, D., & Wang, X. (2018a). Deep group-shuffling random walk for person re-identification. In CVPR.","DOI":"10.1109\/CVPR.2018.00241"},{"key":"1569_CR52","doi-asserted-by":"crossref","unstructured":"Shen, Y., Li, H., Yi, S., Chen, D., & Wang, X. (2018b). Person re-identification with deep similarity-guided graph neural network. In ECCV.","DOI":"10.1007\/978-3-030-01267-0_30"},{"key":"1569_CR53","doi-asserted-by":"crossref","unstructured":"Si, J., Zhang, H., Li, C.-G., Kuen, J., Kong, X., Kot, A. C., & Wang, G. (2018). Dual attention matching network for context-aware feature sequence based person re-identification. In CVPR.","DOI":"10.1109\/CVPR.2018.00562"},{"key":"1569_CR54","unstructured":"Sohn, K. (2016). Improved deep metric learning with multi-class n-pair loss objective. In NeurIPS."},{"key":"1569_CR55","doi-asserted-by":"crossref","unstructured":"Subramaniam, A., Nambiar, A., & Mittal, A. (2019). Co-segmentation inspired attention networks for video-based person re-identification. In ICCV.","DOI":"10.1109\/ICCV.2019.00065"},{"key":"1569_CR56","doi-asserted-by":"crossref","unstructured":"Suh, Y., Wang, J., Tang, S., Mei, T., & Lee, K. M. (2018). Part-aligned bilinear representations for person re-identification. In ECCV.","DOI":"10.1007\/978-3-030-01264-9_25"},{"key":"1569_CR57","doi-asserted-by":"crossref","unstructured":"Sun, Y., Zheng, L., Deng, W., & Wang, S. (2017). SVDNet for pedestrian retrieval. In ICCV.","DOI":"10.1109\/ICCV.2017.410"},{"key":"1569_CR58","doi-asserted-by":"crossref","unstructured":"Sun, Y., Zheng, L., Yang, Y., Tian, Q., & Wang, S. (2018). Beyond part models: Person retrieval with refined part pooling (and a strong convolutional baseline). In ECCV.","DOI":"10.1007\/978-3-030-01225-0_30"},{"key":"1569_CR59","doi-asserted-by":"crossref","unstructured":"Tripathi, S., Lipton, Z. C., Belongie, S., & Nguyen, T. (2016). Context matters: Refining object detection in video with recurrent neural networks. arXiv:1607.04648.","DOI":"10.5244\/C.30.44"},{"key":"1569_CR60","doi-asserted-by":"crossref","unstructured":"Wang, C., Zhang, Q., Huang, C., Liu, W., & Wang, X. (2018a). Mancs: A multi-task attentional network with curriculum sampling for person re-identification. In ECCV.","DOI":"10.1007\/978-3-030-01225-0_23"},{"issue":"7","key":"1569_CR61","doi-asserted-by":"publisher","first-page":"926","DOI":"10.1109\/LSP.2018.2822810","volume":"25","author":"F Wang","year":"2018","unstructured":"Wang, F., Cheng, J., Liu, W., & Liu, H. (2018b). Additive margin softmax for face verification. IEEE Signal Processing Letters, 25(7), 926\u2013930.","journal-title":"IEEE Signal Processing Letters"},{"key":"1569_CR62","doi-asserted-by":"crossref","unstructured":"Wang, G., Yuan, Y., Chen, X., Li, J., & Zhou, X. (2018c). Learning discriminative features with multiple granularities for person re-identification. In ACM MM.","DOI":"10.1145\/3240508.3240552"},{"key":"1569_CR63","doi-asserted-by":"crossref","unstructured":"Wang, H., Wang, Y., Zhou, Z., Ji, X., Gong, D., Zhou, J., Li, Z., & Liu, W. (2018d). CosFace: Large margin cosine loss for deep face recognition. In CVPR.","DOI":"10.1109\/CVPR.2018.00552"},{"key":"1569_CR64","doi-asserted-by":"crossref","unstructured":"Wang, S., Zhou, Y., Yan, J., & Deng, Z. (2018e). Fully motion-aware network for video object detection. In ECCV.","DOI":"10.1007\/978-3-030-01261-8_33"},{"key":"1569_CR65","doi-asserted-by":"crossref","unstructured":"Wang, X. & Gupta, A. (2018). Videos as space-time region graphs. In ECCV.","DOI":"10.1007\/978-3-030-01228-1_25"},{"key":"1569_CR66","doi-asserted-by":"crossref","unstructured":"Wang, Y., Chen, Z., Wu, F., & Wang, G. (2018f). Person re-identification with cascaded pairwise convolutions. In CVPR.","DOI":"10.1109\/CVPR.2018.00159"},{"key":"1569_CR67","doi-asserted-by":"crossref","unstructured":"Wang, Y., Wang, L., You, Y., Zou, X., Chen, V., Li, S., Huang, G., Hariharan, B., & Weinberger, K. Q. (2018g). Resource aware person re-identification across multiple resolutions. In CVPR.","DOI":"10.1109\/CVPR.2018.00839"},{"key":"1569_CR68","doi-asserted-by":"crossref","unstructured":"Wei, L., Zhang, S., Gao, W., & Tian, Q. (2018). Person transfer GAN to bridge domain gap for person re-identification. In CVPR.","DOI":"10.1109\/CVPR.2018.00016"},{"key":"1569_CR69","doi-asserted-by":"crossref","unstructured":"Wei, L., Zhang, S., Yao, H., Gao, W., & Tian, Q. (2017). GLAD: Global\u2013local-alignment descriptor for pedestrian retrieval. In ACM MM.","DOI":"10.1145\/3123266.3123279"},{"key":"1569_CR70","doi-asserted-by":"crossref","unstructured":"Wen, Y., Zhang, K., Li, Z., & Qiao, Y. (2016). A discriminative feature learning approach for deep face recognition. In ECCV.","DOI":"10.1007\/978-3-319-46478-7_31"},{"key":"1569_CR71","doi-asserted-by":"crossref","unstructured":"Wu, H., Chen, Y., Wang, N., & Zhang, Z. (2019). Sequence level semantics aggregation for video object detection. In ICCV.","DOI":"10.1109\/ICCV.2019.00931"},{"key":"1569_CR72","doi-asserted-by":"crossref","unstructured":"Wu, Y., Lin, Y., Dong, X., Yan, Y., Ouyang, W., & Yang, Y. (2018). Exploit the unknown gradually: One-shot video-based person re-identification by stepwise learning. In CVPR.","DOI":"10.1109\/CVPR.2018.00543"},{"key":"1569_CR73","doi-asserted-by":"crossref","unstructured":"Xiao, F. & Lee, Y. J. (2018). Video object detection with an aligned spatial-temporal memory. In ECCV.","DOI":"10.1007\/978-3-030-01237-3_30"},{"key":"1569_CR74","doi-asserted-by":"crossref","unstructured":"Xie, S., Girshick, R., Doll\u00e1r, P., Tu, Z., & He, K. (2017). Aggregated residual transformations for deep neural networks. In CVPR.","DOI":"10.1109\/CVPR.2017.634"},{"key":"1569_CR75","doi-asserted-by":"crossref","unstructured":"Yang, W., Huang, H., Zhang, Z., Chen, X., Huang, K., & Zhang, S. (2019). Towards rich feature discovery with class activation maps augmentation for person re-identification. In CVPR.","DOI":"10.1109\/CVPR.2019.00148"},{"key":"1569_CR76","doi-asserted-by":"crossref","unstructured":"Yu, R., Zhou, Z., Bai, S., & Bai, X. (2017). Divide and fuse: A re-ranking approach for person re-identification. In BMVC.","DOI":"10.5244\/C.31.135"},{"key":"1569_CR77","doi-asserted-by":"crossref","unstructured":"Yun, S., Han, D., Oh, S. J., Chun, S., Choe, J., & Yoo, Y. (2019). Cutmix: Regularization strategy to train strong classifiers with localizable features. In ICCV.","DOI":"10.1109\/ICCV.2019.00612"},{"key":"1569_CR78","doi-asserted-by":"crossref","unstructured":"Zhang, H., Cisse, M., Dauphin, Y. N., & Lopez-Paz, D. (2017). mixup: Beyond empirical risk minimization. arXiv:1710.09412.","DOI":"10.1007\/978-1-4899-7687-1_79"},{"key":"1569_CR79","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Shen, X., Jin, Z., Lu, H., & Hua, X. (2019). Attribute-driven feature disentangling and temporal aggregation for video person re-identification. In CVPR.","DOI":"10.1109\/CVPR.2019.00505"},{"key":"1569_CR80","doi-asserted-by":"crossref","unstructured":"Zheng, L., Bie, Z., Sun, Y., Wang, J., Su, C., Wang, S., & Tian, Q. (2016). MARS: A video benchmark for large-scale person re-identification. In ECCV.","DOI":"10.1007\/978-3-319-46466-4_52"},{"key":"1569_CR81","doi-asserted-by":"crossref","unstructured":"Zheng, L., Shen, L., Tian, L., Wang, S., Wang, J., & Tian, Q. (2015). Scalable person re-identification: A benchmark. In ICCV.","DOI":"10.1109\/ICCV.2015.133"},{"key":"1569_CR82","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Zheng, L., & Yang, Y. (2017a). A discriminatively learned CNN embedding for person reidentification. ACM Transactions on Multimedia Computing, Communications, and Applications, 14(1), 13.","DOI":"10.1145\/3159171"},{"key":"1569_CR83","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Zheng, L., & Yang, Y. (2017b). Unlabeled samples generated by GAN improve the person re-identification baseline in vitro. In ICCV.","DOI":"10.1109\/ICCV.2017.405"},{"key":"1569_CR84","doi-asserted-by":"crossref","unstructured":"Zhong, Z., Zheng, L., Cao, D., & Li, S. (2017). Re-ranking person re-identification with k-reciprocal encoding. In CVPR.","DOI":"10.1109\/CVPR.2017.389"},{"key":"1569_CR85","doi-asserted-by":"crossref","unstructured":"Zhong, Z., Zheng, L., Kang, G., Li, S., & Yang, Y. (2020). Random erasing data augmentation. In AAAI.","DOI":"10.1609\/aaai.v34i07.7000"},{"key":"1569_CR86","doi-asserted-by":"crossref","unstructured":"Zhu, X., Dai, J., Yuan, L., & Wei, Y. (2018). Towards high performance video object detection. In CVPR.","DOI":"10.1109\/CVPR.2018.00753"},{"key":"1569_CR87","doi-asserted-by":"crossref","unstructured":"Zhu, X., Wang, Y., Dai, J., Yuan, L., & Yichen, W. (2017a). Flow-guided feature aggregation for video object detection. In ICCV.","DOI":"10.1109\/ICCV.2017.52"},{"key":"1569_CR88","doi-asserted-by":"crossref","unstructured":"Zhu, X., Xiong, Y., Dai, J., Yuan, L., & Wei, Y. (2017b). Deep feature flow for video recognition. In CVPR.","DOI":"10.1109\/CVPR.2017.441"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-021-01569-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-021-01569-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-021-01569-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,2,26]],"date-time":"2022-02-26T12:08:25Z","timestamp":1645877305000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-021-01569-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,2,2]]},"references-count":88,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2022,3]]}},"alternative-id":["1569"],"URL":"https:\/\/doi.org\/10.1007\/s11263-021-01569-2","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,2,2]]},"assertion":[{"value":"28 June 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 December 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 February 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}