{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T19:11:57Z","timestamp":1775243517797,"version":"3.50.1"},"publisher-location":"Cham","reference-count":67,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030012694","type":"print"},{"value":"9783030012700","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-01270-0_4","type":"book-chapter","created":{"date-parts":[[2018,10,5]],"date-time":"2018-10-05T18:07:51Z","timestamp":1538762871000},"page":"56-73","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":103,"title":["Improving Deep Visual Representation for Person Re-identification by Global and Local Image-language Association"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2490-1703","authenticated-orcid":false,"given":"Dapeng","family":"Chen","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2664-7975","authenticated-orcid":false,"given":"Hongsheng","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1831-9952","authenticated-orcid":false,"given":"Xihui","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5413-2445","authenticated-orcid":false,"given":"Yantao","family":"Shen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3521-6744","authenticated-orcid":false,"given":"Jing","family":"Shao","sequence":"additional","affiliation":[]},{"given":"Zejian","family":"Yuan","sequence":"additional","affiliation":[]},{"given":"Xiaogang","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,10,6]]},"reference":[{"key":"4_CR1","doi-asserted-by":"crossref","unstructured":"Ahmed, E., Jones, M., Marks, T.K.: An improved deep learning architecture for person re-identification. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7299016"},{"key":"4_CR2","unstructured":"Almazan, J., Gajic, B., Murray, N., Larlus, D.: Re-id done right: towards good practices for person re-identification. arXiv preprint arXiv:1801.05339 (2018)"},{"key":"4_CR3","doi-asserted-by":"crossref","unstructured":"Anderson, P., et al.: Bottom-up and top-down attention for image captioning and visual question answering. arXiv preprint arXiv:1707.07998 (2017)","DOI":"10.1109\/CVPR.2018.00636"},{"key":"4_CR4","doi-asserted-by":"crossref","unstructured":"Antol, S., et al.: Vqa: Visual question answering. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.279"},{"key":"4_CR5","doi-asserted-by":"crossref","unstructured":"Bai, S., Bai, X., Tian, Q.: Scalable person re-identification on supervised smoothed manifold. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.358"},{"key":"4_CR6","unstructured":"Bai, X., Yang, M., Huang, T., Dou, Z., Yu, R., Xu, Y.: Deep-person: Learning discriminative deep features for person re-identification. CoRR abs\/ arXiv:1711.10658 (2017)"},{"key":"4_CR7","doi-asserted-by":"crossref","unstructured":"Barbosa, I.B., Cristani, M., Del Bue, A., Bazzani, L., Murino, V.: Re-identification with rgb-d sensors. In: ECCV (2012)","DOI":"10.1007\/978-3-642-33863-2_43"},{"key":"4_CR8","doi-asserted-by":"crossref","unstructured":"Chen, D., Xu, D., Li, H., Sebe, N., Wang, X.: Group consistent similarity learning via deep crf for person re-identification. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00902"},{"key":"4_CR9","doi-asserted-by":"crossref","unstructured":"Chen, D., Yuan, Z., Chen, B., Zheng, N.: Similarity learning with spatial constraints for person re-identification. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.142"},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"Chen, D., Yuan, Z., Hua, G., Zheng, N., Wang, J.: Similarity learning on an explicit polynomial kernel feature map for person re-identification. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298764"},{"issue":"3","key":"4_CR11","doi-asserted-by":"publisher","first-page":"392","DOI":"10.1007\/s11263-017-0991-0","volume":"123","author":"D Chen","year":"2017","unstructured":"Chen, D., Yuan, Z., Wang, J., Chen, B., Hua, G., Zheng, N.: Exemplar-guided similarity learning on polynomial kernel feature map for person re-identification. Int. J. Comput. Vis. 123(3), 392\u2013414 (2017)","journal-title":"Int. J. Comput. Vis."},{"key":"4_CR12","doi-asserted-by":"crossref","unstructured":"Chen, W., Chen, X., Zhang, J., Huang, K.: Beyond triplet loss: A deep quadruplet network for person re-identification. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.145"},{"key":"4_CR13","doi-asserted-by":"crossref","unstructured":"Chen, X., Zitnick, C.L.: Mind\u2019s eye: A recurrent visual representation for image caption generation. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298856"},{"issue":"2","key":"4_CR14","doi-asserted-by":"publisher","first-page":"392","DOI":"10.1109\/TPAMI.2017.2666805","volume":"40","author":"YC Chen","year":"2018","unstructured":"Chen, Y.C., Zhu, X., Zheng, W.S., Lai, J.H.: Person re-identification by camera correlation aware feature augmentation. IEEE Trans. Pattern Anal. Mach. Intell. 40(2), 392\u2013408 (2018)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"4_CR15","doi-asserted-by":"crossref","unstructured":"Chen, Y., Zhu, X., Gong, S.: Person re-identification by deep learning multi-scale representations. In: ICCVW (2017)","DOI":"10.1109\/ICCVW.2017.304"},{"key":"4_CR16","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: CVPR (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"4_CR17","doi-asserted-by":"crossref","unstructured":"Farenzena, M., Bazzani, L., Perina, A., Murino, V., Cristani, M.: Person re-identification by symmetry-driven accumulation of local features. In: CVPR (2010)","DOI":"10.1109\/CVPR.2010.5539926"},{"key":"4_CR18","unstructured":"Frome, A., Corrado, G.S., Shlens, J., Bengio, S., Dean, J., Mikolov, T., et al.: Devise: A deep visual-semantic embedding model. In: NIPS, pp. 2121\u20132129 (2013)"},{"key":"4_CR19","doi-asserted-by":"crossref","unstructured":"Johnson, J., Karpathy, A., Fei-Fei, L.: Densecap: Fully convolutional localization networks for dense captioning. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.494"},{"key":"4_CR20","doi-asserted-by":"crossref","unstructured":"Karpathy, A., Fei-Fei, L.: Deep visual-semantic alignments for generating image descriptions. IEEE Trans. Pattern Anal. Mach. Intell. 39(4) (2017)","DOI":"10.1109\/TPAMI.2016.2598339"},{"key":"4_CR21","unstructured":"Karpathy, A., Joulin, A., Fei-Fei, L.: Deep fragment embeddings for bidirectional image sentence mapping. In: NIPS (2014)"},{"key":"4_CR22","doi-asserted-by":"crossref","unstructured":"Koestinger, M., Hirzer, M., Wohlhart, P., Roth, P.M., Bischof, H.: Large scale metric learning from equivalence constraints. In: CVPR, pp. 2288\u20132295 (2012)","DOI":"10.1109\/CVPR.2012.6247939"},{"key":"4_CR23","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: Pereira, F., Burges, C.J.C., Bottou, L., Weinberger, K.Q. (eds.) NIPS (2012)"},{"key":"4_CR24","doi-asserted-by":"crossref","unstructured":"Li, D., Chen, X., Zhang, Z., Huang, K.: Learning deep context-aware features over body and latent parts for person re-identification. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.782"},{"key":"4_CR25","doi-asserted-by":"crossref","unstructured":"Li, S., Xiao, T., Li, H., Yang, W., Wang, X.: Identity-aware textual-visual matching with latent co-attention. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.209"},{"key":"4_CR26","doi-asserted-by":"crossref","unstructured":"Li, S., Xiao, T., Li, H., Zhou, B., Yue, D., Wang, X.: Person search with natural language description. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.551"},{"key":"4_CR27","doi-asserted-by":"crossref","unstructured":"Li, W., Zhao, R., Wang, X.: Human reidentification with transferred metric learning. In: ACCV (2012)","DOI":"10.1007\/978-3-642-37331-2_3"},{"key":"4_CR28","doi-asserted-by":"crossref","unstructured":"Li, W., Zhao, R., Xiao, T., Wang, X.: Deepreid: Deep filter pairing neural network for person re-identification. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.27"},{"key":"4_CR29","doi-asserted-by":"crossref","unstructured":"Li, W., Zhu, X., Gong, S.: Person re-identification by deep joint learning of multi-loss classification. In: IJCAI (2017)","DOI":"10.24963\/ijcai.2017\/305"},{"key":"4_CR30","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: Visual question generation as dual task of visual question answering. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00640"},{"key":"4_CR31","doi-asserted-by":"crossref","unstructured":"Liao, S., Hu, Y., Zhu, X., Li, S.Z.: Person re-identification by local maximal occurrence representation and metric learning. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298832"},{"key":"4_CR32","doi-asserted-by":"crossref","unstructured":"Lin, J., Ren, L., Lu, J., Feng, J., Zhou, J.: Consistent-aware deep learning for person re-identification in a camera network. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.362"},{"key":"4_CR33","unstructured":"Lin, Y., Zheng, L., Zheng, Z., Wu, Y., Yang, Y.: Improving person re-identification by attribute and identity learning. CoRR abs\/ arXiv:1703.07220 (2017)"},{"key":"4_CR34","doi-asserted-by":"crossref","unstructured":"Liu, J., Shahroudy, A., Xu, D., Wang, G.: Spatio-temporal lstm with trust gates for 3d human action recognition. In: ECCV (2016)","DOI":"10.1007\/978-3-319-46487-9_50"},{"key":"4_CR35","doi-asserted-by":"crossref","unstructured":"Liu, X., Li, H., Shao, J., Chen, D., Wang, X.: Show, tell and discriminate: Image captioning by self-retrieval with partially labeled data. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01267-0_21"},{"key":"4_CR36","doi-asserted-by":"crossref","unstructured":"Liu, X., et al.: Hydraplus-net: Attentive deep features for pedestrian analysis. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.46"},{"key":"4_CR37","doi-asserted-by":"crossref","unstructured":"Ma, B., Su, Y., Jurie, F.: Bicov: a novel image representation for person re-identification and face verification. In: British Machive Vision Conference, pp. 11-pages (2012)","DOI":"10.5244\/C.26.57"},{"key":"4_CR38","doi-asserted-by":"crossref","unstructured":"Mignon, A., Jurie, F.: Pcca: A new approach for distance learning from sparse pairwise constraints. In: CVPR. IEEE (2012)","DOI":"10.1109\/CVPR.2012.6247987"},{"key":"4_CR39","unstructured":"Mirza, M., Osindero, S.: Conditional generative adversarial nets. arXiv preprint arXiv:1411.1784 (2014)"},{"key":"4_CR40","doi-asserted-by":"crossref","unstructured":"Qian, X., Fu, Y., Jiang, Y.G., Xiang, T., Xue, X.: Multi-scale deep learning architectures for person re-identification. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.577"},{"key":"4_CR41","doi-asserted-by":"crossref","unstructured":"Reed, S., Akata, Z., Lee, H., Schiele, B.: Learning deep representations of fine-grained visual descriptions. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.13"},{"key":"4_CR42","unstructured":"Reed, S., Akata, Z., Yan, X., Logeswaran, L., Schiele, B., Lee, H.: Generative adversarial text to image synthesis. In: ICML (2016)"},{"key":"4_CR43","doi-asserted-by":"crossref","unstructured":"Rennie, S.J., Marcheret, E., Mroueh, Y., Ross, J., Goel, V.: Self-critical sequence training for image captioning. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2017)","DOI":"10.1109\/CVPR.2017.131"},{"key":"4_CR44","doi-asserted-by":"crossref","unstructured":"Schumann, A., Stiefelhagen, R.: Person re-identification by deep learning attribute-complementary information. In: CVPRW (2017)","DOI":"10.1109\/CVPRW.2017.186"},{"key":"4_CR45","doi-asserted-by":"crossref","unstructured":"Shen, Y., Li, H., Xiao, T., Yi, S., Chen, D., Wang, X.: Deep group-shuffling random walk for person re-identification. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00241"},{"key":"4_CR46","doi-asserted-by":"crossref","unstructured":"Shen, Y., Xiao, T., Li, H., Yi, S., Wang, X.: End-to-end deep kronecker-product matching for person re-identification. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00720"},{"key":"4_CR47","doi-asserted-by":"crossref","unstructured":"Su, C., Li, J., Zhang, S., Xing, J., Gao, W., Tian, Q.: Pose-driven deep convolutional model for person re-identification. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.427"},{"key":"4_CR48","doi-asserted-by":"crossref","unstructured":"Su, C., Zhang, S., Xing, J., Gao, W., Tian, Q.: Deep attributes driven multi-camera person re-identification. In: ECCV (2016)","DOI":"10.1007\/978-3-319-46475-6_30"},{"key":"4_CR49","doi-asserted-by":"crossref","unstructured":"Sun, Y., Zheng, L., Deng, W., Wang, S.: Svdnet for pedestrian retrieval. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.410"},{"key":"4_CR50","doi-asserted-by":"crossref","unstructured":"Varior, R.R., Haloi, M., Wang, G.: Gated siamese convolutional neural network architecture for human re-identification. In: ECCV (2016)","DOI":"10.1007\/978-3-319-46484-8_48"},{"key":"4_CR51","doi-asserted-by":"crossref","unstructured":"Vinyals, O., Toshev, A., Bengio, S., Erhan, D.: Show and tell: A neural image caption generator. In: CVPR. IEEE (2015)","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"4_CR52","doi-asserted-by":"crossref","unstructured":"Wang, F., Zuo, W., Lin, L., Zhang, D., Zhang, L.: Joint learning of single-image and cross-image representations for person re-identification. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.144"},{"key":"4_CR53","doi-asserted-by":"crossref","unstructured":"Wang, X., Doretto, G., Sebastian, T., Rittscher, J., Tu, P.: Shape and appearance context modeling. In: ICCV (2007)","DOI":"10.1109\/ICCV.2007.4409019"},{"key":"4_CR54","doi-asserted-by":"crossref","unstructured":"Wu, A., Zheng, W.S., Yu, H.X., Gong, S., Lai, J.: Rgb-infrared cross-modality person re-identification. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.575"},{"key":"4_CR55","doi-asserted-by":"crossref","unstructured":"Xiao, T., Li, H., Ouyang, W., Wang, X.: Learning deep feature representations with domain guided dropout for person re-identification. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.140"},{"key":"4_CR56","unstructured":"Xiao, T., Li, S., Wang, B., Lin, L., Wang, X.: End-to-end deep learning for person search. CoRR abs\/ arXiv:1604.01850 (2016)"},{"key":"4_CR57","unstructured":"Xu, K., et al.: Show, attend and tell: Neural image caption generation with visual attention. In: ICML, pp. 2048\u20132057 (2015)"},{"key":"4_CR58","doi-asserted-by":"crossref","unstructured":"Zhang, L., Xiang, T., Gong, S.: Learning a discriminative null space for person re-identification. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.139"},{"key":"4_CR59","doi-asserted-by":"crossref","unstructured":"Zhao, H., et al.: Spindle net: Person re-identification with human body region guided feature decomposition and fusion. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.103"},{"key":"4_CR60","doi-asserted-by":"crossref","unstructured":"Zhao, L., Li, X., Zhuang, Y., Wang, J.: Deeply-learned part-aligned representations for person re-identification. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.349"},{"key":"4_CR61","unstructured":"Zheng, L., Huang, Y., Lu, H., Yang, Y.: Pose invariant embedding for deep person re-identification. CoRR abs\/ arXiv:1701.07732 (2017)"},{"key":"4_CR62","doi-asserted-by":"crossref","unstructured":"Zheng, L., Shen, L., Tian, L., Wang, S., Wang, J., Tian, Q.: Scalable person re-identification: A benchmark. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.133"},{"key":"4_CR63","unstructured":"Zheng, Z., Zheng, L., Garrett, M., Yang, Y., Shen, Y.: Dual-path convolutional image-text embedding. CoRR abs\/ arXiv:1711.05535 (2017). http:\/\/arxiv.org\/abs\/1711.05535"},{"key":"4_CR64","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Zheng, L., Yang, Y.: Unlabeled samples generated by gan improve the person re-identification baseline in vitro. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.405"},{"key":"4_CR65","doi-asserted-by":"crossref","unstructured":"Zhong, Z., Zheng, L., Cao, D., Li, S.: Re-ranking person re-identification with k-reciprocal encoding. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.389"},{"key":"4_CR66","doi-asserted-by":"crossref","unstructured":"Zhou, J., Yu, P., Tang, W., Wu, Y.: Efficient online local metric adaptation via negative samples for person re-identification. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.265"},{"key":"4_CR67","doi-asserted-by":"crossref","unstructured":"Zhou, S., Wang, J., Wang, J., Gong, Y., Zheng, N.: Point to set similarity based deep feature learning for person re-identification. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.534"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2018"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-01270-0_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T18:38:56Z","timestamp":1775241536000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-01270-0_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030012694","9783030012700"],"references-count":67,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-01270-0_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"6 October 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}