{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T19:56:42Z","timestamp":1774727802932,"version":"3.50.1"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"21","license":[{"start":{"date-parts":[[2024,4,26]],"date-time":"2024-04-26T00:00:00Z","timestamp":1714089600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,4,26]],"date-time":"2024-04-26T00:00:00Z","timestamp":1714089600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100012547","name":"Natural Science Foundation of Guangxi Zhuang Autonomous Region","doi-asserted-by":"publisher","award":["62266009"],"award-info":[{"award-number":["62266009"]}],"id":[{"id":"10.13039\/100012547","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2024,7]]},"DOI":"10.1007\/s00521-024-09691-1","type":"journal-article","created":{"date-parts":[[2024,4,26]],"date-time":"2024-04-26T12:02:15Z","timestamp":1714132935000},"page":"13327-13339","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Text-based person search by non-saliency enhancing and dynamic label smoothing"],"prefix":"10.1007","volume":"36","author":[{"given":"Yonghua","family":"Pang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4375-1405","authenticated-orcid":false,"given":"Canlong","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Zhixin","family":"Li","sequence":"additional","affiliation":[]},{"given":"Chunrong","family":"Wei","sequence":"additional","affiliation":[]},{"given":"Zhiwen","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,4,26]]},"reference":[{"key":"9691_CR1","doi-asserted-by":"crossref","unstructured":"Li S, Xiao T, Li H, Yang W, Wang X (2017) Identity-aware textual-visual matching with latent co-attention. In: Proceedings of the IEEE international conference on computer vision. pp 1890\u20131899","DOI":"10.1109\/ICCV.2017.209"},{"issue":"4","key":"9691_CR2","doi-asserted-by":"publisher","DOI":"10.1117\/1.JEI.29.4.043028","volume":"29","author":"Z Wang","year":"2020","unstructured":"Wang Z, Zhu A, Zheng Z, Jin J, Xue Z, Hua G (2020) IMG-Net: inner-cross-modal attentional multigranular network for description-based person re-identification. J Electron Imaging 29(4):043028","journal-title":"J Electron Imaging"},{"key":"9691_CR3","doi-asserted-by":"crossref","unstructured":"Zhu A, Wang Z, Li Y, Wan X, Jin J, Wang T, Hu F, Hua G (2021) Dssl: Deep surroundings-person separation learning for text-based person retrieval. In: Proceedings of the 29th ACM international conference on multimedia, pp 209\u2013217","DOI":"10.1145\/3474085.3475369"},{"key":"9691_CR4","unstructured":"Ding Z, Ding C, Shao Z, Tao D (2021) Semantically self-aligned network for text-to-image part-aware person re-identification. arXiv:2107.12666"},{"key":"9691_CR5","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1016\/j.neucom.2022.04.081","volume":"494","author":"Y Chen","year":"2022","unstructured":"Chen Y, Zhang G, Lu Y, Wang Z, Zheng Y (2022) Tipcb: A simple but effective part-based convolutional baseline for text-based person search. Neurocomputing 494:171\u2013181","journal-title":"Neurocomputing"},{"key":"9691_CR6","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S et al (2020) An image is worth $$16\\times 16$$ words: transformers for image recognition at scale. arXiv:2010.11929"},{"key":"9691_CR7","unstructured":"Devlin J, Chang M-W, Lee K, Toutanova K (2018) Bert: pre-training of deep bidirectional transformers for language understanding. arXiv:1810.04805"},{"key":"9691_CR8","doi-asserted-by":"crossref","unstructured":"Zhang Y, Lu H (2018) Deep cross-modal projection learning for image-text matching. In: Proceedings of the European conference on computer vision (ECCV). pp 686\u2013701","DOI":"10.1007\/978-3-030-01246-5_42"},{"key":"9691_CR9","doi-asserted-by":"crossref","unstructured":"Szegedy C, Vanhoucke V, Ioffe S, Shlens J, Wojna Z (2016) Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp 2818\u20132826","DOI":"10.1109\/CVPR.2016.308"},{"key":"9691_CR10","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. Adv Neural Inf Process Syst 30"},{"key":"9691_CR11","doi-asserted-by":"crossref","unstructured":"Chen Y-C, Li L, Yu L, El\u00a0Kholy A, Ahmed F, Gan Z, Cheng Y, Liu J (2020) Uniter: Universal image-text representation learning. In: Computer vision-ECCV 2020: 16th European conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XXX. Springer, pp 104\u2013120","DOI":"10.1007\/978-3-030-58577-8_7"},{"key":"9691_CR12","first-page":"11336","volume":"34","author":"G Li","year":"2020","unstructured":"Li G, Duan N, Fang Y, Gong M, Jiang D (2020) Unicoder-vl: A universal encoder for vision and language by cross-modal pre-training. Proc AAAI Conf Artif Intell 34:11336\u201311344","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"9691_CR13","unstructured":"Li LH, Yatskar M, Yin D, Hsieh C-J, Chang K-W (2019) Visualbert: a simple and performant baseline for vision and language. arXiv:1908.03557"},{"key":"9691_CR14","unstructured":"Lu J, Batra D, Parikh D, Lee S (2019) Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. Adv Neural Inf Process Syst 32"},{"key":"9691_CR15","unstructured":"Su W, Zhu X, Cao Y, Li B, Lu L, Wei F, Dai J (2019) Vl-bert: pre-training of generic visual-linguistic representations. arXiv:1908.08530"},{"key":"9691_CR16","doi-asserted-by":"crossref","unstructured":"Tan H, Bansal M (2019) Lxmert: Learning cross-modality encoder representations from transformers. arXiv:1908.07490","DOI":"10.18653\/v1\/D19-1514"},{"key":"9691_CR17","doi-asserted-by":"crossref","unstructured":"Chang X, Wang T, Cai S, Sun C (2023) Landmark: language-guided representation enhancement framework for scene graph generation. arXiv:2303.01080","DOI":"10.1007\/s10489-023-04722-1"},{"key":"9691_CR18","doi-asserted-by":"crossref","unstructured":"Wu N, Kera H, Kawamoto K (2023) Improving zero-shot action recognition using human instruction with text description. Appl Intell 1\u201315","DOI":"10.1007\/s10489-023-04808-w"},{"key":"9691_CR19","doi-asserted-by":"crossref","unstructured":"Munusamy H (2023) Multimodal attention-based transformer for video captioning. Appl Intell 1\u201320","DOI":"10.1007\/s10489-023-04597-2"},{"key":"9691_CR20","doi-asserted-by":"crossref","unstructured":"Sun C, Myers A, Vondrick C, Murphy K, Schmid C (2019) Videobert: a joint model for video and language representation learning. In: Proceedings of the IEEE\/CVF international conference on computer vision. pp 7464\u20137473","DOI":"10.1109\/ICCV.2019.00756"},{"key":"9691_CR21","doi-asserted-by":"crossref","unstructured":"Huang Z, Zeng Z, Huang Y, Liu B, Fu D, Fu J (2021) Seeing out of the box: end-to-end pre-training for vision-language representation learning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp 12976\u201312985","DOI":"10.1109\/CVPR46437.2021.01278"},{"key":"9691_CR22","doi-asserted-by":"publisher","DOI":"10.1016\/j.displa.2023.102467","volume":"79","author":"E Ning","year":"2023","unstructured":"Ning E, Zhang C, Wang C, Ning X, Chen H, Bai X (2023) Pedestrian re-id based on feature consistency and contrast enhancement. Displays 79:102467","journal-title":"Displays"},{"issue":"9","key":"9691_CR23","doi-asserted-by":"publisher","first-page":"4500","DOI":"10.1109\/TIP.2019.2910414","volume":"28","author":"L Zheng","year":"2019","unstructured":"Zheng L, Huang Y, Lu H, Yang Y (2019) Pose-invariant embedding for deep person re-identification. IEEE Trans Image Process 28(9):4500\u20134509","journal-title":"IEEE Trans Image Process"},{"issue":"3","key":"9691_CR24","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2023.103295","volume":"60","author":"J Yang","year":"2023","unstructured":"Yang J, Zhang C, Li Z, Tang Y, Wang Z (2023) Discriminative feature mining with relation regularization for person re-identification. Inf Process Manag 60(3):103295","journal-title":"Inf Process Manag"},{"issue":"4","key":"9691_CR25","doi-asserted-by":"publisher","first-page":"4109","DOI":"10.1007\/s10489-022-03640-y","volume":"53","author":"P Wei","year":"2023","unstructured":"Wei P, Zhang C, Tang Y, Li Z, Wang Z (2023) Reinforced domain adaptation with attention and adversarial learning for unsupervised person Re-ID. Appl Intell 53(4):4109\u20134123","journal-title":"Appl Intell"},{"issue":"10","key":"9691_CR26","doi-asserted-by":"publisher","first-page":"8241","DOI":"10.1007\/s00521-022-06903-4","volume":"34","author":"J Yang","year":"2022","unstructured":"Yang J, Zhang C, Tang Y, Li Z (2022) PAFM: pose-drive attention fusion mechanism for occluded person re-identification. Neural Comput Appl 34(10):8241\u20138252","journal-title":"Neural Comput Appl"},{"key":"9691_CR27","doi-asserted-by":"crossref","unstructured":"Li S, Xiao T, Li H, Zhou B, Yue D, Wang X (2017) Person search with natural language description. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp 1970\u20131979","DOI":"10.1109\/CVPR.2017.551"},{"issue":"07","key":"9691_CR28","first-page":"11189","volume":"34","author":"Y Jing","year":"2020","unstructured":"Jing Y, Si C, Wang J, Wang W, Wang L, Tan T (2020) Pose-guided multi-granularity attention network for text-based person search. Proc AAAI Conf Artif Intell 34(07):11189\u201311196","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"9691_CR29","doi-asserted-by":"publisher","first-page":"5542","DOI":"10.1109\/TIP.2020.2984883","volume":"29","author":"K Niu","year":"2020","unstructured":"Niu K, Huang Y, Ouyang W, Wang L (2020) Improving description-based person re-identification by multi-granularity image-text alignments. IEEE Trans Image Process 29:5542\u20135556","journal-title":"IEEE Trans Image Process"},{"key":"9691_CR30","doi-asserted-by":"crossref","unstructured":"Zheng K, Liu W, Liu J, Zha Z-J, Mei T (2020) Hierarchical Gumbel attention network for text-based person search. In: Proceedings of the 28th ACM international conference on multimedia. pp 3441\u20133449","DOI":"10.1145\/3394171.3413864"},{"key":"9691_CR31","doi-asserted-by":"crossref","unstructured":"Shu X, Wen W, Wu H, Chen K, Song Y, Qiao R, Ren B, Wang X (2022) See finer, see more: implicit modality alignment for text-based person retrieval. arXiv:2208.08608","DOI":"10.1007\/978-3-031-25072-9_42"},{"key":"9691_CR32","doi-asserted-by":"crossref","unstructured":"Cubuk ED, Zoph B, Mane D, Vasudevan V, Le QV (2018) Autoaugment: learning augmentation policies from data. arXiv:1805.09501","DOI":"10.1109\/CVPR.2019.00020"},{"key":"9691_CR33","unstructured":"Lim S, Kim I, Kim T, Kim C, Kim S (2019) Fast autoaugment. Adv Neural Inf Process Syst 32"},{"key":"9691_CR34","unstructured":"Ho D, Liang E, Chen X, Stoica I, Abbeel P (2019) Population based augmentation: efficient learning of augmentation policy schedules. In: International conference on machine learning, PMLR. pp 2731\u20132741"},{"issue":"1","key":"9691_CR35","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava N, Hinton G, Krizhevsky A, Sutskever I, Salakhutdinov R (2014) Dropout: a simple way to prevent neural networks from overfitting. J Mach Learn Res 15(1):1929\u20131958","journal-title":"J Mach Learn Res"},{"key":"9691_CR36","unstructured":"Ioffe S, Szegedy C (2015) Batch normalization: Accelerating deep network training by reducing internal covariate shift. In: International conference on machine learning. PMLR, pp 448\u2013456"},{"key":"9691_CR37","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1162\/tacl_a_00166","volume":"2","author":"P Young","year":"2014","unstructured":"Young P, Lai A, Hodosh M, Hockenmaier J (2014) From image descriptions to visual denotations: new similarity metrics for semantic inference over event descriptions. Trans Assoc Comput Linguist 2:67\u201378","journal-title":"Trans Assoc Comput Linguist"},{"key":"9691_CR38","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft coco: common objects in context. In: European conference on computer vision. Springer, pp 740\u2013755","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"9691_CR39","doi-asserted-by":"crossref","unstructured":"He S, Luo H, Wang P, Wang F, Li H, Jiang W (2021) Transreid: Transformer-based object re-identification. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV). pp 15013\u201315022","DOI":"10.1109\/ICCV48922.2021.01474"},{"key":"9691_CR40","doi-asserted-by":"crossref","unstructured":"Reed S, Akata Z, Lee H, Schiele B (2016) Learning deep representations of fine-grained visual descriptions. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp 49\u201358","DOI":"10.1109\/CVPR.2016.13"},{"key":"9691_CR41","doi-asserted-by":"crossref","unstructured":"Chen T, Xu C, Luo J (2018) Improving text-based person search by spatial matching and adaptive threshold. In: 2018 IEEE winter conference on applications of computer vision (WACV). IEEE, pp 1879\u20131887","DOI":"10.1109\/WACV.2018.00208"},{"key":"9691_CR42","doi-asserted-by":"crossref","unstructured":"Chen D, Li H, Liu X, Shen Y, Shao J, Yuan Z, Wang X (2018) Improving deep visual representation for person re-identification by global and local image-language association. In: Proceedings of the European conference on computer vision (ECCV). pp 54\u201370","DOI":"10.1007\/978-3-030-01270-0_4"},{"key":"9691_CR43","doi-asserted-by":"crossref","unstructured":"Liu J, Zha Z-J, Hong R, Wang M, Zhang Y (2019) Deep adversarial graph attention convolution network for text-based person search. In: Proceedings of the 27th ACM international conference on multimedia, pp 665\u2013673","DOI":"10.1145\/3343031.3350991"},{"key":"9691_CR44","doi-asserted-by":"crossref","unstructured":"Aggarwal S, Radhakrishnan VB, Chakraborty A (2020) Text-based person search via attribute-aided matching. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp 2617\u20132625","DOI":"10.1109\/WACV45572.2020.9093640"},{"key":"9691_CR45","unstructured":"Gao C, Cai G, Jiang X, Zheng F, Zhang J, Gong Y, Peng P, Guo X, Sun X (2021) Contextual non-local alignment over full-scale representation for text-based person search. arXiv:2101.03036"},{"key":"9691_CR46","doi-asserted-by":"crossref","unstructured":"Wang C, Luo Z, Lin Y, Li S (2021) Text-based person search via multi-granularity embedding learning. In: IJCAI, pp 1068\u20131074","DOI":"10.24963\/ijcai.2021\/148"},{"key":"9691_CR47","unstructured":"Han X, He S, Zhang L, Xiang T (2021) Text-based person search with limited data. arXiv:2110.10807"},{"key":"9691_CR48","doi-asserted-by":"crossref","unstructured":"Wang Z, Zhu A, Xue J, Wan X, Liu C, Wang T, Li Y (2022) Look before you leap: improving text-based person retrieval by learning a consistent cross-modal common manifold. In: Proceedings of the 30th ACM international conference on multimedia, pp 1984\u20131992","DOI":"10.1145\/3503161.3548166"},{"key":"9691_CR49","unstructured":"Li F, Zhou H, Li H, Zhang Y, Yu Z (2022) Person text-image matching via text-feature interpretability embedding and external attack node implantation. arXiv:2211.08657"},{"key":"9691_CR50","doi-asserted-by":"crossref","unstructured":"Li S, Cao M, Zhang M (2022) Learning semantic-aligned feature representation for text-based person search. In: ICASSP 2022\u20132022 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 2724\u20132728","DOI":"10.1109\/ICASSP43922.2022.9746846"},{"key":"9691_CR51","doi-asserted-by":"crossref","unstructured":"Wang Z, Zhu A, Xue J, Wan X, Liu C, Wang T, Li Y (2022) Caibc: Capturing all-round information beyond color for text-based person retrieval. In: Proceedings of the 30th ACM international conference on multimedia, pp 5314\u20135322","DOI":"10.1145\/3503161.3548057"},{"key":"9691_CR52","doi-asserted-by":"crossref","unstructured":"Shao Z, Zhang X, Fang M, Lin Z, Wang J, Ding C (2022) Learning granularity-unified representations for text-to-image person re-identification. In: Proceedings of the 30th ACM international conference on multimedia, pp 5566\u20135574","DOI":"10.1145\/3503161.3548028"},{"key":"9691_CR53","doi-asserted-by":"crossref","unstructured":"Wang Z, Xue J, Zhu A, Li Y, Zhang M, Zhong C (2021) Amen: adversarial multi-space embedding network for text-based person re-identification. In: Chinese conference on pattern recognition and computer vision (PRCV). Springer, pp 462\u2013473","DOI":"10.1007\/978-3-030-88007-1_38"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-09691-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-024-09691-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-09691-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,12]],"date-time":"2024-07-12T10:16:20Z","timestamp":1720779380000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-024-09691-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,26]]},"references-count":53,"journal-issue":{"issue":"21","published-print":{"date-parts":[[2024,7]]}},"alternative-id":["9691"],"URL":"https:\/\/doi.org\/10.1007\/s00521-024-09691-1","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,4,26]]},"assertion":[{"value":"13 November 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 March 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 April 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no relevant financial or non-financial interest to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}