{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T14:40:07Z","timestamp":1750603207364,"version":"3.41.0"},"publisher-location":"Singapore","reference-count":33,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819687244","type":"print"},{"value":"9789819687251","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-8725-1_12","type":"book-chapter","created":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T14:23:37Z","timestamp":1750602217000},"page":"143-156","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Soft Multi-view Representation Learning for\u00a0Disambiguating Text-Based Person 
Retrieval"],"prefix":"10.1007","author":[{"given":"Jiamin","family":"Zhuang","sequence":"first","affiliation":[]},{"given":"Jing","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Xiangyan","family":"Qu","sequence":"additional","affiliation":[]},{"given":"Yuanmin","family":"Tang","sequence":"additional","affiliation":[]},{"given":"Gaopeng","family":"Gou","sequence":"additional","affiliation":[]},{"given":"Gang","family":"Xiong","sequence":"additional","affiliation":[]},{"given":"Qi","family":"Wu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,6,21]]},"reference":[{"key":"12_CR1","doi-asserted-by":"crossref","unstructured":"Bai, Y., et al.: RaSa: Relation and sensitivity aware representation learning for text-based person search. In: IJCAI, pp. 555\u2013563 (2023)","DOI":"10.24963\/ijcai.2023\/62"},{"key":"12_CR2","doi-asserted-by":"crossref","unstructured":"Chen, M., Cheng, H., Du, Y., Xu, M., Jiang, W., Wang, C.: Two wrongs don\u2019t make a right: combating confirmation bias in learning with label noise. In: AAAI, pp. 14765\u201314773 (2023)","DOI":"10.1609\/aaai.v37i12.26725"},{"key":"12_CR3","doi-asserted-by":"crossref","unstructured":"Chen, T., Xu, C., Luo, J.: Improving text-based person search by spatial matching and adaptive threshold. In: WACV, pp. 1879\u20131887 (2018)","DOI":"10.1109\/WACV.2018.00208"},{"key":"12_CR4","unstructured":"Devlin, J., Chang, M., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: NAACL-HLT, pp. 
4171\u20134186 (2019)"},{"key":"12_CR5","unstructured":"Ding, Z., Ding, C., Shao, Z., Tao, D.: Semantically self-aligned network for text-to-image part-aware person re-identification (2021)"},{"key":"12_CR6","unstructured":"Gao, C., et al.: Contextual non-local alignment over full-scale representation for text-based person search (2021)"},{"key":"12_CR7","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"12_CR8","unstructured":"Hendrycks, D., Gimpel, K.: Bridging nonlinearities and stochastic regularizers with gaussian error linear units (2016)"},{"key":"12_CR9","doi-asserted-by":"crossref","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory, vol. 9, pp. 1735\u20131780 (1997)","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"12_CR10","doi-asserted-by":"crossref","unstructured":"Jiang, D., Ye, M.: Cross-modal implicit relation reasoning and aligning for text-to-image person retrieval. In: CVPR, pp. 2787\u20132797 (2023)","DOI":"10.1109\/CVPR52729.2023.00273"},{"key":"12_CR11","doi-asserted-by":"crossref","unstructured":"Jing, Y., Si, C., Wang, J., Wang, W., Wang, L., Tan, T.: Pose-guided multi-granularity attention network for text-based person search. In: AAAI, pp. 11189\u201311196 (2020)","DOI":"10.1609\/aaai.v34i07.6777"},{"key":"12_CR12","doi-asserted-by":"crossref","unstructured":"Kim, D., Kim, N., Kwak, S.: Improving cross-modal retrieval with set of diverse embeddings. In: CVPR, pp. 23422\u201323431 (2023)","DOI":"10.1109\/CVPR52729.2023.02243"},{"key":"12_CR13","doi-asserted-by":"crossref","unstructured":"Li, H., Yan, S., Yu, Z., Tao, D.: Attribute-identity embedding and self-supervised learning for scalable person re-identification, vol. 30, pp. 
3472\u20133485 (2020)","DOI":"10.1109\/TCSVT.2019.2952550"},{"key":"12_CR14","unstructured":"Li, J., Selvaraju, R.R., Gotmare, A., Joty, S.R., Xiong, C., Hoi, S.C.: Align before fuse: vision and language representation learning with momentum distillation. In: NeurIPS, pp. 9694\u20139705 (2021)"},{"key":"12_CR15","doi-asserted-by":"crossref","unstructured":"Li, S., Cao, M., Zhang, M.: Learning semantic-aligned feature representation for text-based person search. In: ICASSP, pp. 2724\u20132728 (2022)","DOI":"10.1109\/ICASSP43922.2022.9746846"},{"key":"12_CR16","doi-asserted-by":"crossref","unstructured":"Li, S., Xiao, T., Li, H., Zhou, B., Yue, D., Wang, X.: Person search with natural language description. In: CVPR, pp. 5187\u20135196 (2017)","DOI":"10.1109\/CVPR.2017.551"},{"key":"12_CR17","doi-asserted-by":"crossref","unstructured":"Ma, Y., Sun, X., Ji, J., Jiang, G., Zhuang, W., Ji, R.: Beat: bi-directional one-to-many embedding alignment for text-based person retrieval. In: ACM MM, pp. 4157\u20134168 (2023)","DOI":"10.1145\/3581783.3611768"},{"key":"12_CR18","unstructured":"Narasimhan, M., Lazebnik, S., Schwing, A.: Out of the box: reasoning with graph convolution nets for factual visual question answering. In: NeurIPS, pp. 2659\u20132670 (2018)"},{"key":"12_CR19","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: ICML, pp. 8748\u20138763 (2021)"},{"key":"12_CR20","doi-asserted-by":"crossref","unstructured":"Sarafianos, N., Xu, X., Kakadiaris, I.A.: Adversarial representation learning for text-to-image matching. In: ICCV, pp. 5813\u20135823 (2019)","DOI":"10.1109\/ICCV.2019.00591"},{"key":"12_CR21","doi-asserted-by":"crossref","unstructured":"Sennrich, R., Haddow, B., Birch, A.: Neural machine translation of rare words with subword units. In: ACL, pp. 
1715\u20131725 (2016)","DOI":"10.18653\/v1\/P16-1162"},{"key":"12_CR22","doi-asserted-by":"crossref","unstructured":"Shao, Z., Zhang, X., Ding, C., Wang, J., Wang, J.: Unified pre-training with pseudo texts for text-to-image person re-identification. In: ICCV, pp. 11140\u201311150 (2023)","DOI":"10.1109\/ICCV51070.2023.01026"},{"key":"12_CR23","doi-asserted-by":"crossref","unstructured":"Shu, X., et al.: See finer, see more: implicit modality alignment for text-based person retrieval. In: ECCV, pp. 624\u2013641 (2022)","DOI":"10.1007\/978-3-031-25072-9_42"},{"key":"12_CR24","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. In: ICLR (2015)"},{"key":"12_CR25","doi-asserted-by":"crossref","unstructured":"Song, Y., Soleymani, M.: Polysemous visual-semantic embedding for cross-modal retrieval. In: CVPR, pp. 1979\u20131988 (2019)","DOI":"10.1109\/CVPR.2019.00208"},{"key":"12_CR26","doi-asserted-by":"crossref","unstructured":"Tan, W., Ding, C., Jiang, J., Wang, F., Zhan, Y., Tao, D.: Harnessing the power of MLLMs for transferable text-to-image person reid. In: CVPR, pp. 17127\u201317137 (2024)","DOI":"10.1109\/CVPR52733.2024.01621"},{"key":"12_CR27","doi-asserted-by":"crossref","unstructured":"Wang, Y., Bo, C., Wang, D., Wang, S., Qi, Y., Lu, H.: Language person search with mutually connected classification loss. In: ICASSP, pp. 2057\u20132061 (2019)","DOI":"10.1109\/ICASSP.2019.8682456"},{"key":"12_CR28","doi-asserted-by":"crossref","unstructured":"Wang, Z., Fang, Z., Wang, J., Yang, Y.: ViTAA: visual-textual attributes alignment in person search by natural language. In: ECCV, pp. 402\u2013420 (2020)","DOI":"10.1007\/978-3-030-58610-2_24"},{"key":"12_CR29","doi-asserted-by":"crossref","unstructured":"Yan, S., Dong, N., Zhang, L., Tang, J.: Clip-driven fine-grained text-image person re-identification, vol. 32, pp. 
6032\u20136046 (2023)","DOI":"10.1109\/TIP.2023.3327924"},{"key":"12_CR30","doi-asserted-by":"crossref","unstructured":"Yang, S., Zhou, Y., Zheng, Z., Wang, Y., Zhu, L., Wu, Y.: Towards unified text-based person retrieval: a large-scale multi-attribute and language search benchmark. In: ACM MM, pp. 4492\u20134501 (2023)","DOI":"10.1145\/3581783.3611709"},{"key":"12_CR31","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Lu, H.: Deep cross-modal projection learning for image-text matching. In: ECCV, pp. 707\u2013723 (2018)","DOI":"10.1007\/978-3-030-01246-5_42"},{"key":"12_CR32","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Zheng, L., Garrett, M., Yang, Y., Xu, M., Shen, Y.: Dual-path convolutional image-text embeddings with instance loss, vol.\u00a016, pp. 51:1\u201351:23 (2020)","DOI":"10.1145\/3383184"},{"key":"12_CR33","doi-asserted-by":"crossref","unstructured":"Zhu, A., et al.: DSSL: deep surroundings-person separation learning for text-based person retrieval. In: ACM MM, pp. 
209\u2013217 (2021)","DOI":"10.1145\/3474085.3475369"}],"container-title":["Lecture Notes in Computer Science","Wireless Artificial Intelligent Computing Systems and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-8725-1_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T14:23:42Z","timestamp":1750602222000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-8725-1_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819687244","9789819687251"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-8725-1_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"21 June 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"WASA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Wireless Artificial Intelligent Computing Systems and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tokyo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 June 
2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 June 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"wasa2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/wasa-conference.org\/WASA2025\/index.html#","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}