{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,18]],"date-time":"2026-02-18T23:47:28Z","timestamp":1771458448759,"version":"3.50.1"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2023,5,5]],"date-time":"2023-05-05T00:00:00Z","timestamp":1683244800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,5,5]],"date-time":"2023-05-05T00:00:00Z","timestamp":1683244800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62262036"],"award-info":[{"award-number":["62262036"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Vision and Applications"],"published-print":{"date-parts":[[2023,7]]},"DOI":"10.1007\/s00138-023-01392-4","type":"journal-article","created":{"date-parts":[[2023,5,5]],"date-time":"2023-05-05T07:02:09Z","timestamp":1683270129000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Crowded pose-guided multi-task learning for instance-level human parsing"],"prefix":"10.1007","volume":"34","author":[{"given":"Yong","family":"Wei","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9685-6599","authenticated-orcid":false,"given":"Li","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaodong","family":"Fu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"LiJun","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Peng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,5,5]]},"reference":[{"issue":"6","key":"1392_CR1","doi-asserted-by":"publisher","first-page":"3260","DOI":"10.1109\/TPAMI.2020.3048039","volume":"44","author":"P Li","year":"2022","unstructured":"Li, P., Xu, Y., Wei, Y., Yang, Y.: Self-correction for human parsing. IEEE Trans. Pattern Anal. Mach. Intell. 44(6), 3260\u20133271 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1392_CR2","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: Proceedings of the 28th IEEE Conference on Computer Vision and Pattern Recognition, pp. 4\u20139 (2015)","DOI":"10.1109\/CVPR.2015.7298965"},{"issue":"3","key":"1392_CR3","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1007\/s00138-022-01291-0","volume":"33","author":"Z Malik","year":"2022","unstructured":"Malik, Z., Shapiai, M.I.B.: Human action interpretation using convolutional neural network: a survey. Mach. Vis. Appl. 33(3), 37 (2022)","journal-title":"Mach. Vis. Appl."},{"key":"1392_CR4","doi-asserted-by":"crossref","unstructured":"Gupta, A., Shen, Z., Huang, T.S.: Text embedding bank for detailed image paragraph captioning. In: Proceedings of the 35th AAAI Conference on Artificial Intelligence, pp. 15791\u201315792 (2021)","DOI":"10.1609\/aaai.v35i18.17892"},{"key":"1392_CR5","unstructured":"Wang, L., Ji, X., Mingxing Jia, Q.D.: Deformable part model based multiple pedestrian detection for video surveillance in crowded scenes. In: Proceedings of the 35th AAAI Conference on Artificial Intelligence, pp. 15791\u201315792 (2021)"},{"key":"1392_CR6","doi-asserted-by":"crossref","unstructured":"Li, Q., Arnab, A., Torr, P.H.S.: Holistic, instance-level human parsing. In: Proceedings of British Machine Vision Conference, pp. 4\u20137 (2017)","DOI":"10.5244\/C.31.25"},{"key":"1392_CR7","doi-asserted-by":"crossref","unstructured":"Yang, L., Song, Q., Wang, Z., Jiang, M.: Parsing R-CNN for instance-level human analysis. In: Proceedings of the 32nd IEEE Conference on Computer Vision and Pattern Recognition, pp. 364\u2013373 (2019)","DOI":"10.1109\/CVPR.2019.00045"},{"key":"1392_CR8","doi-asserted-by":"crossref","unstructured":"Yang, L., Song, Q., Wang, Z., Hu, M., Liu, C., Xin, X., Jia, W., Xu, S.: Renovating parsing R-CNN for accurate multiple human parsing. In: Proceedings of the 16th European Conference on Computer Vision, pp. 421\u2013437 (2020)","DOI":"10.1007\/978-3-030-58610-2_25"},{"key":"1392_CR9","doi-asserted-by":"crossref","unstructured":"Ruan, T., Liu, T., Huang, Z., Wei, Y., Wei, S., Zhao, Y.: Devil in the details: Towards accurate single and multiple human parsing. In: Proceedings of the 33rd Conference on Artificial Intelligence, pp. 4814\u20134821 (2019)","DOI":"10.1609\/aaai.v33i01.33014814"},{"key":"1392_CR10","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.B.: Mask R-CNN. In: Proceedings of the 29th IEEE Conference on Computer Vision and Pattern Recognition, pp. 6399\u20136408 (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"1392_CR11","doi-asserted-by":"crossref","unstructured":"He, H., Zhang, J., Thuraisingham, B., Tao, D.: Progressive one-shot human parsing. In: Proceedings of the 35th Conference on Artificial Intelligence, pp. 1522\u20131530 (2021)","DOI":"10.1609\/aaai.v35i2.16243"},{"key":"1392_CR12","doi-asserted-by":"crossref","unstructured":"Ji, R., Du, D., Zhang, L., Wen, L., Wu, Y., Zhao, C., Huang, F., Lyu, S.: Learning semantic neural tree for human parsing. In: Proceedings of the 16th European Conference on Computer Vision, pp. 205\u2013221 (2020)","DOI":"10.1007\/978-3-030-58601-0_13"},{"key":"1392_CR13","doi-asserted-by":"crossref","unstructured":"Gong, K., Liang, X., Li, Y., Chen, Y., Yang, M., Lin, L.: Instance-level human parsing via part grouping network. In: Proceedings of the 15th European Conference on Computer Vision, pp. 205\u2013221 (2018)","DOI":"10.1007\/978-3-030-01225-0_47"},{"key":"1392_CR14","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Su, C., Zheng, L., Xie, X.: Correlating edge, pose with parsing. In: Proceedings of the 33rd IEEE Conference on Computer Vision and Pattern Recognition, pp. 8897\u20138906 (2020)","DOI":"10.1109\/CVPR42600.2020.00892"},{"key":"1392_CR15","doi-asserted-by":"crossref","unstructured":"Liu, S., Qi, L., Qin, H., Shi, J., Jia, J.: Path aggregation network for instance segmentation. In: Proceedings of the 31st IEEE Conference on Computer Vision and Pattern Recognition, pp. 8759\u20138768 (2018)","DOI":"10.1109\/CVPR.2018.00913"},{"key":"1392_CR16","unstructured":"Jaderberg, M., Simonyan, K., Zisserman, A., Kavukcuoglu, K.: Spatial transformer networks. In: Proceedings of the Annual Conference on Neural Information Processing Systems., pp. 2017\u20132025 (2015)"},{"key":"1392_CR17","doi-asserted-by":"crossref","unstructured":"Chen, X., Mottaghi, R., Liu, X., Fidler, S., Urtasun, R., Yuille, A.L.: Detect what you can: Detecting and representing objects using holistic models and body parts. In: Proceedings of the 26th IEEE Conference on Computer Vision and Pattern Recognition, pp. 1979\u20131986 (2014)","DOI":"10.1109\/CVPR.2014.254"},{"key":"1392_CR18","doi-asserted-by":"crossref","unstructured":"Zhao, J., Li, J., Cheng, Y., Sim, T., Yan, S., Feng, J.: Understanding humans in crowded scenes: deep nested adversarial learning and a new benchmark for multi-human parsing. In: Proceedings of the ACM Multimedia Conference on Multimedia Conference, pp. 792\u2013800 (2018)","DOI":"10.1145\/3240508.3240509"},{"key":"1392_CR19","unstructured":"Zhou, T., Wang, W., Liu, S., Yang, Y., Gool, L.V.: Spatial transformer networks. In: Proceedings of the 33rd IEEE Conference on Computer Vision and Pattern Recognition, pp. 1622\u20131631 (2021)"},{"key":"1392_CR20","doi-asserted-by":"crossref","unstructured":"Li, J., Wang, C., Zhu, H., Mao, Y., Fang, H., Lu, C.: Crowdpose: efficient crowded scenes pose estimation and a new benchmark. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2019, Long Beach, CA, USA, June 16\u201320, 2019, pp. 10863\u201310872. Computer Vision Foundation\/IEEE (2019)","DOI":"10.1109\/CVPR.2019.01112"},{"key":"1392_CR21","doi-asserted-by":"crossref","unstructured":"Fang, H., Xie, S., Tai, Y., Lu, C.: RMPE: regional multi-person pose estimation. In: IEEE International Conference on Computer Vision, ICCV 2017, Venice, Italy, October 22\u201329, 2017, pp. 2353\u20132362. IEEE Computer Society (2017)","DOI":"10.1109\/ICCV.2017.256"},{"key":"1392_CR22","doi-asserted-by":"publisher","first-page":"457","DOI":"10.1016\/j.neucom.2020.07.002","volume":"413","author":"J Liu","year":"2020","unstructured":"Liu, J., Zhang, Z., Shan, C., Tan, T.: Kinematic skeleton graph augmented network for human parsing. Neurocomputing 413, 457\u2013470 (2020)","journal-title":"Neurocomputing"},{"issue":"7","key":"1392_CR23","doi-asserted-by":"publisher","first-page":"70","DOI":"10.1007\/s00138-020-01104-2","volume":"31","author":"R Wang","year":"2020","unstructured":"Wang, R., Tong, J., Wang, X.: Enhancing feature fusion for human pose estimation. Mach. Vis. Appl. 31(7), 70 (2020)","journal-title":"Mach. Vis. Appl."},{"key":"1392_CR24","doi-asserted-by":"crossref","unstructured":"Xia, F., Wang, P., Chen, X., Yuille, A.L.: Joint multi-person pose estimation and semantic part segmentation. In: Proceedings of the 29th IEEE Conference on Computer Vision and Pattern Recognition, pp. 6080\u20136089 (2017)","DOI":"10.1109\/CVPR.2017.644"},{"key":"1392_CR25","unstructured":"Xiao, D., Zhong, P.: Image semantic segmentation using deep convolutional nets, fully connected conditional random fields, and dilated convolution. In: Proceedings of the 21st IEEE International Conference on High Performance Computing and Communications, pp. 6080\u20136089 (2017)"},{"key":"1392_CR26","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1016\/j.neucom.2021.09.031","volume":"466","author":"J Gui","year":"2021","unstructured":"Gui, J., Zhang, H.: Learning rates for multi-task regularization networks. Neurocomputing 466, 243\u2013251 (2021)","journal-title":"Neurocomputing"},{"key":"1392_CR27","doi-asserted-by":"crossref","unstructured":"Misra, I., Shrivastava, A., Gupta, A., Hebert, M.: Cross-stitch networks for multi-task learning. In: Proceedings of the 28th IEEE Conference on Computer Vision and Pattern Recognition, pp. 3994\u20134003 (2016)","DOI":"10.1109\/CVPR.2016.433"},{"key":"1392_CR28","doi-asserted-by":"crossref","unstructured":"Liang, X., Zhou, H., Xing, E.P.: Dynamic-structured semantic propagation network. In: Proceedings of the 30th IEEE Conference on Computer Vision and Pattern Recognition, pp. 752\u2013761 (2018)","DOI":"10.1109\/CVPR.2018.00085"},{"issue":"6","key":"1392_CR29","doi-asserted-by":"publisher","first-page":"349","DOI":"10.1016\/j.neucom.2020.01.123","volume":"444","author":"Y Xu","year":"2021","unstructured":"Xu, Y., Piao, Z., Zhang, Z., Liu, W., Gao, S.: Sunnet: a novel framework for simultaneous human parsing and pose estimation. Neurocomputing 444(6), 349\u2013355 (2021)","journal-title":"Neurocomputing"},{"issue":"2","key":"1392_CR30","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s00138-021-01172-y","volume":"32","author":"X Yan","year":"2021","unstructured":"Yan, X., Chen, Z., Wu, Q.M.J., Lu, M., Sun, L.: 3mnet: multi-task, multi-level and multi-channel feature aggregation network for salient object detection. Mach. Vis. Appl. 32(2), 1\u201313 (2021)","journal-title":"Mach. Vis. Appl."},{"key":"1392_CR31","doi-asserted-by":"crossref","unstructured":"Kirillov, A., Girshick, R.B., He, K., Doll\u00e1r, P.: Panoptic feature pyramid networks. In: Proceedings of the 31st IEEE Conference on Computer Vision and Pattern Recognition, pp. 6399\u20136408 (2019)","DOI":"10.1109\/CVPR.2019.00656"},{"key":"1392_CR32","doi-asserted-by":"crossref","unstructured":"Cheng, B., Collins, M.D., Zhu, Y., Liu, T., Huang, T.S., Adam, H., Chen, L.: Panoptic-deeplab: a simple, strong, and fast baseline for bottom-up panoptic segmentation. In: Proceedings of the 32nd IEEE Conference on Computer Vision and Pattern Recognition, pp. 12472\u201312482 (2020)","DOI":"10.1109\/CVPR42600.2020.01249"},{"key":"1392_CR33","doi-asserted-by":"crossref","unstructured":"Papandreou, G., Zhu, T., Chen, L., Gidaris, S., Tompson, J., Murphy, K.: Personlab: person pose estimation and instance segmentation with a bottom-up, part-based, geometric embedding model. In: Proceedings of the 15th European Conference on Computer Vision, pp. 282\u2013299 (2018)","DOI":"10.1007\/978-3-030-01264-9_17"},{"key":"1392_CR34","doi-asserted-by":"crossref","unstructured":"Lin, T., Doll\u00e1r, P., Girshick, R.B., He, K., Hariharan, B., Belongie, S.J.: Feature pyramid networks for object detection. In: Proceedings of the 29th IEEE Conference on Computer Vision and Pattern Recognition, pp. 936\u2013944 (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"1392_CR35","doi-asserted-by":"crossref","unstructured":"Woo, S., Park, J., Lee, J., Kweon, I.S.: CBAM: convolutional block attention module. In: Proceedings of the 15th European Conference on Computer Vision, pp. 3\u201319 (2018)","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"1392_CR36","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., Wang, J.: Deep high-resolution representation learning for human pose estimation. In: Proceedings of the 31st IEEE Conference on Computer Vision and Pattern Recognition, pp. 5693\u20135703 (2019)","DOI":"10.1109\/CVPR.2019.00584"},{"key":"1392_CR37","doi-asserted-by":"crossref","unstructured":"Bras\u00f3, G., Kister, N., Leal-Taix\u00e9, L.: The center of attention: center-keypoint grouping via attention for multi-person pose estimation. In: Proceedings of the 18th International Conference on Computer Vision, pp. 11833\u201311843 (2021)","DOI":"10.1109\/ICCV48922.2021.01164"},{"key":"1392_CR38","unstructured":"Chen, K., Wang, J., Pang, J., Cao, Y., Xiong, Y., Li, X., Sun, S., Feng, W.: Mmdetection: open mmlab detection toolbox and benchmark. CoRR arXiv:1906.07155 (2019)"},{"issue":"1","key":"1392_CR39","doi-asserted-by":"publisher","first-page":"29:1","DOI":"10.1145\/3418217","volume":"17","author":"J Li","year":"2021","unstructured":"Li, J., Zhao, J., Lang, C., Li, Y., Wei, Y., Guo, G., Sim, T., Yan, S., Feng, J.: Multi-human parsing with a graph-based generative adversarial model. ACM Trans. Multimed. Comput. Commun. Appl. 17(1), 29:1-29:21 (2021)","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"key":"1392_CR40","doi-asserted-by":"crossref","unstructured":"Yang, L., Song, Q., Wang, Z., Liu, Z., Xu, S., Li, Z.: Quality-aware network for human parsing. CoRR arXiv:2103.05997 (2021)","DOI":"10.1109\/TMM.2022.3217413"},{"key":"1392_CR41","doi-asserted-by":"publisher","first-page":"5599","DOI":"10.1109\/TIP.2022.3192989","volume":"31","author":"S Zhang","year":"2022","unstructured":"Zhang, S., Cao, X., Qi, G., Song, Z., Zhou, J.: Aiparsing: anchor-free instance-level human parsing. IEEE Trans. Image Process. 31, 5599\u20135612 (2022)","journal-title":"IEEE Trans. Image Process."},{"key":"1392_CR42","unstructured":"Chen, X., Wang, X., Gao, L., Song, J.: Repparser: end-to-end multiple human parsing with representative parts. CoRR arXiv:2208.12908 (2022)"},{"key":"1392_CR43","unstructured":"Crawshaw, M., Koseck\u00e1, J.: SLAW: scaled loss approximate weighting for efficient multi-task learning. CoRR arXiv:2109.08218 (2021)"},{"key":"1392_CR44","unstructured":"Crawshaw, M.: Multi-task learning with deep neural networks: a survey. CoRR arXiv:2009.09796 (2020)"},{"key":"1392_CR45","unstructured":"Kendall, A., Gal, Y., Cipolla, R.: Multi-task learning using uncertainty to weigh losses for scene geometry and semantics. In: Proceedings of the 30th IEEE Conference on Computer Vision and Pattern Recognition, pp. 7482\u20137491 (2018)"}],"container-title":["Machine Vision and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-023-01392-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00138-023-01392-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-023-01392-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,20]],"date-time":"2023-07-20T19:02:53Z","timestamp":1689879773000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00138-023-01392-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,5]]},"references-count":45,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2023,7]]}},"alternative-id":["1392"],"URL":"https:\/\/doi.org\/10.1007\/s00138-023-01392-4","relation":{},"ISSN":["0932-8092","1432-1769"],"issn-type":[{"value":"0932-8092","type":"print"},{"value":"1432-1769","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,5,5]]},"assertion":[{"value":"30 September 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 March 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 March 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 May 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"46"}}