{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:55:03Z","timestamp":1781538903855,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"Beijing Natural Science Foundation","award":["4254078"],"award-info":[{"award-number":["4254078"]}]},{"name":"Funded by Frontier Technologies R&D Program of Jiangsu","award":["BF2025012"],"award-info":[{"award-number":["BF2025012"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810576","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"1222-1230","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["PSRNet: Progressive Semantic Refinement for Human Parsing via Text Conditioning and Embedding-Based Calibration"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3711-3821","authenticated-orcid":false,"given":"Ming","family":"Meng","sequence":"first","affiliation":[{"name":"School of Data Science and Intelligent Media, Communication University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6271-5239","authenticated-orcid":false,"given":"Hanwen","family":"Liu","sequence":"additional","affiliation":[{"name":"Hainan International College, Communication University of China, Hainan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-7181-7574","authenticated-orcid":false,"given":"Xingxing","family":"Xiang","sequence":"additional","affiliation":[{"name":"School of International Business, University of International Business and Economics, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3562-5612","authenticated-orcid":false,"given":"Long","family":"Ye","sequence":"additional","affiliation":[{"name":"School of Data Science and Intelligent Media, Communication University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3689-6868","authenticated-orcid":false,"given":"Lei","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computer and Artificial Intelligence, Beijing Technology and Business University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6324-1712","authenticated-orcid":false,"given":"Zhaoxin","family":"Fan","sequence":"additional","affiliation":[{"name":"Beijing Advanced Innovation Center for Future Blockchain and Privacy Computing, Beihang University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"crossref","unstructured":"Liang-Chieh Chen George Papandreou Iasonas Kokkinos Kevin Murphy and Alan\u00a0L Yuille. 2017. Deeplab: Semantic image segmentation with deep convolutional nets atrous convolution and fully connected crfs. IEEE transactions on pattern analysis and machine intelligence 40 4 (2017) 834\u2013848.","DOI":"10.1109\/TPAMI.2017.2699184"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.396"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01445"},{"key":"e_1_3_3_1_6_2","unstructured":"Xiaojia Chen Xuanhan Wang Lianli Gao Beitao Chen Jingkuan Song and HenTao Shen. 2023. Ciparsing: Unifying causality properties into multiple human parsing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.12218 (2023)."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"e_1_3_3_1_8_2","unstructured":"Zheng Ding Jieke Wang and Zhuowen Tu. 2022. Open-vocabulary universal image segmentation with maskclip. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2208.08984 (2022)."},{"key":"e_1_3_3_1_9_2","volume-title":"Advances in Neural Information Processing Systems","author":"Frome Andrea","year":"2013","unstructured":"Andrea Frome, Greg\u00a0S Corrado, Jon Shlens, Samy Bengio, Jeff Dean, Marc'\u00a0Aurelio Ranzato, and Tomas Mikolov. 2013. DeViSE: A Deep Visual-Semantic Embedding Model. In Advances in Neural Information Processing Systems , C.J. Burges, L.\u00a0Bottou, M.\u00a0Welling, Z.\u00a0Ghahramani, and K.Q. Weinberger (Eds.), Vol.\u00a026. Curran Associates, Inc.https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2013\/file\/7cce53cf90577442771720a370c3c723-Paper.pdf"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00763"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_47"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.715"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV61041.2025.00883"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58601-0_13"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00714"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00710"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02813"},{"key":"e_1_3_3_1_19_2","unstructured":"Boyi Li Kilian\u00a0Q Weinberger Serge Belongie Vladlen Koltun and Ren\u00e9 Ranftl. 2022. Language-driven semantic segmentation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2201.03546 (2022)."},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"crossref","unstructured":"Peike Li Yunqiu Xu Yunchao Wei and Yi Yang. 2020. Self-correction for human parsing. IEEE Transactions on Pattern Analysis and Machine Intelligence 44 6 (2020) 3260\u20133271.","DOI":"10.1109\/TPAMI.2020.3048039"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00443"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"crossref","unstructured":"Kunliang Liu Rize Jin Yuelong Li Jianming Wang and Wonjun Hwang. 2024. Channel and Spatial Enhancement Network for human parsing. Image and Vision Computing 152 (2024) 105332.","DOI":"10.1016\/j.imavis.2024.105332"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"crossref","unstructured":"Yunan Liu Chunpeng Wang Mingyu Lu Jian Yang Jie Gui and Shanshan Zhang. 2024. From simple to complex scenes: Learning robust feature representations for accurate human parsing. IEEE Transactions on Pattern Analysis and Machine Intelligence 46 8 (2024) 5449\u20135462.","DOI":"10.1109\/TPAMI.2024.3366769"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00695"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01240-3_26"},{"key":"e_1_3_3_1_27_2","first-page":"8748","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748\u20138763."},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01755"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014814"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"crossref","unstructured":"Jingya Song Qingxuan Shi Yihang Li and Fang Yang. 2022. Enhanced context learning with transformer for human parsing. Applied Sciences 12 15 (2022) 7821.","DOI":"10.3390\/app12157821"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02104"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00580"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00895"},{"key":"e_1_3_3_1_34_2","unstructured":"Yuxin Wu Alexander Kirillov Francisco Massa Wan-Yen Lo and Ross Girshick. 2019. Detectron2. https:\/\/github.com\/facebookresearch\/detectron2."},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00845"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01760"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"crossref","unstructured":"Lu Yang Wenhe Jia Shan Li and Qing Song. 2024. Deep learning technique for human parsing: A survey and outlook. International Journal of Computer Vision 132 8 (2024) 3270\u20133301.","DOI":"10.1007\/s11263-024-02031-9"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00045"},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"crossref","unstructured":"Sanyi Zhang Xiaochun Cao Guo-Jun Qi Zhanjie Song and Jie Zhou. 2022. AIParsing: Anchor-Free Instance-Level Human Parsing. IEEE Transactions on Image Processing 31 (2022) 5599\u20135612.","DOI":"10.1109\/TIP.2022.3192989"},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58586-0_12"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00899"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"crossref","unstructured":"Xiaomei Zhang Xiangyu Zhu Ming Tang and Zhen Lei. 2025. Deep learning for human parsing: a survey. Comput. Surveys 58 1 (2025) 1\u201333.","DOI":"10.1145\/3748717"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00892"},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.660"},{"key":"e_1_3_3_1_45_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.204"}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:12:07Z","timestamp":1781536327000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810576"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":44,"alternative-id":["10.1145\/3805622.3810576","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810576","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}