{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:41:24Z","timestamp":1755823284703,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"the Key Research and Development Program of Shaanxi","award":["Grant 2023-YBGY-076"],"award-info":[{"award-number":["Grant 2023-YBGY-076"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3611934","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:40Z","timestamp":1698391660000},"page":"2016-2024","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Object Part Parsing with Hierarchical Dual Transformer"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4968-148X","authenticated-orcid":false,"given":"Jiamin","family":"Chen","sequence":"first","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2029-6588","authenticated-orcid":false,"given":"Jianlou","family":"Si","sequence":"additional","affiliation":[{"name":"SenseTime, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2609-7408","authenticated-orcid":false,"given":"Naihao","family":"Liu","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8585-283X","authenticated-orcid":false,"given":"Yao","family":"Wu","sequence":"additional","affiliation":[{"name":"SenseTime, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1970-8634","authenticated-orcid":false,"given":"Li","family":"Niu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8761-5563","authenticated-orcid":false,"given":"Chen","family":"Qian","sequence":"additional","affiliation":[{"name":"SenseTime, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Hierarchical Pyramid Representations for Semantic Segmentation. arXiv preprint arXiv:2104.01792","author":"Aizawa Hiroaki","year":"2021","unstructured":"Hiroaki Aizawa, Yukihiro Domae, and Kunihito Kato. 2021. Hierarchical Pyramid Representations for Semantic Segmentation. arXiv preprint arXiv:2104.01792 (2021)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00584"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"e_1_3_2_1_4_1","volume-title":"Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation. In European Conference on Computer Vision.","author":"Chen Liang-Chieh","year":"2018","unstructured":"Liang-Chieh Chen, Yukun Zhu, George Papandreou, Florian Schroff, and Hartwig Adam. 2018. Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation. In European Conference on Computer Vision."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.254"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"e_1_3_2_1_7_1","first-page":"17864","article-title":"Per-pixel classification is not all you need for semantic segmentation","volume":"34","author":"Cheng Bowen","year":"2021","unstructured":"Bowen Cheng, Alex Schwing, and Alexander Kirillov. 2021. Per-pixel classification is not all you need for semantic segmentation. Advances in Neural Information Processing Systems, Vol. 34 (2021), 17864--17875.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_8_1","unstructured":"MMSegmentation Contributors. 2020. MMSegmentation: OpenMMLab Semantic Segmentation Toolbox and Benchmark. https:\/\/github.com\/open-mmlab\/mmsegmentation."},{"key":"e_1_3_2_1_9_1","volume-title":"International Conference on Learning Representations.","author":"Dosovitskiy Alexey","year":"2020","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, et al. 2020. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.316"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00403"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00763"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_47"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6728"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.01.126"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00559"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00166"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00131"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58520-4_26"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00580"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2021.104190"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6832"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58598-3_24"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00117"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01240"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i3.16339"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00584"},{"key":"e_1_3_2_1_29_1","volume-title":"Adaptive Graph Representation Learning and Reasoning for Face Parsing. arXiv e-prints","author":"Te Gusi","year":"2021","unstructured":"Gusi Te, Wei Hu, Yinglu Liu, Hailin Shi, and Tao Mei. 2021. Adaptive Graph Representation Learning and Reasoning for Face Parsing. arXiv e-prints (2021), arXiv-2101."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58610-2_16"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00895"},{"key":"e_1_3_2_1_32_1","volume-title":"Anchor DETR: Query Design for Transformer-Based Object Detection. arXiv preprint arXiv:2109.07107","author":"Wang Yingming","year":"2021","unstructured":"Yingming Wang, Xiangyu Zhang, Tong Yang, and Jian Sun. 2021. Anchor DETR: Query Design for Transformer-Based Object Detection. arXiv preprint arXiv:2109.07107 (2021)."},{"key":"e_1_3_2_1_33_1","first-page":"12077","article-title":"SegFormer: Simple and efficient design for semantic segmentation with transformers","volume":"34","author":"Xie Enze","year":"2021","unstructured":"Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M Alvarez, and Ping Luo. 2021. SegFormer: Simple and efficient design for semantic segmentation with transformers. Advances in Neural Information Processing Systems, Vol. 34 (2021), 12077--12090.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2017.8296296"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11571-020-09615-4"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58539-6_11"},{"key":"e_1_3_2_1_37_1","first-page":"7281","article-title":"Hrformer: High-resolution vision transformer for dense predict","volume":"34","author":"Yuan Yuhui","year":"2021","unstructured":"Yuhui Yuan, Rao Fu, Lang Huang, Weihong Lin, Chao Zhang, Xilin Chen, and Jingdong Wang. 2021. Hrformer: High-resolution vision transformer for dense predict. Advances in Neural Information Processing Systems, Vol. 34 (2021), 7281--7293.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_38_1","volume-title":"Ocnet: Object context network for scene parsing. arXiv preprint arXiv:1809.00916","author":"Yuan Yuhui","year":"2018","unstructured":"Yuhui Yuan, Lang Huang, Jianyuan Guo, Chao Zhang, Xilin Chen, and Jingdong Wang. 2018. Ocnet: Object context network for scene parsing. arXiv preprint arXiv:1809.00916 (2018)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00899"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.660"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01368"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00412"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00681"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-25393-0_25"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Ottawa ON Canada","acronym":"MM '23"},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611934","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3611934","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:04:37Z","timestamp":1755821077000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611934"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":44,"alternative-id":["10.1145\/3581783.3611934","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3611934","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}