{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:04:33Z","timestamp":1765339473562,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755727","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:55:00Z","timestamp":1761375300000},"page":"719-728","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Multi-Task Label Discovery via Hierarchical Task Tokens for Partially Annotated Dense Predictions"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-6668-1140","authenticated-orcid":false,"given":"Jingdong","family":"Zhang","sequence":"first","affiliation":[{"name":"Texas A&amp;M University, College Station, Texas, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7986-6143","authenticated-orcid":false,"given":"Hanrong","family":"Ye","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0144-9489","authenticated-orcid":false,"given":"Xin","family":"Li","sequence":"additional","affiliation":[{"name":"Texas A&amp;M University, College Station, Texas, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2284-3952","authenticated-orcid":false,"given":"Wenping","family":"Wang","sequence":"additional","affiliation":[{"name":"Texas A&amp;M University, College Station, Texas, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0136-9603","authenticated-orcid":false,"given":"Dan","family":"Xu","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology, Hong Kong, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2644615"},{"key":"e_1_3_2_1_2_1","first-page":"15869","article-title":"Exploring relational context for multi-task dense prediction","author":"Br\u00fcggemann David","year":"2021","unstructured":"David Br\u00fcggemann, Menelaos Kanakis, Anton Obukhov, Stamatios Georgoulis, and Luc Van Gool. 2021. Exploring relational context for multi-task dense prediction. In ICCV. 15869-15878.","journal-title":"ICCV."},{"key":"e_1_3_2_1_3_1","unstructured":"Mang Cao Sanping Zhou Ye Deng Wenli Huang Le Wang and Jinjun Wang. [n.d.]. MSM: Multi-Scale Mamba in Multi-Task Dense Prediction. ([n.d.])."},{"key":"e_1_3_2_1_4_1","volume-title":"Luca Morreale, Mehdi Noroozi, and Sourav Bhattacharya.","author":"Chavhan Ruchika","year":"2025","unstructured":"Ruchika Chavhan, Abhinav Mehrotra, Malcolm Chadwick, Alberto Gil Ramos, Luca Morreale, Mehdi Noroozi, and Sourav Bhattacharya. 2025. Upcycling Text-to-Image Diffusion Models for Multi-Task Capabilities. arXiv preprint arXiv:2503.11905 (2025)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.350"},{"key":"e_1_3_2_1_6_1","volume-title":"Indoor semantic segmentation using depth information. arXiv preprint arXiv:1301.3572","author":"Couprie Camille","year":"2013","unstructured":"Camille Couprie, Cl\u00e9ment Farabet, Laurent Najman, and Yann LeCun. 2013. Indoor semantic segmentation using depth information. arXiv preprint arXiv:1301.3572 (2013)."},{"key":"e_1_3_2_1_7_1","first-page":"2650","article-title":"Predicting depth, surface normals and semantic labels with a common multi-scale convolutional architecture","author":"Eigen David","year":"2015","unstructured":"David Eigen and Rob Fergus. 2015. Predicting depth, surface normals and semantic labels with a common multi-scale convolutional architecture. In ICCV. 2650-2658.","journal-title":"ICCV."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-009-0275-4"},{"key":"e_1_3_2_1_9_1","first-page":"3205","article-title":"Nddr-cnn: Layerwise feature fusing in multi-task cnns by neural discriminative dimensionality reduction","author":"Gao Yuan","year":"2019","unstructured":"Yuan Gao, Jiayi Ma, Mingbo Zhao, Wei Liu, and Alan L Yuille. 2019. Nddr-cnn: Layerwise feature fusing in multi-task cnns by neural discriminative dimensionality reduction. In CVPR. 3205-3214.","journal-title":"CVPR."},{"key":"e_1_3_2_1_10_1","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He Kaiming","year":"2016","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2016. Deep residual learning for image recognition. In CVPR. 770-778.","journal-title":"CVPR."},{"key":"e_1_3_2_1_11_1","volume-title":"European Conference on Computer Vision. Springer, 257-275","author":"Hoyer Lukas","year":"2024","unstructured":"Lukas Hoyer, David Joseph Tan, Muhammad Ferjad Naeem, Luc Van Gool, and Federico Tombari. 2024. SemiVL: semi-supervised semantic segmentation with vision-language guidance. In European Conference on Computer Vision. Springer, 257-275."},{"key":"e_1_3_2_1_12_1","volume-title":"Partly Supervised Multitask Learning. arXiv preprint arXiv:2005.02523","author":"Imran Al-Zubaer","year":"2020","unstructured":"Abdullah-Al-Zubaer Imran, Chao Huang, Hui Tang, Wei Fan, Yuan Xiao, Dingjun Hao, Zhen Qian, and Demetri Terzopoulos. 2020. Partly Supervised Multitask Learning. arXiv preprint arXiv:2005.02523 (2020)."},{"key":"e_1_3_2_1_13_1","first-page":"5070","article-title":"Label propagation for deep semi-supervised learning","author":"Iscen Ahmet","year":"2019","unstructured":"Ahmet Iscen, Giorgos Tolias, Yannis Avrithis, and Ondrej Chum. 2019. Label propagation for deep semi-supervised learning. In CVPR. 5070-5079.","journal-title":"CVPR."},{"key":"e_1_3_2_1_14_1","first-page":"896","article-title":"Pseudo-label: The simple and efficient semi-supervised learning method for deep neural networks","volume":"3","author":"Lee Dong-Hyun","year":"2013","unstructured":"Dong-Hyun Lee et al., 2013. Pseudo-label: The simple and efficient semi-supervised learning method for deep neural networks. In ICML, Vol. 3. 896.","journal-title":"ICML"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2018.05.029"},{"key":"e_1_3_2_1_16_1","first-page":"18879","article-title":"Learning multiple dense prediction tasks from partially annotated data","author":"Li Wei-Hong","year":"2022","unstructured":"Wei-Hong Li, Xialei Liu, and Hakan Bilen. 2022. Learning multiple dense prediction tasks from partially annotated data. In CVPR. 18879-18889.","journal-title":"CVPR."},{"key":"e_1_3_2_1_17_1","first-page":"6936","article-title":"Bidirectional learning for domain adaptation of semantic segmentation","author":"Li Yunsheng","year":"2019","unstructured":"Yunsheng Li, Lu Yuan, and Nuno Vasconcelos. 2019. Bidirectional learning for domain adaptation of semantic segmentation. In CVPR. 6936-6945.","journal-title":"CVPR."},{"key":"e_1_3_2_1_18_1","volume-title":"NIPS","volume":"20","author":"Liu Qiuhua","year":"2007","unstructured":"Qiuhua Liu, Xuejun Liao, and Lawrence Carin. 2007. Semi-supervised multitask learning. NIPS, Vol. 20 (2007)."},{"key":"e_1_3_2_1_19_1","first-page":"1871","article-title":"End-to-end multi-task learning with attention","author":"Liu Shikun","year":"2019","unstructured":"Shikun Liu, Edward Johns, and Andrew J Davison. 2019. End-to-end multi-task learning with attention. In CVPR. 1871-1880.","journal-title":"CVPR."},{"key":"e_1_3_2_1_20_1","volume-title":"Swiss army knife: Synergizing biases in knowledge from vision foundation models for multi-task learning. arXiv preprint arXiv:2410.14633","author":"Lu Yuxiang","year":"2024","unstructured":"Yuxiang Lu, Shengcao Cao, and Yu-Xiong Wang. 2024. Swiss army knife: Synergizing biases in knowledge from vision foundation models for multi-task learning. arXiv preprint arXiv:2410.14633 (2024)."},{"key":"e_1_3_2_1_21_1","first-page":"8700","article-title":"Taskology: Utilizing task relations at scale","author":"Lu Yao","year":"2021","unstructured":"Yao Lu, Soren Pirk, Jan Dlabal, Anthony Brohan, Ankita Pasad, Zhao Chen, Vincent Casser, Anelia Angelova, and Ariel Gordon. 2021. Taskology: Utilizing task relations at scale. In CVPR. 8700-8709.","journal-title":"CVPR."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i10.17066"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00326"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00195"},{"key":"e_1_3_2_1_25_1","first-page":"3994","article-title":"Cross-stitch networks for multi-task learning","author":"Misra Ishan","year":"2016","unstructured":"Ishan Misra, Abhinav Shrivastava, Abhinav Gupta, and Martial Hebert. 2016. Cross-stitch networks for multi-task learning. In CVPR. 3994-4003.","journal-title":"CVPR."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01529"},{"key":"e_1_3_2_1_27_1","unstructured":"Alessandro Pieropan Hossein Azizpour Atsuto Maki et al. 2022. Dense FixMatch: a simple semi-supervised learning method for pixel-wise prediction tasks. arXiv preprint arXiv:2210.09919 (2022)."},{"key":"e_1_3_2_1_28_1","volume-title":"Pseudo labeling methods for semi-supervised semantic segmentation: A review and future perspectives","author":"Ran Lingyan","year":"2024","unstructured":"Lingyan Ran, Yali Li, Guoqiang Liang, and Yanning Zhang. 2024. Pseudo labeling methods for semi-supervised semantic segmentation: A review and future perspectives. IEEE Transactions on Circuits and Systems for Video Technology (2024)."},{"key":"e_1_3_2_1_29_1","first-page":"299","article-title":"Transductive semi-supervised deep learning using min-max features","author":"Shi Weiwei","year":"2018","unstructured":"Weiwei Shi, Yihong Gong, Chris Ding, Zhiheng MaXiaoyu Tao, and Nanning Zheng. 2018. Transductive semi-supervised deep learning using min-max features. In ECCV. 299-315.","journal-title":"ECCV."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"e_1_3_2_1_31_1","first-page":"596","volume-title":"NIPS","volume":"33","author":"Sohn Kihyuk","year":"2020","unstructured":"Kihyuk Sohn, David Berthelot, Nicholas Carlini, Zizhao Zhang, Han Zhang, Colin A Raffel, Ekin Dogus Cubuk, Alexey Kurakin, and Chun-Liang Li. 2020. Fixmatch: Simplifying semi-supervised learning with consistency and confidence. NIPS, Vol. 33 (2020), 596-608."},{"key":"e_1_3_2_1_32_1","first-page":"14658","article-title":"Towards Discovering the Effectiveness of Moderately Confident Samples for Semi-Supervised Learning","author":"Tang Hui","year":"2022","unstructured":"Hui Tang and Kui Jia. 2022. Towards Discovering the Effectiveness of Moderately Confident Samples for Semi-Supervised Learning. In CVPR. 14658-14667.","journal-title":"CVPR."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2025.3570425"},{"key":"e_1_3_2_1_34_1","volume-title":"NIPS","volume":"30","author":"Tarvainen Antti","year":"2017","unstructured":"Antti Tarvainen and Harri Valpola. 2017. Mean teachers are better role models: Weight-averaged consistency targets improve semi-supervised deep learning results. NIPS, Vol. 30 (2017)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2024.3389672"},{"key":"e_1_3_2_1_36_1","volume-title":"Marc Proesmans, Dengxin Dai, and Luc Van Gool.","author":"Vandenhende Simon","year":"2021","unstructured":"Simon Vandenhende, Stamatios Georgoulis, Wouter Van Gansbeke, Marc Proesmans, Dengxin Dai, and Luc Van Gool. 2021. Multi-task learning for dense prediction tasks: A survey. PAMI (2021)."},{"key":"e_1_3_2_1_37_1","volume-title":"Mti-net: Multi-scale task interaction networks for multi-task learning","author":"Vandenhende Simon","year":"2020","unstructured":"Simon Vandenhende, Stamatios Georgoulis, and Luc Van Gool. 2020. Mti-net: Multi-scale task interaction networks for multi-task learning. In ECCV. Springer, 527-543."},{"key":"e_1_3_2_1_38_1","first-page":"2505","article-title":"Semi-supervised multi-task learning for semantics and depth","author":"Wang Yufeng","year":"2022","unstructured":"Yufeng Wang, Yi-Hsuan Tsai, Wei-Chih Hung, Wenrui Ding, Shuo Liu, and Ming-Hsuan Yang. 2022. Semi-supervised multi-task learning for semantics and depth. In WACV. 2505-2514.","journal-title":"WACV."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01070"},{"key":"e_1_3_2_1_40_1","first-page":"675","article-title":"Pad-net: Multi-tasks guided prediction-and-distillation network for simultaneous depth estimation and scene parsing","author":"Xu Dan","year":"2018","unstructured":"Dan Xu, Wanli Ouyang, Xiaogang Wang, and Nicu Sebe. 2018. Pad-net: Multi-tasks guided prediction-and-distillation network for simultaneous depth estimation and scene parsing. In CVPR. 675-684.","journal-title":"CVPR."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00699"},{"key":"e_1_3_2_1_42_1","volume-title":"Unimatch v2: Pushing the limit of semi-supervised semantic segmentation","author":"Yang Lihe","year":"2025","unstructured":"Lihe Yang, Zhen Zhao, and Hengshuang Zhao. 2025. Unimatch v2: Pushing the limit of semi-supervised semantic segmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence (2025)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i3.25424"},{"volume-title":"The Thirteenth International Conference on Learning Representations.","author":"Yang Yuqi","key":"e_1_3_2_1_44_1","unstructured":"Yuqi Yang, Peng-Tao Jiang, Qibin Hou, Hao Zhang, Jinwei Chen, and Bo Li. [n.d.]. Multi-Task Dense Predictions via Unleashing the Power of Diffusion. In The Thirteenth International Conference on Learning Representations."},{"key":"e_1_3_2_1_45_1","volume-title":"Inverted Pyramid Multi-task Transformer for Dense Scene Understanding. ECCV","author":"Ye Hanrong","year":"2022","unstructured":"Hanrong Ye and Dan Xu. 2022. Inverted Pyramid Multi-task Transformer for Dense Scene Understanding. ECCV (2022)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01995"},{"key":"e_1_3_2_1_47_1","unstructured":"Hanrong Ye and Dan Xu. 2023b. TaskPrompter: Spatial-Channel Multi-Task Prompting for Dense Scene Understanding. In ICLR."},{"key":"e_1_3_2_1_48_1","unstructured":"Hanrong Ye and Dan Xu. 2024. DiffusionMTL: Learning Multi-Task Denoising Diffusion Model from Partially Annotated Data. In CVPR."},{"key":"e_1_3_2_1_49_1","first-page":"11197","article-title":"Robust learning through cross-task consistency","author":"Zamir Amir R","year":"2020","unstructured":"Amir R Zamir, Alexander Sax, Nikhil Cheerla, Rohan Suri, Zhangjie Cao, Jitendra Malik, and Leonidas J Guibas. 2020. Robust learning through cross-task consistency. In CVPR. 11197-11206.","journal-title":"CVPR."},{"key":"e_1_3_2_1_50_1","first-page":"468","article-title":"Discriminatively trained dense surface normal estimation","author":"Zeisl Bernhard","year":"2014","unstructured":"Bernhard Zeisl, Marc Pollefeys, et al., 2014. Discriminatively trained dense surface normal estimation. In ECCV. Springer, 468-484.","journal-title":"ECCV. Springer"},{"key":"e_1_3_2_1_51_1","first-page":"7223","article-title":"Joint learning of saliency detection and weakly supervised semantic segmentation","author":"Zeng Yu","year":"2019","unstructured":"Yu Zeng, Yunzhi Zhuge, Huchuan Lu, and Lihe Zhang. 2019. Joint learning of saliency detection and weakly supervised semantic segmentation. In ICCV. 7223-7233.","journal-title":"ICCV."},{"key":"e_1_3_2_1_52_1","volume-title":"Rethinking of Feature Interaction for Multi-task Learning on Dense Prediction. arXiv preprint arXiv:2312.13514","author":"Zhang Jingdong","year":"2023","unstructured":"Jingdong Zhang, Jiayuan Fan, Peng Ye, Bo Zhang, Hancheng Ye, Baopu Li, Yancheng Cai, and Tao Chen. 2023. Rethinking of Feature Interaction for Multi-task Learning on Dense Prediction. arXiv preprint arXiv:2312.13514 (2023)."},{"key":"e_1_3_2_1_53_1","volume-title":"BridgeNet: Comprehensive and Effective Feature Interactions via Bridge Feature for Multi-Task Dense Predictions","author":"Zhang Jingdong","year":"2025","unstructured":"Jingdong Zhang, Jiayuan Fan, Peng Ye, Bo Zhang, Hancheng Ye, Baopu Li, Yancheng Cai, and Tao Chen. 2025. BridgeNet: Comprehensive and Effective Feature Interactions via Bridge Feature for Multi-Task Dense Predictions. IEEE Transactions on Pattern Analysis and Machine Intelligence (2025)."},{"key":"e_1_3_2_1_54_1","first-page":"235","article-title":"Joint task-recursive learning for semantic segmentation and depth estimation","author":"Zhang Zhenyu","year":"2018","unstructured":"Zhenyu Zhang, Zhen Cui, Chunyan Xu, Zequn Jie, Xiang Li, and Jian Yang. 2018. Joint task-recursive learning for semantic segmentation and depth estimation. In ECCV. 235-251.","journal-title":"ECCV."},{"key":"e_1_3_2_1_55_1","first-page":"4106","article-title":"Pattern-affinitive propagation across depth, surface normal and semantic segmentation","author":"Zhang Zhenyu","year":"2019","unstructured":"Zhenyu Zhang, Zhen Cui, Chunyan Xu, Yan Yan, Nicu Sebe, and Jian Yang. 2019. Pattern-affinitive propagation across depth, surface normal and semantic segmentation. In CVPR. 4106-4115.","journal-title":"CVPR."},{"key":"e_1_3_2_1_56_1","first-page":"289","article-title":"Unsupervised domain adaptation for semantic segmentation via class-balanced self-training","author":"Zou Yang","year":"2018","unstructured":"Yang Zou, Zhiding Yu, BVK Kumar, and Jinsong Wang. 2018. Unsupervised domain adaptation for semantic segmentation via class-balanced self-training. In ECCV. 289-305.","journal-title":"ECCV."},{"key":"e_1_3_2_1_57_1","first-page":"5982","article-title":"Confidence regularized self-training","author":"Zou Yang","year":"2019","unstructured":"Yang Zou, Zhiding Yu, Xiaofeng Liu, BVK Kumar, and Jinsong Wang. 2019. Confidence regularized self-training. In ICCV. 5982-5991.","journal-title":"ICCV."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755727","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:00:27Z","timestamp":1765339227000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755727"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":57,"alternative-id":["10.1145\/3746027.3755727","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755727","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}