{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T17:33:09Z","timestamp":1777570389516,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":80,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Natural Sciences and Engineering Research Council of Canada (NSERC) Discovery Grants","award":["RGPIN-2019-04575"],"award-info":[{"award-number":["RGPIN-2019-04575"]}]},{"name":"Guangzhou Key Research and Development Project","award":["202206080008"],"award-info":[{"award-number":["202206080008"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62001464"],"award-info":[{"award-number":["62001464"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3611738","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:40Z","timestamp":1698391660000},"page":"3307-3316","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":15,"title":["SemanticRT: A Large-Scale Dataset and Method for Robust Semantic Segmentation in Multispectral Images"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4059-5902","authenticated-orcid":false,"given":"Wei","family":"Ji","sequence":"first","affiliation":[{"name":"University of Alberta, Edmonton, AB, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0811-4988","authenticated-orcid":false,"given":"Jingjing","family":"Li","sequence":"additional","affiliation":[{"name":"University of Alberta, Edmonton, AB, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3498-8283","authenticated-orcid":false,"given":"Cheng","family":"Bian","sequence":"additional","affiliation":[{"name":"ByteDance Ltd, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5333-1394","authenticated-orcid":false,"given":"Zhicheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"JancsiTech, Guangdong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3261-3533","authenticated-orcid":false,"given":"Li","family":"Cheng","sequence":"additional","affiliation":[{"name":"University of Alberta, Edmonton, AB, Canada"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2020.2974099"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.3390\/jcm8091446"},{"key":"e_1_3_2_1_3_1","volume-title":"Semantickitti: A dataset for semantic scene understanding of lidar sequences. In ICCV. 9297--9307.","author":"Behley Jens","year":"2019","unstructured":"Jens Behley, Martin Garbade, Andres Milioto, Jan Quenzel, Sven Behnke, Cyrill Stachniss, and Jurgen Gall. 2019. Semantickitti: A dataset for semantic scene understanding of lidar sequences. In ICCV. 9297--9307."},{"key":"e_1_3_2_1_4_1","volume-title":"Semantic image segmentation with deep convolutional nets and fully connected crfs. arXiv preprint arXiv:1412.7062","author":"Chen Liang-Chieh","year":"2014","unstructured":"Liang-Chieh Chen, George Papandreou, Iasonas Kokkinos, Kevin Murphy, and Alan L Yuille. 2014. Semantic image segmentation with deep convolutional nets and fully connected crfs. arXiv preprint arXiv:1412.7062 (2014)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2699184"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Liang-Chieh Chen Yukun Zhu George Papandreou Florian Schroff and Hartwig Adam. 2018. Encoder-decoder with atrous separable convolution for semantic image segmentation. In ECCV. 801--818.","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.infrared.2019.103184"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Xiaokang Chen Kwan-Yee Lin Jingbo Wang Wayne Wu Chen Qian Hongsheng Li and Gang Zeng. 2020b. Bi-directional cross-modality feature propagation with separation-and-aggregation gate for RGB-D semantic segmentation. In ECCV. 561--577.","DOI":"10.1007\/978-3-030-58621-8_33"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Marius Cordts Mohamed Omran Sebastian Ramos Timo Rehfeld Markus Enzweiler Rodrigo Benenson Uwe Franke Stefan Roth and Bernt Schiele. 2016. The cityscapes dataset for semantic urban scene understanding. In CVPR. 3213--3223.","DOI":"10.1109\/CVPR.2016.350"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2006.06.010"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Fuqin Deng Hua Feng Mingjian Liang Hongmin Wang Yong Yang Yuan Gao Junfeng Chen Junjie Hu Xiyue Guo and Tin Lun Lam. 2021. FEANet: Feature-Enhanced Attention Network for RGB-Thermal Real-time Semantic Segmentation. In IROS. 4467--4473.","DOI":"10.1109\/IROS51168.2021.9636084"},{"key":"e_1_3_2_1_12_1","unstructured":"Deng-Ping Fan Ming-Ming Cheng Jiang-Jiang Liu Shang-Hua Gao Qibin Hou and Ali Borji. 2018. Salient objects in clutter: Bringing salient object detection to the foreground. In ECCV. 186--202."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2020.2972974"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Jun Fu Jing Liu Haijie Tian Yong Li Yongjun Bao Zhiwei Fang and Hanqing Lu. 2019. Dual attention network for scene segmentation. In CVPR. 3146--3154.","DOI":"10.1109\/CVPR.2019.00326"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2018.05.018"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Andreas Geiger Philip Lenz and Raquel Urtasun. 2012. Are we ready for autonomous driving? the kitti vision benchmark suite. In CVPR. 3354--3361.","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"Qishen Ha Kohei Watanabe Takumi Karasawa Yoshitaka Ushiku and Tatsuya Harada. 2017. MFNet: Towards real-time semantic segmentation for autonomous vehicles with multi-spectral scenes. In IROS. 5108--5115.","DOI":"10.1109\/IROS.2017.8206396"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2016. Deep residual learning for image recognition. In CVPR. 770--778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_19_1","volume-title":"Ccnet: Criss-cross attention for semantic segmentation. In ICCV. 603--612.","author":"Huang Zilong","year":"2019","unstructured":"Zilong Huang, Xinggang Wang, Lichao Huang, Chang Huang, Yunchao Wei, and Wenyu Liu. 2019. Ccnet: Criss-cross attention for semantic segmentation. In ICCV. 603--612."},{"key":"e_1_3_2_1_20_1","unstructured":"INO. 2012. Video Analytics Dataset. https:\/\/www.ino.ca\/en\/technologies\/video-analytics-dataset\/."},{"key":"e_1_3_2_1_21_1","volume-title":"2023 a. Segment anything is not always perfect: An investigation of sam on different real-world applications. arXiv preprint arXiv:2304.05750","author":"Ji Wei","year":"2023","unstructured":"Wei Ji, Jingjing Li, Qi Bi, Tingwei Liu, Wenbo Li, and Li Cheng. 2023 a. Segment anything is not always perfect: An investigation of sam on different real-world applications. arXiv preprint arXiv:2304.05750 (2023)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"Wei Ji Jingjing Li Cheng Bian Zongwei Zhou Jiaying Zhao Alan L Yuille and Li Cheng. 2023 b. Multispectral Video Semantic Segmentation: A Benchmark Dataset and Baseline. In CVPR. 1094--1104.","DOI":"10.1109\/CVPR52729.2023.00112"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Wei Ji Jingjing Li Shuang Yu Miao Zhang Yongri Piao Shunyu Yao Qi Bi Kai Ma Yefeng Zheng Huchuan Lu et al. 2021a. Calibrated RGB-D salient object detection. In CVPR. 9471--9481.","DOI":"10.1109\/CVPR46437.2021.00935"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3154931"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Wei Ji Shuang Yu Junde Wu Kai Ma Cheng Bian Qi Bi Jingjing Li Hanruo Liu Li Cheng and Yefeng Zheng. 2021b. Learning calibrated medical image segmentation via multi-rater agreement modeling. In CVPR. 12341--12351.","DOI":"10.1109\/CVPR46437.2021.01216"},{"key":"e_1_3_2_1_26_1","volume-title":"LLVIP: A Visible-infrared Paired Dataset for Low-light Vision. In ICCVW. 3496--3504.","author":"Jia Xinyu","year":"2021","unstructured":"Xinyu Jia, Chuang Zhu, Minzhen Li, Wenqi Tang, and Wenli Zhou. 2021. LLVIP: A Visible-infrared Paired Dataset for Low-light Vision. In ICCVW. 3496--3504."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Alexander Kirillov Eric Mintun Nikhila Ravi Hanzi Mao Chloe Rolland Laura Gustafson Tete Xiao Spencer Whitehead Alexander C Berg Wan-Yen Lo et al. 2023. Segment anything. arXiv preprint arXiv:2304.02643 (2023).","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2019.02.003"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3009373"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01734-1"},{"key":"e_1_3_2_1_31_1","unstructured":"Jingjing Li Tianyu Yang Wei Ji Jue Wang and Li Cheng. 2022. Exploring Denoised Cross-Video Contrast for Weakly-Supervised Temporal Action Localization. In CVPR. 19914--19924."},{"key":"e_1_3_2_1_32_1","volume-title":"Refinenet: Multi-path refinement networks for high-resolution semantic segmentation. In CVPR. 1925--1934.","author":"Lin Guosheng","year":"2017","unstructured":"Guosheng Lin, Anton Milan, Chunhua Shen, and Ian Reid. 2017. Refinenet: Multi-path refinement networks for high-resolution semantic segmentation. In CVPR. 1925--1934."},{"key":"e_1_3_2_1_33_1","unstructured":"Songtao Liu Di Huang et al. 2018. Receptive field block net for accurate and fast object detection. In ECCV. 385--400."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Ziwei Liu Xiaoxiao Li Ping Luo Chen-Change Loy and Xiaoou Tang. 2015. Semantic Image Segmentation via Deep Parsing Network. In ICCV. 1377--1385.","DOI":"10.1109\/ICCV.2015.162"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Ze Liu Yutong Lin Yue Cao Han Hu Yixuan Wei Zheng Zhang Stephen Lin and Baining Guo. 2021. Swin transformer: Hierarchical vision transformer using shifted windows. In ICCV. 10012--10022.","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"Jonathan Long Evan Shelhamer and Trevor Darrell. 2015. Fully convolutional networks for semantic segmentation. In CVPR. 3431--3440.","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2018.02.004"},{"key":"e_1_3_2_1_38_1","volume-title":"FusionGAN: A generative adversarial network for infrared and visible image fusion. Information fusion","author":"Ma Jiayi","year":"2019","unstructured":"Jiayi Ma, Wei Yu, Pengwei Liang, Chang Li, and Junjun Jiang. 2019b. FusionGAN: A generative adversarial network for infrared and visible image fusion. Information fusion, Vol. 48 (2019), 11--26."},{"key":"e_1_3_2_1_39_1","volume-title":"Thermal infrared image semantic segmentation for night-time driving scenes based on deep learning. Multimedia Tools and Applications","author":"Maheswari B","year":"2023","unstructured":"B Maheswari and SR Reeja. 2023. Thermal infrared image semantic segmentation for night-time driving scenes based on deep learning. Multimedia Tools and Applications (2023), 1--26."},{"key":"e_1_3_2_1_40_1","volume-title":"Vspw: A large-scale dataset for video scene parsing in the wild. In CVPR. 4133--4143.","author":"Miao Jiaxu","year":"2021","unstructured":"Jiaxu Miao, Yunchao Wei, Yu Wu, Chen Liang, Guangrui Li, and Yi Yang. 2021. Vspw: A large-scale dataset for video scene parsing in the wild. In CVPR. 4133--4143."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.01.005"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Roozbeh Mottaghi Xianjie Chen Xiaobai Liu Nam-Gyu Cho Seong-Whan Lee Sanja Fidler Raquel Urtasun and Alan Yuille. 2014. The role of context for object detection and semantic segmentation in the wild. In CVPR. 891--898.","DOI":"10.1109\/CVPR.2014.119"},{"key":"e_1_3_2_1_43_1","unstructured":"Hyeonwoo Noh Seunghoon Hong and Bohyung Han. 2015. Learning deconvolution network for semantic segmentation. In ICCV. 1520--1528."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.3004325"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093520"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"crossref","unstructured":"Yongri Piao Wei Ji Jingjing Li Miao Zhang and Huchuan Lu. 2019. Depth-induced multi-scale recurrent attention network for saliency detection. In ICCV. 7254--7263.","DOI":"10.1109\/ICCV.2019.00735"},{"key":"e_1_3_2_1_47_1","volume-title":"U-net: Convolutional networks for biomedical image segmentation. In MICCAI. 234--241.","author":"Ronneberger Olaf","year":"2015","unstructured":"Olaf Ronneberger, Philipp Fischer, and Thomas Brox. 2015. U-net: Convolutional networks for biomedical image segmentation. In MICCAI. 234--241."},{"key":"e_1_3_2_1_48_1","volume-title":"Ajay Pal Singh, and Jaspreet Singh","author":"Sharma Neha","year":"2020","unstructured":"Neha Sharma, AS Arora, Ajay Pal Singh, and Jaspreet Singh. 2020. The role of infrared thermal imaging in road patrolling using unmanned aerial vehicles. Unmanned Aerial Vehicle: Applications in Agriculture and Environment (2020), 143--157."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"crossref","unstructured":"Shreyas S Shivakumar Neil Rodrigues Alex Zhou Ian D Miller Vijay Kumar and Camillo J Taylor. 2020. Pst900: Rgb-thermal calibration dataset and segmentation network. In ICRA. 9441--9447.","DOI":"10.1109\/ICRA40945.2020.9196831"},{"key":"e_1_3_2_1_50_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2019.2904733"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2020.2993143"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.10.034"},{"key":"e_1_3_2_1_54_1","volume-title":"A survey on deep learning-based architectures for semantic segmentation on 2d images. Applied Artificial Intelligence","author":"Ulku Irem","year":"2022","unstructured":"Irem Ulku and Erdem Akag\u00fcnd\u00fcz. 2022. A survey on deep learning-based architectures for semantic segmentation on 2d images. Applied Artificial Intelligence (2022), 1--45."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2924171"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"crossref","unstructured":"Panqu Wang Pengfei Chen Ye Yuan Ding Liu Zehua Huang Xiaodi Hou and Garrison Cottrell. 2018a. Understanding convolution for semantic segmentation. In WACV. 1451--1460.","DOI":"10.1109\/WACV.2018.00163"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"crossref","unstructured":"Wenhai Wang Enze Xie Xiang Li Deng-Ping Fan Kaitao Song Ding Liang Tong Lu Ping Luo and Ling Shao. 2021. Pyramid vision transformer: A versatile backbone for dense prediction without convolutions. In ICCV. 568--578.","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"crossref","unstructured":"Xiaolong Wang Ross Girshick Abhinav Gupta and Kaiming He. 2018b. Non-Local Neural Networks. In CVPR. 7794--7803.","DOI":"10.1109\/CVPR.2018.00813"},{"key":"e_1_3_2_1_59_1","volume-title":"Cbam: Convolutional block attention module. In ECCV. 3--19.","author":"Woo Sanghyun","year":"2018","unstructured":"Sanghyun Woo, Jongchan Park, Joon-Young Lee, and In So Kweon. 2018. Cbam: Convolutional block attention module. In ECCV. 3--19."},{"key":"e_1_3_2_1_60_1","first-page":"12077","article-title":"SegFormer: Simple and efficient design for semantic segmentation with transformers","volume":"34","author":"Xie Enze","year":"2021","unstructured":"Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M Alvarez, and Ping Luo. 2021. SegFormer: Simple and efficient design for semantic segmentation with transformers. NeurIPS, Vol. 34 (2021), 12077--12090.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.infrared.2020.103628"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"crossref","unstructured":"Maoke Yang Kun Yu Chi Zhang Zhiwei Li and Kuiyuan Yang. 2018. DenseASPP for Semantic Segmentation in Street Scenes. In CVPR. 3684--3692.","DOI":"10.1109\/CVPR.2018.00388"},{"key":"e_1_3_2_1_63_1","unstructured":"Changqian Yu Jingbo Wang Chao Peng Changxin Gao Gang Yu and Nong Sang. 2018. Learning a discriminative feature network for semantic segmentation. In CVPR. 1857--1866."},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICUAS.2017.7991306"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"crossref","unstructured":"Yuhui Yuan Xilin Chen and Jingdong Wang. 2020. Object-contextual representations for semantic segmentation. In ECCV. 173--190.","DOI":"10.1007\/978-3-030-58539-6_11"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"crossref","unstructured":"Hang Zhang Kristin Dana Jianping Shi Zhongyue Zhang Xiaogang Wang Ambrish Tyagi and Amit Agrawal. 2018a. Context encoding for semantic segmentation. In CVPR. 7151--7160.","DOI":"10.1109\/CVPR.2018.00747"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"crossref","unstructured":"Hang Zhang Han Zhang Chenguang Wang and Junyuan Xie. 2019b. Co-occurrent features in semantic segmentation. In CVPR. 548--557.","DOI":"10.1109\/CVPR.2019.00064"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2990341"},{"key":"e_1_3_2_1_69_1","unstructured":"Miao Zhang Jingjing Li Wei Ji Yongri Piao and Huchuan Lu. 2019a. Memory-oriented decoder for light field salient object detection. In NeurIPS. 898--908."},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"crossref","unstructured":"Miao Zhang Jie Liu Yifei Wang Yongri Piao Shunyu Yao Wei Ji Jingjing Li Huchuan Lu and Zhongxuan Luo. 2021a. Dynamic context-sensitive filtering network for video salient object detection. In ICCV. 1553--1563.","DOI":"10.1109\/ICCV48922.2021.00158"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"crossref","unstructured":"Pengyu Zhang Jie Zhao Dong Wang Huchuan Lu and Xiang Ruan. 2022. Visible-thermal UAV tracking: A large-scale benchmark and new baseline. In CVPR. 8886--8895.","DOI":"10.1109\/CVPR52688.2022.00868"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"crossref","unstructured":"Qiang Zhang Shenlu Zhao Yongjiang Luo Dingwen Zhang Nianchang Huang and Jungong Han. 2021b. ABMDRNet: Adaptive-weighted Bi-directional Modality Difference Reduction Network for RGB-T Semantic Segmentation. In CVPR. 2633--2642.","DOI":"10.1109\/CVPR46437.2021.00266"},{"key":"e_1_3_2_1_73_1","volume-title":"Exfuse: Enhancing feature fusion for semantic segmentation. In ECCV. 269--284.","author":"Zhang Zhenli","year":"2018","unstructured":"Zhenli Zhang, Xiangyu Zhang, Chao Peng, Xiangyang Xue, and Jian Sun. 2018b. Exfuse: Enhancing feature fusion for semantic segmentation. In ECCV. 269--284."},{"key":"e_1_3_2_1_74_1","unstructured":"Hengshuang Zhao Xiaojuan Qi Xiaoyong Shen Jianping Shi and Jiaya Jia. 2018. Icnet for real-time semantic segmentation on high-resolution images. In ECCV. 405--420."},{"key":"e_1_3_2_1_75_1","unstructured":"Hengshuang Zhao Jianping Shi Xiaojuan Qi Xiaogang Wang and Jiaya Jia. 2017. Pyramid scene parsing network. In CVPR. 2881--2890."},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"crossref","unstructured":"Bolei Zhou Hang Zhao Xavier Puig Sanja Fidler Adela Barriuso and Antonio Torralba. 2017. Scene parsing through ade20k dataset. In CVPR. 633--641.","DOI":"10.1109\/CVPR.2017.544"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"crossref","unstructured":"Wujie Zhou Shaohua Dong Caie Xu and Yaguan Qian. 2022. Edge-aware Guidance Fusion Network for RGB Thermal Scene Parsing. In AAAI. 3571--3579.","DOI":"10.1609\/aaai.v36i3.20269"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3086618"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2023.3242651"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2019.2959609"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611738","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3611738","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:10:57Z","timestamp":1755821457000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611738"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":80,"alternative-id":["10.1145\/3581783.3611738","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3611738","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}