{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:36:36Z","timestamp":1765308996394,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755706","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:56:44Z","timestamp":1761375404000},"page":"2141-2149","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["DiffuSeg: Diffusion-Enhanced Cross-Modal Semantic Segmentation for RGB-D"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2124-0869","authenticated-orcid":false,"given":"Jun","family":"Yang","sequence":"first","affiliation":[{"name":"Tongji University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4316-4385","authenticated-orcid":false,"given":"Maoyu","family":"Mao","sequence":"additional","affiliation":[{"name":"Tongji University, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Segdiff: Image segmentation with diffusion probabilistic models. arXiv preprint arXiv:2112.00390","author":"Amit Tomer","year":"2021","unstructured":"Tomer Amit, Tal Shaharbany, Eliya Nachmani, and Lior Wolf. 2021. Segdiff: Image segmentation with diffusion probabilistic models. arXiv preprint arXiv:2112.00390 (2021)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19836-6_20"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2025.111379"},{"key":"e_1_3_2_1_4_1","volume-title":"Label-efficient semantic segmentation with diffusion models. arXiv preprint arXiv:2112.03126","author":"Baranchuk Dmitry","year":"2021","unstructured":"Dmitry Baranchuk, Ivan Rubachev, Andrey Voynov, Valentin Khrulkov, and Artem Babenko. 2021. Label-efficient semantic segmentation with diffusion models. arXiv preprint arXiv:2112.03126 (2021)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00584"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00462"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00700"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3049332"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58621-8_33"},{"key":"e_1_3_2_1_10_1","volume-title":"Diffusion model for camouflaged object detection. arXiv preprint arXiv:2308.00303","author":"Chen Zhennan","year":"2023","unstructured":"Zhennan Chen, Rongrong Gao, Tian-Zhu Xiang, and Fan Lin. 2023. Diffusion model for camouflaged object detection. arXiv preprint arXiv:2308.00303 (2023)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.161"},{"key":"e_1_3_2_1_12_1","volume-title":"Diffusion models beat gans on image synthesis. Advances in neural information processing systems","author":"Dhariwal Prafulla","year":"2021","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion models beat gans on image synthesis. Advances in neural information processing systems, Vol. 34 (2021), 8780-8794."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00089"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00756"},{"volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 16102-16112","author":"Girdhar Rohit","key":"e_1_3_2_1_15_1","unstructured":"Rohit Girdhar, Mannat Singh, Nikhila Ravi, Laurens van der Maaten, Armand Joulin, and Ishan Misra. 2022. Omnivore: A single model for many visual modalities. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 16102-16112."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10447191"},{"key":"e_1_3_2_1_17_1","volume-title":"Prompt-to-prompt image editing with cross attention control.(2022). URL https:\/\/arxiv.org\/abs\/2208.01626","author":"Hertz Amir","year":"2022","unstructured":"Amir Hertz, Ron Mokady, Jay Tenenbaum, Kfir Aberman, Yael Pritch, and Daniel Cohen-Or. 2022. Prompt-to-prompt image editing with cross attention control.(2022). URL https:\/\/arxiv.org\/abs\/2208.01626 (2022)."},{"key":"e_1_3_2_1_18_1","volume-title":"Denoising diffusion probabilistic models. Advances in neural information processing systems","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in neural information processing systems, Vol. 33 (2020), 6840-6851."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00298"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.01.029"},{"key":"e_1_3_2_1_21_1","volume-title":"Finding an unsupervised image segmenter in each of your deep generative models. arXiv preprint arXiv:2105.08127","author":"Melas-Kyriazi Luke","year":"2021","unstructured":"Luke Melas-Kyriazi, Christian Rupprecht, Iro Laina, and Andrea Vedaldi. 2021. Finding an unsupervised image segmenter in each of your deep generative models. arXiv preprint arXiv:2105.08127 (2021)."},{"key":"e_1_3_2_1_22_1","volume-title":"Comptr: Towards diverse bi-source dense prediction tasks via a simple yet general complementary transformer","author":"Pang Youwei","year":"2025","unstructured":"Youwei Pang, Xiaoqi Zhao, Lihe Zhang, and Huchuan Lu. 2025. Comptr: Towards diverse bi-source dense prediction tasks via a simple yet general complementary transformer. IEEE Transactions on Pattern Analysis and Machine Intelligence (2025)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01110"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561675"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"e_1_3_2_1_26_1","volume-title":"International conference on machine learning. PMLR, 2256-2265","author":"Sohl-Dickstein Jascha","year":"2015","unstructured":"Jascha Sohl-Dickstein, Eric Weiss, Niru Maheswaranathan, and Surya Ganguli. 2015. Deep unsupervised learning using nonequilibrium thermodynamics. In International conference on machine learning. PMLR, 2256-2265."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298655"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.28"},{"key":"e_1_3_2_1_29_1","volume-title":"International conference on machine learning. PMLR, 9786-9796","author":"Voynov Andrey","year":"2020","unstructured":"Andrey Voynov and Artem Babenko. 2020. Unsupervised discovery of interpretable directions in the gan latent space. In International conference on machine learning. PMLR, 9786-9796."},{"key":"e_1_3_2_1_30_1","volume-title":"International Conference on Machine Learning. PMLR, 10596-10606","author":"Voynov Andrey","year":"2021","unstructured":"Andrey Voynov, Stanislav Morozov, and Artem Babenko. 2021. Object segmentation without labels with large-scale generative models. In International Conference on Machine Learning. PMLR, 10596-10606."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV61041.2025.00176"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01252-6_9"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01187"},{"key":"e_1_3_2_1_34_1","volume-title":"International Conference on Medical Imaging with Deep Learning. PMLR, 1336-1348","author":"Wolleb Julia","year":"2022","unstructured":"Julia Wolleb, Robin Sandk\u00fchler, Florentin Bieder, Philippe Valmaggia, and Philippe C Cattin. 2022. Diffusion models for implicit image segmentation ensembles. In International Conference on Medical Imaging with Deep Learning. PMLR, 1336-1348."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i6.28418"},{"key":"e_1_3_2_1_36_1","volume-title":"SegFormer: Simple and efficient design for semantic segmentation with transformers. Advances in neural information processing systems","author":"Xie Enze","year":"2021","unstructured":"Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M Alvarez, and Ping Luo. 2021. SegFormer: Simple and efficient design for semantic segmentation with transformers. Advances in neural information processing systems, Vol. 34 (2021), 12077-12090."},{"key":"e_1_3_2_1_37_1","volume-title":"Pixel difference convolutional network for RGB-D semantic segmentation","author":"Yang Jun","year":"2023","unstructured":"Jun Yang, Lizhi Bai, Yaoru Sun, Chunqi Tian, Maoyu Mao, and Guorun Wang. 2023. Pixel difference convolutional network for RGB-D semantic segmentation. IEEE Transactions on Circuits and Systems for Video Technology (2023)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19812-0_30"},{"key":"e_1_3_2_1_39_1","volume-title":"Dformer: Rethinking rgbd representation learning for semantic segmentation. arXiv preprint arXiv:2309.09668","author":"Yin Bowen","year":"2023","unstructured":"Bowen Yin, Xuying Zhang, Zhongyu Li, Li Liu, Ming-Ming Cheng, and Qibin Hou. 2023. Dformer: Rethinking rgbd representation learning for semantic segmentation. arXiv preprint arXiv:2309.09668 (2023)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20056-4_2"},{"key":"e_1_3_2_1_41_1","volume-title":"CMX: Cross-modal fusion for RGB-X semantic segmentation with transformers","author":"Zhang Jiaming","year":"2023","unstructured":"Jiaming Zhang, Huayao Liu, Kailun Yang, Xinxin Hu, Ruiping Liu, and Rainer Stiefelhagen. 2023b. CMX: Cross-modal fusion for RGB-X semantic segmentation with transformers. IEEE Transactions on Intelligent Transportation Systems (2023)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00116"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01001"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755706","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:33:37Z","timestamp":1765308817000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755706"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":43,"alternative-id":["10.1145\/3746027.3755706","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755706","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}