{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,4]],"date-time":"2025-11-04T11:08:45Z","timestamp":1762254525616,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":63,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,12]],"date-time":"2023-06-12T00:00:00Z","timestamp":1686528000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,12]]},"DOI":"10.1145\/3591106.3592235","type":"proceedings-article","created":{"date-parts":[[2023,6,8]],"date-time":"2023-06-08T22:33:38Z","timestamp":1686263618000},"page":"217-225","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Intra-inter Modal Attention Blocks for RGB-D Semantic Segmentation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2701-4782","authenticated-orcid":false,"given":"Soyun","family":"Choi","sequence":"first","affiliation":[{"name":"Inha University, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5671-3232","authenticated-orcid":false,"given":"Youjia","family":"Zhang","sequence":"additional","affiliation":[{"name":"Inha University, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1774-9168","authenticated-orcid":false,"given":"Sungeun","family":"Hong","sequence":"additional","affiliation":[{"name":"Inha University, Republic of Korea"}]}],"member":"320","published-online":{"date-parts":[[2023,6,12]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2644615"},{"key":"e_1_3_2_1_2_1","volume-title":"DCANet: Differential Convolution Attention Network for RGB-D Semantic Segmentation. arXiv preprint arXiv:2210.06747","author":"Bai Lizhi","year":"2022","unstructured":"Lizhi Bai, Jun Yang, Chunqi Tian, Yaoru Sun, Maoyu Mao, Yanjun Xu, and Weirong Xu. 2022. DCANet: Differential Convolution Attention Network for RGB-D Semantic Segmentation. arXiv preprint arXiv:2210.06747 (2022)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-006-7934-5"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00700"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00246"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/83.902291"},{"key":"e_1_3_2_1_7_1","volume-title":"Semantic image segmentation with deep convolutional nets and fully connected crfs. arXiv preprint arXiv:1412.7062","author":"Chen Liang-Chieh","year":"2014","unstructured":"Liang-Chieh Chen, George Papandreou, Iasonas Kokkinos, Kevin Murphy, and Alan\u00a0L Yuille. 2014. Semantic image segmentation with deep convolutional nets and fully connected crfs. arXiv preprint arXiv:1412.7062 (2014)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2699184"},{"key":"e_1_3_2_1_9_1","volume-title":"Rethinking atrous convolution for semantic image segmentation. arXiv preprint arXiv:1706.05587","author":"Chen Liang-Chieh","year":"2017","unstructured":"Liang-Chieh Chen, George Papandreou, Florian Schroff, and Hartwig Adam. 2017. Rethinking atrous convolution for semantic image segmentation. arXiv preprint arXiv:1706.05587 (2017)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58621-8_33"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01138"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01249"},{"key":"e_1_3_2_1_14_1","volume-title":"Key instance selection for unsupervised video object segmentation. arXiv preprint arXiv:1906.07851","author":"Cho Donghyeon","year":"2019","unstructured":"Donghyeon Cho, Sungeun Hong, Sungil Kang, and Jiwon Kim. 2019. Key instance selection for unsupervised video object segmentation. arXiv preprint arXiv:1906.07851 (2019)."},{"key":"e_1_3_2_1_15_1","volume-title":"Indoor semantic segmentation using depth information. arXiv preprint arXiv:1301.3572","author":"Couprie Camille","year":"2013","unstructured":"Camille Couprie, Cl\u00e9ment Farabet, Laurent Najman, and Yann LeCun. 2013. Indoor semantic segmentation using depth information. arXiv preprint arXiv:1301.3572 (2013)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00254"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.304"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.79"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.539"},{"key":"e_1_3_2_1_20_1","volume-title":"Proc. of Asian Conf. on Computer Vision (ACCV). Springer, 213\u2013228","author":"Hazirbas Caner","year":"2016","unstructured":"Caner Hazirbas, Lingni Ma, Csaba Domokos, and Daniel Cremers. 2016. Fusenet: Incorporating depth into semantic segmentation via fusion-based cnn architecture. In Proc. of Asian Conf. on Computer Vision (ACCV). Springer, 213\u2013228."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00366"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00770"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2014.6907236"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1049\/el.2020.0278"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00745"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2019.8803025"},{"key":"e_1_3_2_1_28_1","volume-title":"Rednet: Residual encoder-decoder network for indoor rgb-d semantic segmentation. arXiv preprint arXiv:1806.01054","author":"Jiang Jindong","year":"2018","unstructured":"Jindong Jiang, Lunan Zheng, Fei Luo, and Zhijun Zhang. 2018. Rednet: Residual encoder-decoder network for indoor rgb-d semantic segmentation. arXiv preprint arXiv:1806.01054 (2018)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00298"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00106"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9812204"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.147"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.549"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202213"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00288"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.178"},{"key":"e_1_3_2_1_38_1","volume-title":"Proc. of Int\u2019l Conf. on Computer Vision (ICCV). 4980\u20134989","author":"Park Seong-Jin","year":"2017","unstructured":"Seong-Jin Park, Ki-Sang Hong, and Seungyong Lee. 2017. Rdfnet: Rgb-d multi-level residual feature fusion for indoor semantic segmentation. In Proc. of Int\u2019l Conf. on Computer Vision (ICCV). 4980\u20134989."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553479"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.556"},{"key":"e_1_3_2_1_41_1","volume-title":"Proc. of Computer Vision and Pattern Recognition (CVPR). IEEE, 2759\u20132766","author":"Ren Xiaofeng","year":"2012","unstructured":"Xiaofeng Ren, Liefeng Bo, and Dieter Fox. 2012. Rgb-(d) scene labeling: Features and algorithms. In Proc. of Computer Vision and Pattern Recognition (CVPR). IEEE, 2759\u20132766."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561675"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298655"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2005.852206"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME51207.2021.9428155"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR48806.2021.9412787"},{"key":"e_1_3_2_1_48_1","volume-title":"Proc. of Neural Information Processing Systems (NeurIPS) 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Proc. of Neural Information Processing Systems (NeurIPS) 30 (2017)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46454-1_40"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01252-6_9"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00813"},{"key":"e_1_3_2_1_52_1","volume-title":"W-net: A deep model for fully unsupervised image segmentation. arXiv preprint arXiv:1711.08506","author":"Xia Xide","year":"2017","unstructured":"Xide Xia and Brian Kulis. 2017. W-net: A deep model for fully unsupervised image segmentation. arXiv preprint arXiv:1711.08506 (2017)."},{"key":"e_1_3_2_1_53_1","first-page":"12077","article-title":"SegFormer: Simple and efficient design for semantic segmentation with transformers","volume":"34","author":"Xie Enze","year":"2021","unstructured":"Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose\u00a0M Alvarez, and Ping Luo. 2021. SegFormer: Simple and efficient design for semantic segmentation with transformers. Proc. of Neural Information Processing Systems (NeurIPS) 34 (2021), 12077\u201312090.","journal-title":"Proc. of Neural Information Processing Systems (NeurIPS)"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.634"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3185766"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2021.3084855"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2021.3066071"},{"key":"e_1_3_2_1_58_1","volume-title":"Proc. of Asian Conf. on Computer Vision (ACCV). 90\u2013107","author":"Zhang Youjia","year":"2022","unstructured":"Youjia Zhang, Soyun Choi, and Sungeun Hong. 2022. Spatio-channel Attention Blocks for Cross-modal Crowd Counting. In Proc. of Asian Conf. on Computer Vision (ACCV). 90\u2013107."},{"key":"e_1_3_2_1_59_1","volume-title":"Attention-based dual supervised decoder for RGBD semantic segmentation. arXiv preprint arXiv:2201.01427","author":"Zhang Yang","year":"2022","unstructured":"Yang Zhang, Yang Yang, Chenyun Xiong, Guodong Sun, and Yanwen Guo. 2022. Attention-based dual supervised decoder for RGBD semantic segmentation. arXiv preprint arXiv:2201.01427 (2022)."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00423"},{"key":"e_1_3_2_1_61_1","volume-title":"Proc. of Asian Conf. on Computer Vision (ACCV).","author":"Zhou Hao","year":"2020","unstructured":"Hao Zhou, Lu Qi, Zhaoliang Wan, Hai Huang, and Xu Yang. 2020. RGB-D co-attention network for semantic segmentation. In Proc. of Asian Conf. on Computer Vision (ACCV)."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00457"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00068"}],"event":{"name":"ICMR '23: International Conference on Multimedia Retrieval","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Thessaloniki Greece","acronym":"ICMR '23"},"container-title":["Proceedings of the 2023 ACM International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592235","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3591106.3592235","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:51:22Z","timestamp":1750182682000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592235"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,12]]},"references-count":63,"alternative-id":["10.1145\/3591106.3592235","10.1145\/3591106"],"URL":"https:\/\/doi.org\/10.1145\/3591106.3592235","relation":{},"subject":[],"published":{"date-parts":[[2023,6,12]]},"assertion":[{"value":"2023-06-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}