{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T03:30:54Z","timestamp":1769743854156,"version":"3.49.0"},"reference-count":56,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,18]],"date-time":"2023-06-18T00:00:00Z","timestamp":1687046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,18]],"date-time":"2023-06-18T00:00:00Z","timestamp":1687046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6,18]]},"DOI":"10.1109\/ijcnn54540.2023.10191977","type":"proceedings-article","created":{"date-parts":[[2023,8,2]],"date-time":"2023-08-02T17:30:03Z","timestamp":1690997403000},"page":"1-10","source":"Crossref","is-referenced-by-count":17,"title":["Efficient Multi-Task Scene Analysis with RGB-D Transformers"],"prefix":"10.1109","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8447-0584","authenticated-orcid":false,"given":"S\u00f6hnke Benedikt","family":"Fischedick","sequence":"first","affiliation":[{"name":"Ilmenau University of Technology,Neuroinformatics and Cognitive Robotics Lab,Ilmenau,Germany,98684"}]},{"given":"Daniel","family":"Seichter","sequence":"additional","affiliation":[{"name":"Ilmenau University of Technology,Neuroinformatics and Cognitive Robotics Lab,Ilmenau,Germany,98684"}]},{"given":"Robin","family":"Schmidt","sequence":"additional","affiliation":[{"name":"Ilmenau University of Technology,Neuroinformatics and Cognitive Robotics Lab,Ilmenau,Germany,98684"}]},{"given":"Leonard","family":"Rabes","sequence":"additional","affiliation":[{"name":"Ilmenau University of Technology,Neuroinformatics and Cognitive Robotics Lab,Ilmenau,Germany,98684"}]},{"given":"Horst-Michael","family":"Gross","sequence":"additional","affiliation":[{"name":"Ilmenau University of Technology,Neuroinformatics and Cognitive Robotics Lab,Ilmenau,Germany,98684"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref12","article-title":"An Image is Worth 16&#x00D7;16 Words: Transformers for Image Recognition at Scale","author":"dosovitskiy","year":"0","journal-title":"Proc of ICLR"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01073"},{"key":"ref15","article-title":"SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers","author":"xie","year":"0","journal-title":"Proc of NeurIPS"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3054719"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00330"},{"key":"ref11","article-title":"Language models are few-shot learners","volume":"33","author":"brown","year":"0","journal-title":"Proc of NeurIPS"},{"key":"ref55","first-page":"8024","article-title":"PyTorch: An Imperative Style, High-Performance Deep Learning Library","author":"paszke","year":"2019","journal-title":"Proc of NeurIPS"},{"key":"ref10","author":"devlin","year":"2018","journal-title":"BERT Pre-training of deep bidirectional transformers for language understanding"},{"key":"ref54","year":"2023","journal-title":"FasterTransformer"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"ref19","author":"jiang","year":"2018","journal-title":"Rednet Residual encoder-decoder network for indoor rgb-d semantic segmentation"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-54181-5_14"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00115"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.597"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00542"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01249"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8968506"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24947-6_13"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00656"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00963"},{"key":"ref44","author":"yang","year":"2019","journal-title":"Deeperlab Single-shot image parser"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00902"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9340870"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"ref7","first-page":"6105","article-title":"Efficientnet: Rethinking model scaling for convolutional neural networks","author":"tan","year":"0","journal-title":"Proc of ICML"},{"key":"ref9","article-title":"Attention is all you need","volume":"30","author":"vaswani","year":"0","journal-title":"Proc of NeurIPS"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298655"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.261"},{"key":"ref40","first-page":"12 607","article-title":"In Defense of Pretrained ImageNet Architectures for Real-time Semantic Segmentation of Road-driving Images","author":"or\u0161i?","year":"0","journal-title":"Proc of CVPR"},{"key":"ref35","first-page":"1","author":"chen","year":"2020","journal-title":"Spatial Information Guided Convolution for Real-Time RGBD Semantic Segmentation"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58529-7_33"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01563"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00700"},{"key":"ref31","first-page":"801","article-title":"Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation","author":"chen","year":"0","journal-title":"Proc of ECCV"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2017.8296484"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2019.8803757"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01252-6_9"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN55064.2022.9892852"},{"key":"ref1","first-page":"202","article-title":"The MORPHIA Project: First Results of a Long-Term User Study in an Elderly Care Scenario from Robotic Point of View","author":"wengefeld","year":"0","journal-title":"Proc of ISR2"},{"key":"ref39","author":"chen","year":"2017","journal-title":"Rethinking atrous convolution for semantic image segmentation"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.660"},{"key":"ref24","first-page":"1","author":"fooladgar","year":"2019","journal-title":"Multi-Modal Attention-based Fusion Model for Semantic Segmentation of RGB-Depth Images"},{"key":"ref23","article-title":"Self-supervised Model Adaptation For Multimodal Semantic Segmentation","author":"valada","year":"2019","journal-title":"IJCV"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561675"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58621-8_33"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2019.8803025"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2019.8803146"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.533"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46475-6_34"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2019.8803360"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8206396"}],"event":{"name":"2023 International Joint Conference on Neural Networks (IJCNN)","location":"Gold Coast, Australia","start":{"date-parts":[[2023,6,18]]},"end":{"date-parts":[[2023,6,23]]}},"container-title":["2023 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10190990\/10190992\/10191977.pdf?arnumber=10191977","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,21]],"date-time":"2023-08-21T17:46:02Z","timestamp":1692639962000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10191977\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,18]]},"references-count":56,"URL":"https:\/\/doi.org\/10.1109\/ijcnn54540.2023.10191977","relation":{},"subject":[],"published":{"date-parts":[[2023,6,18]]}}}