{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T16:22:11Z","timestamp":1775578931069,"version":"3.50.1"},"reference-count":64,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,5,29]]},"DOI":"10.1109\/icra48891.2023.10160590","type":"proceedings-article","created":{"date-parts":[[2023,7,4]],"date-time":"2023-07-04T17:20:56Z","timestamp":1688491256000},"page":"8216-8223","source":"Crossref","is-referenced-by-count":178,"title":["Mask3D: Mask Transformer for 3D Semantic Instance Segmentation"],"prefix":"10.1109","author":[{"given":"Jonas","family":"Schult","sequence":"first","affiliation":[{"name":"RWTH Aachen University,Computer Vision Group,Germany"}]},{"given":"Francis","family":"Engelmann","sequence":"additional","affiliation":[{"name":"ETH Zürich,Computer Vision and Learning Group,Switzerland"}]},{"given":"Alexander","family":"Hermans","sequence":"additional","affiliation":[{"name":"RWTH Aachen University,Computer Vision Group,Germany"}]},{"given":"Or","family":"Litany","sequence":"additional","affiliation":[{"name":"NVIDIA,Santa Clara,USA"}]},{"given":"Siyu","family":"Tang","sequence":"additional","affiliation":[{"name":"ETH Zürich,Computer Vision and Learning Group,Switzerland"}]},{"given":"Bastian","family":"Leibe","sequence":"additional","affiliation":[{"name":"RWTH Aachen University,Computer Vision 
Group,Germany"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.170"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"ref3","author":"Chen","year":"2022","journal-title":"STPLS3D: A Large-Scale Synthetic and Real Aerial Photogrammetry 3D Point Cloud Dataset"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01518"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"ref6","article-title":"Per-Pixel Classification is Not All You Need for Semantic Segmentation","author":"Cheng","year":"2021","journal-title":"In Advances in Neural Information Processing Systems"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19821-2_39"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00319"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.261"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01231-1_35"},{"key":"ref11","article-title":"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale","volume-title":"In International Conference on Learning Representations","author":"Dosovitskiy","year":"2021"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-33676-9_4"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00905"},{"key":"ref14","article-title":"A density-based algorithm for discovering clusters in large spatial databases with noise","volume-title":"In ACM SIGKDD International Conference on Knowledge Discovery and Data Mining","author":"Ester","year":"1996"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000022288.19776.77"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00961"},{"key":"ref17","author":"Graham","year":"2017","journal-title":"Submanifold Sparse Convolutional 
Networks"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00301"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00042"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00455"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01533"},{"key":"ref24","article-title":"Machine Analysis of Bubble Chamber Pictures","volume-title":"In International Conference on High Energy Accelerators and Instrumentation","author":"Hough","year":"1959"},{"key":"ref25","article-title":"Dynamic Filter Networks","author":"Jia","year":"2016","journal-title":"Neural Information Processing Systems"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00492"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1002\/nav.3800020109"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/iccv.2019.00935"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00831"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-007-0095-3"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00278"},{"key":"ref32","author":"Liu","year":"2019","journal-title":"MASC: Multi-Scale Affinity with Sparse Convolution for 3D Instance Segmentation"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00294"},{"key":"ref35","article-title":"Decoupled Weight Decay Regularization","volume-title":"In International Conference on Learning 
Representations","author":"Loshchilov","year":"2019"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2015.7353481"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00290"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00022"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00738"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01644"},{"key":"ref41","article-title":"Deep Hough Voting for 3D Object Detection in Point Clouds","volume-title":"In International Conference on Computer Vision","author":"Charles","year":"2019"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.16"},{"key":"ref43","article-title":"Volumetric and Multi-View CNNs for Object Classification on 3D Data","volume-title":"In IEEE Conference on Computer Vision and Pattern Recognition","author":"Charles","year":"2016"},{"key":"ref44","article-title":"PointNet++: Deep Hierarchical Feature Learning on Point Sets in a Metric Space","author":"Charles","year":"2017","journal-title":"In Advances in Neural Information Processing Systems"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2016.2577031"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19827-4_8"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20080-9_28"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1117\/12.2520589"},{"issue":"1","key":"ref50","first-page":"1929","article-title":"Dropout: A Simple Way to Prevent Neural Networks from Overfitting","volume":"15","author":"Srivastava","year":"2014","journal-title":"Journal of Machine Learning Research"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.255"},{"key":"ref52","article-title":"Fourier Features Let Networks Learn High Frequency Functions in Low Dimensional 
Domains","author":"Tancik","year":"2020","journal-title":"In Advances in Neural Information Processing Systems"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00651"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_17"},{"key":"ref55","article-title":"Attention Is All You Need","author":"Vaswani","year":"2017","journal-title":"In Advances in Neural Information Processing Systems"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00273"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1145\/3072959.3073608"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00272"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00422"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298801"},{"key":"ref61","article-title":"Learning Object Bounding Boxes for 3D Instance Segmentation on Point Clouds","author":"Yang","year":"2019","journal-title":"In Advances in Neural Information Processing Systems"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00407"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01595"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ICME52920.2022.9859996"}],"event":{"name":"2023 IEEE International Conference on Robotics and Automation (ICRA)","location":"London, United Kingdom","start":{"date-parts":[[2023,5,29]]},"end":{"date-parts":[[2023,6,2]]}},"container-title":["2023 IEEE International Conference on Robotics and Automation 
(ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10160211\/10160212\/10160590.pdf?arnumber=10160590","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,1]],"date-time":"2024-03-01T11:39:35Z","timestamp":1709293175000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10160590\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,29]]},"references-count":64,"URL":"https:\/\/doi.org\/10.1109\/icra48891.2023.10160590","relation":{},"subject":[],"published":{"date-parts":[[2023,5,29]]}}}