{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,12]],"date-time":"2026-02-12T14:05:11Z","timestamp":1770905111403,"version":"3.50.1"},"reference-count":46,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100009133","name":"Karlsruhe Institute of Technology","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100009133","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,19]]},"DOI":"10.1109\/icra55743.2025.11128709","type":"proceedings-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T17:28:56Z","timestamp":1756834136000},"page":"5798-5805","source":"Crossref","is-referenced-by-count":3,"title":["QueryCAD: Grounded Question Answering for CAD Models"],"prefix":"10.1109","author":[{"given":"Claudius","family":"Kienle","sequence":"first","affiliation":[{"name":"ArtiMinds Robotics,Karlsruhe,Germany"}]},{"given":"Benjamin","family":"Alt","sequence":"additional","affiliation":[{"name":"ArtiMinds Robotics,Karlsruhe,Germany"}]},{"given":"Darko","family":"Katic","sequence":"additional","affiliation":[{"name":"ArtiMinds Robotics,Karlsruhe,Germany"}]},{"given":"Rainer","family":"J\u00e4kel","sequence":"additional","affiliation":[{"name":"ArtiMinds Robotics,Karlsruhe,Germany"}]},{"given":"Jan","family":"Peters","sequence":"additional","affiliation":[{"name":"IAS Lab,Computer Science Department,TU Darmstadt,Germany"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"crossref","first-page":"103226","DOI":"10.1016\/j.cad.2022.103226","article-title":"Hierarchical CADNet: Learning from B-Reps for Machining Feature Recognition","volume":"147","author":"Colligan","year":"2022","journal-title":"Computer-Aided Design"},{"key":"ref2","first-page":"9493","article-title":"Code as Policies: Language Model Programs for Embodied Control","volume-title":"2023 IEEE International Conference on Robotics and Automation (ICRA)","author":"Liang"},{"key":"ref3","doi-asserted-by":"crossref","DOI":"10.1109\/ICRA57147.2024.10611143","article-title":"RoboGrind: Intuitive and Interactive Surface Treatment with Industrial Robots","author":"Alt","year":"2024"},{"key":"ref4","first-page":"652","article-title":"PointNet: Deep Learning on Point Sets for 3D Classification and Segmentation","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","author":"Qi","year":"2017"},{"key":"ref5","article-title":"PointNet++: Deep Hierarchical Feature Learning on Point Sets in a Metric Space","volume-title":"Advances in Neural Information Processing Systems","volume":"30","author":"Qi","year":"2017"},{"key":"ref6","first-page":"23 192","article-title":"PointNeXt: Revisiting PointNet++ with Improved Training and Scaling Strategies","volume":"35","author":"Qian","year":"2022","journal-title":"Advances in neural information processing systems"},{"key":"ref7","first-page":"21736","article-title":"PartSLIP: Low-Shot Part Segmentation for 3D Point Clouds via Pretrained Image-Language Models","volume-title":"2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Liu"},{"key":"ref8","volume-title":"PartSLIP++: Enhancing Low-Shot 3D Part Segmentation via Multi-View Instance Segmentation and Maximum Likelihood Estimation","author":"Zhou","year":"2023"},{"key":"ref9","first-page":"10965","article-title":"Grounded Language-Image Pre-Training","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Li"},{"key":"ref10","volume-title":"Point-SAM: Promptable 3D Segmentation Model for Point Clouds","author":"Zhou","year":"2024"},{"key":"ref11","first-page":"9224","article-title":"3D Semantic Segmentation with Submanifold Sparse Convolutional Networks","volume-title":"2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Graham"},{"key":"ref12","author":"Huang","year":"2023","journal-title":"Chat-3D v2: Bridging 3D Scene and Large Language Models with Object Identifiers"},{"key":"ref13","first-page":"2911","article-title":"3D-VisTA: Pre-trained Transformer for 3D Vision and Text Alignment","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV)","author":"Zhu","year":"2023"},{"key":"ref14","first-page":"5607","article-title":"CLIP-Guided Vision-Language Pre-Training for Question Answering in 3D Scenes","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops","author":"Parelli"},{"key":"ref15","first-page":"19129","article-title":"ScanQA: 3D Question Answering for Spatial Scene Understanding","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Azuma","year":"2022"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58565-5_13"},{"key":"ref17","volume-title":"SQA3D: Situated Question Answering in 3D Scenes","author":"Ma","year":"2023"},{"key":"ref18","volume-title":"3DRP-Net: 3D Relative Position-aware Network for 3D Visual Grounding","author":"Wang","year":"2023"},{"key":"ref19","first-page":"2928","article-title":"3DVG-Transformer: Relation Modeling for Visual Grounding on Point Clouds","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV)","author":"Zhao","year":"2021"},{"key":"ref20","first-page":"2662","article-title":"Distilling Coarse-to-Fine Semantic Matching Knowledge for Weakly Supervised 3D Visual Grounding","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV","author":"Wang","year":"2023"},{"key":"ref21","first-page":"20522","article-title":"Language Conditioned Spatial Relation Reasoning for 3D Object Grounding","volume-title":"Advances in Neural Information Processing Systems","volume":"35","author":"Chen"},{"key":"ref22","first-page":"15524","article-title":"Multi-View Transformer for 3D Visual Grounding","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Huang","year":"2022"},{"key":"ref23","first-page":"V11AT11A003","article-title":"Graph Representation of 3D CAD Models for Machining Feature Recognition With Deep Learning","volume-title":"International design engineering technical conferences and computers and information in engineering conference","volume":"84003","author":"Cao"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/s12206-023-2403-4"},{"key":"ref25","first-page":"12773","article-title":"BRepNet: A Topological Message Passing System for Solid Models","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Lambourne","year":"2021"},{"key":"ref26","first-page":"11703","article-title":"UV-Net: Learning From Boundary Representations","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Jayaraman","year":"2021"},{"issue":"01","key":"ref27","first-page":"8279","article-title":"MeshNet: Mesh Neural Network for 3D Shape Representation","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","volume":"33","author":"Feng"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3306346.3322959"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3526212"},{"issue":"12","key":"ref30","first-page":"1165","article-title":"MFPointNet: A Point Cloud-Based Neural Network Using Selective Downsampling Layer for Machining Feature Recognition","volume-title":"Machines","volume":"10","author":"Lei","year":"2022"},{"key":"ref31","doi-asserted-by":"crossref","first-page":"12","DOI":"10.1016\/j.cad.2018.03.006","article-title":"FeatureNet: Machining feature recognition based on 3D Convolution Neural Network","volume":"101","author":"Zhang","year":"2018","journal-title":"Computer-Aided Design"},{"key":"ref32","first-page":"1912","article-title":"3D ShapeNets: A deep representation for volumetric shapes","volume-title":"2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Wu"},{"key":"ref33","doi-asserted-by":"crossref","first-page":"915","DOI":"10.1016\/j.promfg.2020.05.130","article-title":"Deep Learning Based Approach for Identifying Conventional Machining Processes from CAD Data","volume":"48","author":"Peddireddy","year":"2020","journal-title":"Procedia Manufacturing"},{"key":"ref34","first-page":"9601","article-title":"ABC: A Big CAD Model Dataset for Geometric Deep Learning","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Koch","year":"2019"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58523-5_11"},{"key":"ref36","volume-title":"Grounded SAM: Assembling Open-World Models for Diverse Visual Tasks","author":"Ren","year":"2024"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.7591\/9781501728624"},{"key":"ref38","volume-title":"Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection","author":"Liu","year":"2024"},{"key":"ref39","first-page":"4015","article-title":"Segment Anything","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV)","author":"Kirillov","year":"2023"},{"key":"ref40","volume-title":"Papers with Code - COCO minival Benchmark (Object Detection)"},{"key":"ref41","volume-title":"Papers with Code - ODinW Benchmark (ZeroShot Object Detection)"},{"key":"ref42","volume-title":"Papers with Code - MSCOCO Benchmark (ZeroShot Object Detection)"},{"key":"ref43","volume-title":"An Introduction to Ray Tracing","author":"Glassner","year":"1989"},{"key":"ref44","first-page":"24824","article-title":"Chain-of-Thought Prompting Elicits Reasoning in Large Language Models","volume-title":"Advances in Neural Information Processing Systems","volume":"35","author":"Wei","year":"2022"},{"key":"ref45","article-title":"The Llama 3 Herd of Models","author":"Dubey","year":"2024"},{"key":"ref46","article-title":"GPT-4 Technical Report","author":"Achiam","year":"2024"}],"event":{"name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","location":"Atlanta, GA, USA","start":{"date-parts":[[2025,5,19]]},"end":{"date-parts":[[2025,5,23]]}},"container-title":["2025 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11127273\/11127223\/11128709.pdf?arnumber=11128709","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T06:08:47Z","timestamp":1756879727000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11128709\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,19]]},"references-count":46,"URL":"https:\/\/doi.org\/10.1109\/icra55743.2025.11128709","relation":{},"subject":[],"published":{"date-parts":[[2025,5,19]]}}}