{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T23:56:09Z","timestamp":1772927769401,"version":"3.50.1"},"reference-count":9,"publisher":"Institute of Electronics, Information and Communications Engineers (IEICE)","issue":"3","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEICE Trans. Inf. &amp; Syst."],"published-print":{"date-parts":[[2026,3,1]]},"DOI":"10.1587\/transinf.2025dvl0006","type":"journal-article","created":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T22:08:01Z","timestamp":1757628481000},"page":"460-463","source":"Crossref","is-referenced-by-count":0,"title":["Leveraging 2D-VLM for Label-Free 3D Segmentation in Large-Scale Outdoor Scene Understanding"],"prefix":"10.1587","volume":"E109.D","author":[{"given":"Toshihiko","family":"NISHIMURA","sequence":"first","affiliation":[{"name":"NTT Inc."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hirofumi","family":"ABE","sequence":"additional","affiliation":[{"name":"NTT Inc."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kazuhiko","family":"MURASAKI","sequence":"additional","affiliation":[{"name":"NTT Inc."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Taiga","family":"YOSHIDA","sequence":"additional","affiliation":[{"name":"NTT Inc."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ryuichi","family":"TANIDA","sequence":"additional","affiliation":[{"name":"NTT Inc."}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"532","reference":[{"key":"1","unstructured":"[1] A. Radford and et al., \u201cLearning transferable visual models from natural language supervision,\u201d International Conference on Machine Learning, PMLR, 2021."},{"key":"2","doi-asserted-by":"crossref","unstructured":"[2] R. Chen, Y. Liu, L. Kong, X. Zhu, Y. Ma, Y. Li, Y. Hou, Y. Qiao, and W. Wang, \u201cClip2scene: Towards label-efficient 3d scene understanding by clip,\u201d Proceedings of the Conference on Computer Vision and Pattern Recognition, 2023. 10.1109\/cvpr52729.2023.00678","DOI":"10.1109\/CVPR52729.2023.00678"},{"key":"3","unstructured":"[3] A. Takmaz and et al., \u201cOpenmask3d: Open-vocabulary 3d instance segmentation,\u201d Proceedings of the 37th International Conference on Neural Information Processing Systems (NIPS), 2023."},{"key":"4","doi-asserted-by":"crossref","unstructured":"[4] Z. Huang, X. Wu, X. Chen, H. Zhao, L. Zhu, and J. Lasenby, \u201cOpenins3d: Snap and lookup for 3d open-vocabulary instance segmentation,\u201d European Conference on Computer Vision, Springer Nature Switzerland, 2024. 10.1007\/978-3-031-73033-7_10","DOI":"10.1007\/978-3-031-73033-7_10"},{"key":"5","unstructured":"[5] T. Ren and et al., \u201cGrounded sam: Assembling open-world models for diverse visual tasks,\u201d International Conference on Computer Vision (ICCV) Demo Track, 2023."},{"key":"6","doi-asserted-by":"crossref","unstructured":"[6] S. Liu, Z. Zeng, T. Ren, F. Li, H. Zhang, J. Yang, Q. Jiang, C. Li, J. Yang, H. Su, J. Zhu, and L. Zhang, \u201cGrounding dino: Marrying dino with grounded pre-training for open-set object detection,\u201d European Conference on Computer Vision, Springer Nature Switzerland, 2024. 10.1007\/978-3-031-72970-6_3","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"7","doi-asserted-by":"crossref","unstructured":"[7] A. Kirillov, E. Mintun, N. Ravi, H. Mao, C. Rolland, L. Gustafson, T. Xiao, S. Whitehead, A.C. Berg, W.-Y. Lo, P. Doll\u00e1r, and R. Girshick, \u201cSegment anything,\u201d Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2023. 10.1109\/iccv51070.2023.00371","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"8","doi-asserted-by":"crossref","unstructured":"[8] X. Wu, L. Jiang, P.-S. Wang, Z. Liu, X. Liu, Y. Qiao, W. Ouyang, T. He, and H. Zhao, \u201cPoint transformer v3: Simpler faster stronger,\u201d Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.4840-4851, 2024. 10.1109\/cvpr52733.2024.00463","DOI":"10.1109\/CVPR52733.2024.00463"},{"key":"9","doi-asserted-by":"crossref","unstructured":"[9] S. Peng, K. Genova, C. Jiang, A. Tagliasacchi, M. Pollefeys, and T. Funkhouser, \u201cOpenscene: 3d scene understanding with open vocabularies,\u201d Proceedings of the Conference on Computer Vision and Pattern Recognition, 2023. 10.1109\/cvpr52729.2023.00085","DOI":"10.1109\/CVPR52729.2023.00085"}],"container-title":["IEICE Transactions on Information and Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transinf\/E109.D\/3\/E109.D_2025DVL0006\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T04:10:41Z","timestamp":1772856641000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transinf\/E109.D\/3\/E109.D_2025DVL0006\/_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,1]]},"references-count":9,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2026]]}},"URL":"https:\/\/doi.org\/10.1587\/transinf.2025dvl0006","relation":{},"ISSN":["0916-8532","1745-1361"],"issn-type":[{"value":"0916-8532","type":"print"},{"value":"1745-1361","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,3,1]]},"article-number":"2025DVL0006"}}