{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T09:28:09Z","timestamp":1780392489343,"version":"3.54.1"},"publisher-location":"Cham","reference-count":44,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031729065","type":"print"},{"value":"9783031729072","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72907-2_24","type":"book-chapter","created":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T15:22:17Z","timestamp":1730301737000},"page":"407-423","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Segment, Lift and\u00a0Fit: Automatic 3D Shape Labeling from\u00a02D Prompts"],"prefix":"10.1007","author":[{"given":"Jianhao","family":"Li","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tianyu","family":"Sun","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhongdao","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Enze","family":"Xie","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Bailan","family":"Feng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hongbo","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ze","family":"Yuan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ke","family":"Xu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jiaheng","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ping","family":"Luo","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,10,31]]},"reference":[{"key":"24_CR1","doi-asserted-by":"crossref","unstructured":"Caesar, H., et al.: nuscenes: a multimodal dataset for autonomous driving. arXiv preprint arXiv:1903.11027 (2019)","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"24_CR2","doi-asserted-by":"crossref","unstructured":"Chen, K., et al.: Hybrid task cascade for instance segmentation. In: CVPR, pp. 4974\u20134983 (2019)","DOI":"10.1109\/CVPR.2019.00511"},{"key":"24_CR3","doi-asserted-by":"crossref","unstructured":"Deng, J., Shi, S., Li, P., Zhou, W., Zhang, Y., Li, H.: Voxel R-CNN: towards high performance voxel-based 3D object detection. arXiv:2012.15712 (2020)","DOI":"10.1609\/aaai.v35i2.16207"},{"key":"24_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1007\/978-3-319-45886-1_18","volume-title":"Pattern Recognition","author":"F Engelmann","year":"2016","unstructured":"Engelmann, F., St\u00fcckler, J., Leibe, B.: Joint object pose estimation and shape reconstruction in urban street scenes using 3D shape priors. In: Rosenhahn, B., Andres, B. (eds.) GCPR 2016. LNCS, vol. 9796, pp. 219\u2013230. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-45886-1_18"},{"key":"24_CR5","doi-asserted-by":"crossref","unstructured":"Engelmann, F., St\u00fcckler, J., Leibe, B.: SAMP: shape and motion priors for 4D vehicle reconstruction. In: 2017 IEEE Winter Conference on Applications of Computer Vision (WACV), pp. 400\u2013408. IEEE (2017)","DOI":"10.1109\/WACV.2017.51"},{"issue":"6","key":"24_CR6","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1145\/358669.358692","volume":"24","author":"MA Fischler","year":"1981","unstructured":"Fischler, M.A., Bolles, R.C.: Random sample consensus: a paradigm for model fitting with applications to image analysis and automated cartography. Commun. ACM 24(6), 381\u2013395 (1981)","journal-title":"Commun. ACM"},{"key":"24_CR7","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? The kitti vision benchmark suite. In: CVPR (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"issue":"12","key":"24_CR8","doi-asserted-by":"publisher","first-page":"8717","DOI":"10.1109\/TCSVT.2022.3197395","volume":"32","author":"J Guo","year":"2022","unstructured":"Guo, J., Liu, J., Xu, D.: 3D-pruning: a model compression framework for efficient 3D action recognition. IEEE Trans. Circuits Syst. Video Technol. 32(12), 8717\u20138729 (2022)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"24_CR9","doi-asserted-by":"crossref","unstructured":"Heylen, J., et al.: Monocinis: camera independent monocular 3D object detection using instance segmentation. In: ICCV, pp. 923\u2013934 (2021)","DOI":"10.1109\/ICCVW54120.2021.00108"},{"key":"24_CR10","doi-asserted-by":"crossref","unstructured":"Huang, Y., Zheng, W., Zhang, Y., Zhou, J., Lu, J.: Tri-perspective view for vision-based 3D semantic occupancy prediction. arXiv preprint arXiv:2302.07817 (2023)","DOI":"10.1109\/CVPR52729.2023.00890"},{"key":"24_CR11","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: ICLR (2015)"},{"key":"24_CR12","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et al.: Segment anything. arXiv:2304.02643 (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"24_CR13","doi-asserted-by":"publisher","unstructured":"Kundu, A., Li, Y., Rehg, J.M.: 3D-RCNN: instance-level 3D object reconstruction via render-and-compare. In: CVPR (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00375","DOI":"10.1109\/CVPR.2018.00375"},{"key":"24_CR14","doi-asserted-by":"crossref","unstructured":"Lang, A.H., Vora, S., Caesar, H., Zhou, L., Yang, J., Beijbom, O.: Pointpillars: fast encoders for object detection from point clouds. In: CVPR, pp. 12697\u201312705 (2019)","DOI":"10.1109\/CVPR.2019.01298"},{"key":"24_CR15","doi-asserted-by":"crossref","unstructured":"Liu, C., et al.: Multimodal transformer for automatic 3D annotation and object detection. In: ECCV (2022)","DOI":"10.1007\/978-3-031-19839-7_38"},{"key":"24_CR16","doi-asserted-by":"publisher","first-page":"5287","DOI":"10.1109\/TIP.2022.3193290","volume":"31","author":"J Liu","year":"2022","unstructured":"Liu, J., Guo, J., Xu, D.: Apsnet: toward adaptive point sampling for efficient 3D action recognition. IEEE Trans. Image Process. 31, 5287\u20135302 (2022)","journal-title":"IEEE Trans. Image Process."},{"key":"24_CR17","doi-asserted-by":"publisher","first-page":"5649","DOI":"10.1109\/TMM.2022.3198011","volume":"25","author":"J Liu","year":"2022","unstructured":"Liu, J., Guo, J., Xu, D.: Geometrymotion-transformer: an end-to-end framework for 3D action recognition. IEEE Trans. Multimedia 25, 5649\u20135661 (2022)","journal-title":"IEEE Trans. Multimedia"},{"key":"24_CR18","unstructured":"Liu, J., et al.: 3D-queryis: a query-based framework for 3D instance segmentation. arXiv preprint arXiv:2211.09375 (2022)"},{"key":"24_CR19","doi-asserted-by":"crossref","unstructured":"Liu, J., et al.: LTA-PCS: learnable task-agnostic point cloud sampling. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 28035\u201328045 (2024)","DOI":"10.1109\/CVPR52733.2024.02648"},{"issue":"12","key":"24_CR20","doi-asserted-by":"publisher","first-page":"4711","DOI":"10.1109\/TCSVT.2021.3101847","volume":"31","author":"J Liu","year":"2021","unstructured":"Liu, J., Xu, D.: Geometrymotion-net: a strong two-stream baseline for 3D action recognition. IEEE Trans. Circuits Syst. Video Technol. 31(12), 4711\u20134721 (2021)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"24_CR21","doi-asserted-by":"crossref","unstructured":"Liu, S., Li, T., Chen, W., Li, H.: Soft rasterizer: a differentiable renderer for image-based 3D reasoning. ICCV (2019)","DOI":"10.1109\/ICCV.2019.00780"},{"key":"24_CR22","doi-asserted-by":"crossref","unstructured":"Liu, Z., Zhou, D., Lu, F., Fang, J., Zhang, L.: Autoshape: real-time shape-aware monocular 3D object detection. In: ICCV, pp. 15641\u201315650 (2021)","DOI":"10.1109\/ICCV48922.2021.01535"},{"key":"24_CR23","doi-asserted-by":"crossref","unstructured":"McCraith, R., Insafutdinov, E., Neumann, L., Vedaldi, A.: Lifting 2D object locations to 3D by discounting lidar outliers across objects and views (2022)","DOI":"10.1109\/ICRA46639.2022.9811693"},{"key":"24_CR24","doi-asserted-by":"crossref","unstructured":"Meng, Q., Wang, W., Zhou, T., Shen, J., Van\u00a0Gool, L., Dai, D.: Weakly supervised 3D object detection from lidar point cloud. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58601-0_31"},{"key":"24_CR25","unstructured":"Ming\u00a0Fang, Z.L.: Occupancy dataset for nuscenes (2023). https:\/\/github.com\/FANG-MING\/occupancy-for-nuscenes"},{"key":"24_CR26","series-title":"Applied Mathematical Sciences","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1007\/0-387-22746-6_2","volume-title":"Level Set Methods and Dynamic Implicit Surfaces","author":"S Osher","year":"2003","unstructured":"Osher, S., Fedkiw, R.: Signed distance functions. In: Osher, S., Fedkiw, R. (eds.) Level Set Methods and Dynamic Implicit Surfaces. AMS, vol. 153, pp. 17\u201322. Springer, New York (2003). https:\/\/doi.org\/10.1007\/0-387-22746-6_2"},{"key":"24_CR27","doi-asserted-by":"crossref","unstructured":"Park, J.J., Florence, P., Straub, J., Newcombe, R., Lovegrove, S.: Deepsdf: learning continuous signed distance functions for shape representation. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00025"},{"key":"24_CR28","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1007\/978-3-031-19769-7_8","volume-title":"ECCV 2022","author":"L Peng","year":"2022","unstructured":"Peng, L., et al.: Lidar point cloud guided monocular 3D object detection. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13661, pp. 123\u2013139. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19769-7_8"},{"key":"24_CR29","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"593","DOI":"10.1007\/978-3-642-37331-2_45","volume-title":"Computer Vision \u2013 ACCV 2012","author":"VA Prisacariu","year":"2013","unstructured":"Prisacariu, V.A., Segal, A.V., Reid, I.: Simultaneous monocular 2D segmentation, 3D pose recovery and 3D reconstruction. In: Lee, K.M., Matsushita, Y., Rehg, J.M., Hu, Z. (eds.) ACCV 2012. LNCS, vol. 7724, pp. 593\u2013606. Springer, Heidelberg (2013). https:\/\/doi.org\/10.1007\/978-3-642-37331-2_45"},{"key":"24_CR30","unstructured":"Qi, C.R., Su, H., Mo, K., Guibas, L.J.: Pointnet: deep learning on point sets for 3D classification and segmentation. arXiv preprint arXiv:1612.00593 (2016)"},{"key":"24_CR31","doi-asserted-by":"crossref","unstructured":"Qin, Z., Wang, J., Lu, Y.: Weakly supervised 3D object detection from point clouds. ACMMM (2020)","DOI":"10.1145\/3394171.3413805"},{"issue":"8","key":"24_CR32","first-page":"2647","volume":"43","author":"S Shi","year":"2020","unstructured":"Shi, S., Wang, Z., Shi, J., Wang, X., Li, H.: From points to parts: 3D object detection from point cloud with part-aware and part-aggregation network. IEEE Trans. Pattern Anal. Mach. Intell. 43(8), 2647\u20132664 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"24_CR33","doi-asserted-by":"crossref","unstructured":"Song, X., et al.: Apollocar3d: a large 3D car instance understanding benchmark for autonomous driving. In: CVPR, pp. 5452\u20135462 (2019)","DOI":"10.1109\/CVPR.2019.00560"},{"key":"24_CR34","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"240","DOI":"10.1007\/978-3-319-67558-9_28","volume-title":"Deep Learning in Medical Image Analysis and Multimodal Learning for Clinical Decision Support","author":"CH Sudre","year":"2017","unstructured":"Sudre, C.H., Li, W., Vercauteren, T., Ourselin, S., Jorge Cardoso, M.: Generalised dice overlap as a deep learning loss function for highly unbalanced segmentations. In: Cardoso, M.J., et al. (eds.) DLMIA\/ML-CDS -2017. LNCS, vol. 10553, pp. 240\u2013248. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-67558-9_28"},{"key":"24_CR35","doi-asserted-by":"crossref","unstructured":"Tang, Y.S., Lee, G.H.: Transferable semi-supervised 3D object detection from RGB-D data. In: ICCV, pp. 1931\u20131940 (2019)","DOI":"10.1109\/ICCV.2019.00202"},{"key":"24_CR36","unstructured":"OpenPCDet Development Team: Openpcdet: an open-source toolbox for 3D object detection from point clouds (2020). https:\/\/github.com\/open-mmlab\/OpenPCDet"},{"key":"24_CR37","doi-asserted-by":"crossref","unstructured":"Wang, R., Yang, N., Stueckler, J., Cremers, D.: Directshape: photometric alignment of shape priors for visual vehicle pose and shape estimation (2020)","DOI":"10.1109\/ICRA40945.2020.9197095"},{"key":"24_CR38","doi-asserted-by":"crossref","unstructured":"Wang, X., et al.: Openoccupancy: a large scale benchmark for surrounding semantic occupancy perception. arXiv preprint arXiv:2303.03991 (2023)","DOI":"10.1109\/ICCV51070.2023.01636"},{"key":"24_CR39","doi-asserted-by":"crossref","unstructured":"Wei, Y., Su, S., Lu, J., Zhou, J.: FGR: frustum-aware geometric reasoning for weakly supervised 3D vehicle detection (2021)","DOI":"10.1109\/ICRA48506.2021.9561245"},{"key":"24_CR40","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1007\/978-3-031-19842-7_11","volume-title":"ECCV 2022","author":"Y Wei","year":"2022","unstructured":"Wei, Y., Wei, Z., Rao, Y., Li, J., Zhou, J., Lu, J.: Lidar distillation: bridging the beam-induced domain gap for 3D object detection. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13699, pp. 179\u2013195. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19842-7_11"},{"key":"24_CR41","doi-asserted-by":"crossref","unstructured":"Wei, Y., Zhao, L., Zheng, W., Zhu, Z., Zhou, J., Lu, J.: Surroundocc: multi-camera 3D occupancy prediction for autonomous driving. arXiv preprint arXiv:2303.09551 (2023)","DOI":"10.1109\/ICCV51070.2023.01986"},{"issue":"10","key":"24_CR42","doi-asserted-by":"publisher","first-page":"3337","DOI":"10.3390\/s18103337","volume":"18","author":"Y Yan","year":"2018","unstructured":"Yan, Y., Mao, Y., Li, B.: Second: sparsely embedded convolutional detection. Sensors 18(10), 3337 (2018)","journal-title":"Sensors"},{"key":"24_CR43","doi-asserted-by":"crossref","unstructured":"Yang, H., et al.: GD-MAE: generative decoder for MAE pre-training on lidar point clouds. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9403\u20139414 (2023)","DOI":"10.1109\/CVPR52729.2023.00907"},{"key":"24_CR44","doi-asserted-by":"crossref","unstructured":"Zakharov, S., Kehl, W., Bhargava, A., Gaidon, A.: Autolabeling 3D objects with differentiable rendering of SDF shape priors. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.01224"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72907-2_24","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T15:34:30Z","timestamp":1730302470000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72907-2_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,31]]},"ISBN":["9783031729065","9783031729072"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72907-2_24","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,31]]},"assertion":[{"value":"31 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}