{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T07:03:32Z","timestamp":1773903812634,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":41,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819609680","type":"print"},{"value":"9789819609697","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,12,8]],"date-time":"2024-12-08T00:00:00Z","timestamp":1733616000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,8]],"date-time":"2024-12-08T00:00:00Z","timestamp":1733616000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-0969-7_13","type":"book-chapter","created":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T12:54:32Z","timestamp":1733576072000},"page":"211-227","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["SeSame: Simple, Easy 3D Object Detection with\u00a0Point-Wise Semantics"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-8197-8017","authenticated-orcid":false,"given":"O.","family":"Hayeon","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2065-4031","authenticated-orcid":false,"given":"Chanuk","family":"Yang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7179-7841","authenticated-orcid":false,"given":"Kunsoo","family":"Huh","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,8]]},"reference":[{"key":"13_CR1","unstructured":"Smith, L. A disciplined approach to neural network hyper-parameters: Part 1 - learning rate, batch size, momentum, and weight decay. (2018)"},{"key":"13_CR2","doi-asserted-by":"crossref","unstructured":"Zhou, Y. & Tuzel, O. VoxelNet: End-to-End Learning for Point Cloud Based 3D Object Detection. Proceedings Of The IEEE Conference On Computer Vision And Pattern Recognition (CVPR). (2018,6)","DOI":"10.1109\/CVPR.2018.00472"},{"key":"13_CR3","doi-asserted-by":"crossref","unstructured":"Shi, S., Wang, X. & Li, H. PointRCNN: 3D object proposal generation and detection from point cloud. Proceedings Of The IEEE Computer Society Conference On Computer Vision And Pattern Recognition. 2019-June pp. 770-779 (2019)","DOI":"10.1109\/CVPR.2019.00086"},{"key":"13_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.3390\/s18103337","volume":"18","author":"Y Yan","year":"2018","unstructured":"Yan, Y., Mao, Y., Li, B.: SECOND\u202f: Sparsely embedded convolutional detection. Sensors (Switzerland). 18, 1\u201317 (2018)","journal-title":"Sensors (Switzerland)."},{"key":"13_CR5","doi-asserted-by":"crossref","unstructured":"Lang, A., Vora, S., Caesar, H., Zhou, L., Yang, J. & Beijbom, O. PointPillars: Fast encoders for object detection from point clouds. Proceedings Of The IEEE Computer Society Conference On Computer Vision And Pattern Recognition. 2019-June pp. 12689-12697 (2019)","DOI":"10.1109\/CVPR.2019.01298"},{"key":"13_CR6","doi-asserted-by":"crossref","unstructured":"Singh, A. & Bankiti, V. Surround-View Vision-based 3D Detection for Autonomous Driving: A Survey. (2023), http:\/\/arxiv.org\/abs\/2302.06650","DOI":"10.1109\/ICCVW60793.2023.00348"},{"key":"13_CR7","unstructured":"Ma, Y., Wang, T., Bai, X., Yang, H., Hou, Y., Wang, Y., Qiao, Y., Yang, R., Manocha, D. & Zhu, X. Vision-Centric BEV Perception: A Survey. (2022,8), http:\/\/arxiv.org\/abs\/2208.02797"},{"key":"13_CR8","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1016\/j.neucom.2019.11.118","volume":"406","author":"S Hao","year":"2020","unstructured":"Hao, S., Zhou, Y., Guo, Y.: A Brief Survey on Semantic Segmentation with Deep Learning. Neurocomputing 406, 302\u2013321 (2020)","journal-title":"Neurocomputing"},{"key":"13_CR9","unstructured":"Chen, X., Kundu, K., Zhu, Y., Berneshawi, A., Ma, H., Fidler, S. & Urtasun, R. 3D Object Proposals for Accurate Object Class Detection. Advances In Neural Information Processing Systems. 28 (2015)"},{"key":"13_CR10","doi-asserted-by":"crossref","unstructured":"Wang, Y., Chao, W., Garg, D., Hariharan, B., Campbell, M. & Weinberger, K. Pseudo-lidar from visual depth estimation: Bridging the gap in 3D object detection for autonomous driving. Proceedings Of The IEEE Computer Society Conference On Computer Vision And Pattern Recognition. 2019-June pp. 8437-8445 (2019)","DOI":"10.1109\/CVPR.2019.00864"},{"key":"13_CR11","doi-asserted-by":"crossref","unstructured":"Park, D., Ambru\u015f, R., Guizilini, V., Li, J. & Gaidon, A. DD3D : Is Pseudo-Lidar needed for Monocular 3D Object detection?. Proceedings Of The IEEE International Conference On Computer Vision. pp. 3122-3132 (2021)","DOI":"10.1109\/ICCV48922.2021.00313"},{"key":"13_CR12","first-page":"1","volume":"2022","author":"Z Li","year":"2022","unstructured":"Li, Z., Wang, W., Li, H., Xie, E., Sima, C., Lu, T., Qiao, Y., Dai, J.: BEVFormer: Learning Bird\u2019s-Eye-View Representation from Multi-camera Images via Spatiotemporal Transformers. Computer Vision - ECCV 2022, 1\u201318 (2022)","journal-title":"Computer Vision - ECCV"},{"key":"13_CR13","doi-asserted-by":"crossref","unstructured":"Chen, X., Ma, H., Wan, J., Li, B. & Xia, T. Multi-View 3D Object Detection Network for Autonomous Driving. Proceedings Of The IEEE Conference On Computer Vision And Pattern Recognition (CVPR). (2017,7)","DOI":"10.1109\/CVPR.2017.691"},{"key":"13_CR14","doi-asserted-by":"crossref","unstructured":"Ku, J., Mozifian, M., Lee, J., Harakeh, A. & Waslander, S. Joint 3D Proposal Generation and Object Detection from View Aggregation. 2018 IEEE\/RSJ International Conference On Intelligent Robots And Systems (IROS). pp. 1-8 (2018)","DOI":"10.1109\/IROS.2018.8594049"},{"key":"13_CR15","doi-asserted-by":"crossref","unstructured":"Qi, C., Liu, W., Wu, C., Su, H. & Guibas, L. Frustum PointNets for 3D Object Detection From RGB-D Data. Proceedings Of The IEEE Conference On Computer Vision And Pattern Recognition (CVPR). (2018,6)","DOI":"10.1109\/CVPR.2018.00102"},{"key":"13_CR16","doi-asserted-by":"crossref","unstructured":"Pang, S., Morris, D. & Radha, H. CLOCs: Camera-LiDAR Object Candidates Fusion for 3D Object Detection. IEEE International Conference On Intelligent Robots And Systems. pp. 10386-10393 (2020)","DOI":"10.1109\/IROS45743.2020.9341791"},{"key":"13_CR17","doi-asserted-by":"crossref","unstructured":"Vora, S., Lang, A., Helou, B. & Beijbom, O. PointPainting: Sequential fusion for 3D object detection. Proceedings Of The IEEE Computer Society Conference On Computer Vision And Pattern Recognition. pp. 4603-4611 (2020)","DOI":"10.1109\/CVPR42600.2020.00466"},{"key":"13_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1007\/978-3-030-58555-6_3","volume-title":"Computer Vision \u2013 ECCV 2020","author":"T Huang","year":"2020","unstructured":"Huang, T., Liu, Z., Chen, X., Bai, X.: EPNet: Enhancing Point Features with Image Semantics for 3D Object Detection. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12360, pp. 35\u201352. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58555-6_3"},{"key":"13_CR19","doi-asserted-by":"publisher","first-page":"12460","DOI":"10.1609\/aaai.v34i07.6933","volume":"34","author":"L Xie","year":"2020","unstructured":"Xie, L., Xiang, C., Yu, Z., Xu, G., Yang, Z., Cai, D., He, X.: PI-RCNN: An efficient multi-sensor 3D object detector with point-based attentive cont-conv fusion module. Proceedings Of The AAAI Conference On Artificial Intelligence. 34, 12460\u201312467 (2020)","journal-title":"Proceedings Of The AAAI Conference On Artificial Intelligence."},{"key":"13_CR20","doi-asserted-by":"crossref","unstructured":"Bai, X., Hu, Z., Zhu, X., Huang, Q., Chen, Y., Fu, H. & Tai, C. TransFusion: Robust LiDAR-Camera Fusion for 3D Object Detection With Transformers. Proceedings Of The IEEE\/CVF Conference On Computer Vision And Pattern Recognition (CVPR). pp. 1090-1099 (2022,6)","DOI":"10.1109\/CVPR52688.2022.00116"},{"key":"13_CR21","doi-asserted-by":"crossref","unstructured":"Liu, Z., Tang, H., Amini, A., Yang, X., Mao, H., Rus, D. & Han, S. BEVFusion: Multi-Task Multi-Sensor Fusion with Unified Bird\u2019s-Eye View Representation. 2023 IEEE International Conference On Robotics And Automation (ICRA). pp. 2774-2781 (2023)","DOI":"10.1109\/ICRA48891.2023.10160968"},{"key":"13_CR22","doi-asserted-by":"crossref","unstructured":"Lin, T., Dollar, P., Girshick, R., He, K., Hariharan, B. & Belongie, S. Feature Pyramid Networks for Object Detection. Proceedings Of The IEEE Conference On Computer Vision And Pattern Recognition (CVPR). (2017,7)","DOI":"10.1109\/CVPR.2017.106"},{"key":"13_CR23","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E. & Darrell, T. Fully Convolutional Networks for Semantic Segmentation. Proceedings Of The IEEE Conference On Computer Vision And Pattern Recognition (CVPR). (2015,6)","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"13_CR24","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P. & Brox, T. U-Net: Convolutional Networks for Biomedical Image Segmentation. Medical Image Computing And Computer-Assisted Intervention \u2013 MICCAI 2015. pp. 234-241 (2015)","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"13_CR25","unstructured":"Iandola, F., Han, S., Moskewicz, M., Ashraf, K., Dally, W. & Keutzer, K. SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and $$<$$0.5MB model size. (2016)"},{"key":"13_CR26","unstructured":"Chen, L., Papandreou, G., Schroff, F. & Adam, H. Rethinking Atrous Convolution for Semantic Image Segmentation. CoRR. abs\/1706.05587 (2017), http:\/\/arxiv.org\/abs\/1706.05587"},{"key":"13_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"833","DOI":"10.1007\/978-3-030-01234-2_49","volume-title":"Computer Vision \u2013 ECCV 2018","author":"L-C Chen","year":"2018","unstructured":"Chen, L.-C., Zhu, Y., Papandreou, G., Schroff, F., Adam, H.: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11211, pp. 833\u2013851. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01234-2_49"},{"key":"13_CR28","unstructured":"Qi, C., Su, H., Mo, K. & Guibas, L. PointNet: Deep Learning on Point Sets for 3D Classification and Segmentation. Proceedings Of The IEEE Conference On Computer Vision And Pattern Recognition (CVPR). (2017,7)"},{"key":"13_CR29","unstructured":"Qi, C., Yi, L., Su, H. & Guibas, L. PointNet++: Deep hierarchical feature learning on point sets in a metric space. Advances In Neural Information Processing Systems. 2017-Decem pp. 5100-5109 (2017)"},{"key":"13_CR30","doi-asserted-by":"crossref","unstructured":"Wu, B., Wan, A., Yue, X. & Keutzer, K. SqueezeSeg: Convolutional Neural Nets with Recurrent CRF for Real-Time Road-Object Segmentation from 3D LiDAR Point Cloud. 2018 IEEE International Conference On Robotics And Automation (ICRA). pp. 1887-1893 (2018)","DOI":"10.1109\/ICRA.2018.8462926"},{"key":"13_CR31","unstructured":"Wang, Y., Shi, T., Yun, P., Tai, L. & Liu, M. PointSeg: Real-Time Semantic Segmentation Based on 3D LiDAR Point Cloud. (2018)"},{"key":"13_CR32","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Zhou, Z., David, P., Yue, X., Xi, Z., Gong, B. & Foroosh, H. PolarNet: An Improved Grid Representation for Online LiDAR Point Clouds Semantic Segmentation. Proceedings Of The IEEE\/CVF Conference On Computer Vision And Pattern Recognition (CVPR). (2020,6)","DOI":"10.1109\/CVPR42600.2020.00962"},{"key":"13_CR33","doi-asserted-by":"crossref","unstructured":"Aksoy, E., Baci, S. & Cavdar, S. SalsaNet: Fast Road and Vehicle Segmentation in LiDAR Point Clouds for Autonomous Driving. 2020 IEEE Intelligent Vehicles Symposium (IV). pp. 926-932 (2020)","DOI":"10.1109\/IV47402.2020.9304694"},{"key":"13_CR34","doi-asserted-by":"crossref","unstructured":"Zhuang, Z., Li, R., Jia, K., Wang, Q., Li, Y. & Tan, M. Perception-Aware Multi-Sensor Fusion for 3D LiDAR Semantic Segmentation. Proceedings Of The IEEE\/CVF International Conference On Computer Vision (ICCV). pp. 16280-16290 (2021,10)","DOI":"10.1109\/ICCV48922.2021.01597"},{"key":"13_CR35","doi-asserted-by":"crossref","unstructured":"Zhu, X., Zhou, H., Wang, T., Hong, F., Ma, Y., Li, W., Li, H. & Lin, D. Cylindrical and Asymmetrical 3D Convolution Networks for LiDAR Segmentation. Proceedings Of The IEEE\/CVF Conference On Computer Vision And Pattern Recognition (CVPR). pp. 9939-9948 (2021,6)","DOI":"10.1109\/CVPR46437.2021.00981"},{"key":"13_CR36","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"52","DOI":"10.1007\/978-3-030-58520-4_4","volume-title":"Computer Vision \u2013 ECCV 2020","author":"W Chen","year":"2020","unstructured":"Chen, W., Zhu, X., Sun, R., He, J., Li, R., Shen, X., Yu, B.: Tensor Low-Rank Reconstruction for Semantic Segmentation. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12362, pp. 52\u201369. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58520-4_4"},{"key":"13_CR37","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P. & Urtasun, R. Are we ready for autonomous driving? The KITTI vision benchmark suite. 2012 IEEE Conference On Computer Vision And Pattern Recognition. pp. 3354-3361 (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"13_CR38","doi-asserted-by":"crossref","unstructured":"Behley, J., Garbade, M., Milioto, A., Quenzel, J., Behnke, S., Stachniss, C. & Gall, J. SemanticKITTI: A Dataset for Semantic Scene Understanding of LiDAR Sequences. Proceedings Of The IEEE\/CVF International Conference On Computer Vision (ICCV). (2019,10)","DOI":"10.1109\/ICCV.2019.00939"},{"key":"13_CR39","doi-asserted-by":"crossref","unstructured":"Cordts, M., Omran, M., Ramos, S., Rehfeld, T., Enzweiler, M., Benenson, R., Franke, U., Roth, S. & Schiele, B. The Cityscapes Dataset for Semantic Urban Scene Understanding. Proceedings Of The IEEE Conference On Computer Vision And Pattern Recognition (CVPR). (2016,6)","DOI":"10.1109\/CVPR.2016.350"},{"key":"13_CR40","unstructured":"Team, O. OpenPCDet: An Open-source Toolbox for 3D Object Detection from Point Clouds. (https:\/\/github.com\/open-mmlab\/OpenPCDet,2020)"},{"key":"13_CR41","doi-asserted-by":"crossref","unstructured":"Berman, M., Triki, A. & Blaschko, M. The Lov\u00e1sz-Softmax Loss: A Tractable Surrogate for the Optimization of the Intersection-Over-Union Measure in Neural Networks. Proceedings Of The IEEE Conference On Computer Vision And Pattern Recognition (CVPR). (2018,6)","DOI":"10.1109\/CVPR.2018.00464"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ACCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-0969-7_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T13:04:46Z","timestamp":1733576686000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-0969-7_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,8]]},"ISBN":["9789819609680","9789819609697"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-0969-7_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,8]]},"assertion":[{"value":"8 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hanoi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vietnam","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"accv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}