{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T16:24:45Z","timestamp":1775665485658,"version":"3.50.1"},"publisher-location":"Cham","reference-count":46,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031730320","type":"print"},{"value":"9783031730337","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73033-7_26","type":"book-chapter","created":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:03:55Z","timestamp":1730333035000},"page":"459-475","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Image-to-Lidar Relational Distillation for\u00a0Autonomous Driving Data"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0836-7416","authenticated-orcid":false,"given":"Anas","family":"Mahmoud","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2593-2141","authenticated-orcid":false,"given":"Ali","family":"Harakeh","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4217-4415","authenticated-orcid":false,"given":"Steven","family":"Waslander","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,31]]},"reference":[{"issue":"11","key":"26_CR1","doi-asserted-by":"publisher","first-page":"2274","DOI":"10.1109\/TPAMI.2012.120","volume":"34","author":"R Achanta","year":"2012","unstructured":"Achanta, R., Shaji, A., Smith, K., Lucchi, A., Fua, P., S\u00fcsstrunk, S.: Slic superpixels compared to state-of-the-art superpixel methods. IEEE TPAMI 34(11), 2274\u20132282 (2012)","journal-title":"IEEE TPAMI"},{"key":"26_CR2","unstructured":"Bardes, A., Ponce, J., Lecun, Y.: Vicreg: variance-invariance-covariance regularization for self-supervised learning. In: ICLR (2022)"},{"key":"26_CR3","doi-asserted-by":"crossref","unstructured":"Behley, J., et al.: SemanticKITTI: a dataset for semantic scene understanding of LiDAR sequences. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00939"},{"key":"26_CR4","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"26_CR5","doi-asserted-by":"crossref","unstructured":"Caesar, H., et al.: nuscenes: a multimodal dataset for autonomous driving. In: CVPR, pp. 11621\u201311631 (2020)","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"26_CR6","first-page":"9912","volume":"33","author":"M Caron","year":"2020","unstructured":"Caron, M., Misra, I., Mairal, J., Goyal, P., Bojanowski, P., Joulin, A.: Unsupervised learning of visual features by contrasting cluster assignments. NeurIPS 33, 9912\u20139924 (2020)","journal-title":"NeurIPS"},{"key":"26_CR7","doi-asserted-by":"crossref","unstructured":"Caron, M., et al.: Emerging properties in self-supervised vision transformers. In: ICCV, pp. 9650\u20139660 (2021)","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"26_CR8","unstructured":"Chen, R., et al.: Towards label-free scene understanding by vision foundation models. In: Thirty-Seventh Conference on Neural Information Processing Systems (2023)"},{"key":"26_CR9","doi-asserted-by":"crossref","unstructured":"Chen, R., et al.: Clip2scene: towards label-efficient 3D scene understanding by clip. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7020\u20137030 (2023)","DOI":"10.1109\/CVPR52729.2023.00678"},{"key":"26_CR10","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.: A simple framework for contrastive learning of visual representations. In: ICML, pp. 1597\u20131607 (2020)"},{"key":"26_CR11","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"540","DOI":"10.1007\/978-3-031-20059-5_31","volume-title":"European Conference on Computer Vision","author":"G Ghiasi","year":"2022","unstructured":"Ghiasi, G., Gu, X., Cui, Y., Lin, T.Y.: Scaling open-vocabulary image segmentation with image-level labels. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13696, pp. 540\u2013557. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20059-5_31"},{"key":"26_CR12","doi-asserted-by":"crossref","unstructured":"Girdhar, R., et al.: Imagebind: one embedding space to bind them all. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15180\u201315190 (2023)","DOI":"10.1109\/CVPR52729.2023.01457"},{"key":"26_CR13","doi-asserted-by":"publisher","first-page":"1789","DOI":"10.1007\/s11263-021-01453-z","volume":"129","author":"J Gou","year":"2021","unstructured":"Gou, J., Yu, B., Maybank, S.J., Tao, D.: Knowledge distillation: a survey. Int. J. Comput. Vision 129, 1789\u20131819 (2021)","journal-title":"Int. J. Comput. Vision"},{"key":"26_CR14","unstructured":"Grill, J.B., et al.: Bootstrap your own latent: a new approach to self-supervised learning. arXiv preprint arXiv:2006.07733 (2020)"},{"key":"26_CR15","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R.: Momentum contrast for unsupervised visual representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9729\u20139738 (2020)","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"26_CR16","doi-asserted-by":"crossref","unstructured":"H\u00e9naff, O.J., Koppula, S., Alayrac, J.B., Van\u00a0den Oord, A., Vinyals, O., Carreira, J.: Efficient visual pretraining with contrastive detection. In: ICCV, pp. 10086\u201310096 (2021)","DOI":"10.1109\/ICCV48922.2021.00993"},{"key":"26_CR17","doi-asserted-by":"crossref","unstructured":"Hess, G., Tonderski, A., Petersson, C., \u00c5str\u00f6m, K., Svensson, L.: Lidarclip or: how i learned to talk to point clouds. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 7438\u20137447 (2024)","DOI":"10.1109\/WACV57701.2024.00727"},{"key":"26_CR18","unstructured":"Hinton, G., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)"},{"key":"26_CR19","doi-asserted-by":"crossref","unstructured":"Jatavallabhula, K., et al.: Conceptfusion: open-set multimodal 3D mapping. Robot. Sci. Syst. (2023)","DOI":"10.15607\/RSS.2023.XIX.066"},{"key":"26_CR20","doi-asserted-by":"crossref","unstructured":"Kerr, J., Kim, C.M., Goldberg, K., Kanazawa, A., Tancik, M.: LERF: language embedded radiance fields. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 19729\u201319739 (2023)","DOI":"10.1109\/ICCV51070.2023.01807"},{"key":"26_CR21","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et al.: Segment anything. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 4015\u20134026 (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"26_CR22","unstructured":"Li, B., Weinberger, K.Q., Belongie, S., Koltun, V., Ranftl, R.: Language-driven semantic segmentation. In: ICLR (2022)"},{"key":"26_CR23","first-page":"17612","volume":"35","author":"VW Liang","year":"2022","unstructured":"Liang, V.W., Zhang, Y., Kwon, Y., Yeung, S., Zou, J.Y.: Mind the gap: Understanding the modality gap in multi-modal contrastive representation learning. Adv. Neural. Inf. Process. Syst. 35, 17612\u201317625 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"26_CR24","unstructured":"Liu, Y., et al.: Segment any point cloud sequences by distilling vision foundation models. In: Advances in Neural Information Processing Systems (2023)"},{"key":"26_CR25","unstructured":"Liu, Y.C., et al.: Learning from 2D: contrastive pixel-to-point knowledge transfer for 3D pretraining. arXiv preprint arXiv:2104.04687 (2021)"},{"key":"26_CR26","doi-asserted-by":"crossref","unstructured":"Mahmoud, A., et al.: Sieve: multimodal dataset pruning using image captioning models. In: CVPR, pp. 22423\u201322432 (2024)","DOI":"10.1109\/CVPR52733.2024.02116"},{"key":"26_CR27","doi-asserted-by":"crossref","unstructured":"Mahmoud, A., Hu, J.S.K., Kuai, T., Harakeh, A., Paull, L., Waslander, S.L.: Self-supervised image-to-point distillation via semantically tolerant contrastive loss. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7102\u20137110 (2023)","DOI":"10.1109\/CVPR52729.2023.00686"},{"key":"26_CR28","doi-asserted-by":"crossref","unstructured":"Mahmoud, A., Hu, J.S., Waslander, S.L.: Dense voxel fusion for 3D object detection. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 663\u2013672 (2023)","DOI":"10.1109\/WACV56688.2023.00073"},{"key":"26_CR29","doi-asserted-by":"crossref","unstructured":"Mahmoud, A., Waslander, S.L.: Sequential fusion via bounding box and motion pointpainting for 3D objection detection. In: 18th Conference on Robots and Vision (CRV) (2021)","DOI":"10.1109\/CRV52889.2021.00013"},{"key":"26_CR30","unstructured":"Oquab, M., et al.: Dinov2: learning robust visual features without supervision. arXiv preprint arXiv:2304.07193 (2023)"},{"key":"26_CR31","doi-asserted-by":"crossref","unstructured":"Park, W., Kim, D., Lu, Y., Cho, M.: Relational knowledge distillation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3967\u20133976 (2019)","DOI":"10.1109\/CVPR.2019.00409"},{"key":"26_CR32","unstructured":"Parnami, A., Lee, M.: Learning from few examples: a summary of approaches to few-shot learning. arXiv preprint arXiv:2203.04291 (2022)"},{"key":"26_CR33","doi-asserted-by":"crossref","unstructured":"Peng, S., et al.: Openscene: 3D scene understanding with open vocabularies. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 815\u2013824 (2023)","DOI":"10.1109\/CVPR52729.2023.00085"},{"key":"26_CR34","unstructured":"Puy, G., et al.: Revisiting the distillation of image representations into point clouds for autonomous driving. arXiv preprint arXiv:2310.17504 (2023)"},{"key":"26_CR35","doi-asserted-by":"crossref","unstructured":"Qian, J., Chatrath, V., Yang, J., Servos, J., Schoellig, A.P., Waslander, S.L.: POCD: probabilistic object-level change detection and volumetric mapping in semi-static scenes. Sci. Syst. Robot. (2022)","DOI":"10.15607\/RSS.2022.XVIII.013"},{"key":"26_CR36","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"26_CR37","unstructured":"Romero, A., Ballas, N., Kahou, S.E., Chassang, A., Gatta, C., Bengio, Y.: Fitnets: hints for thin deep nets. arXiv preprint arXiv:1412.6550 (2014)"},{"key":"26_CR38","doi-asserted-by":"crossref","unstructured":"Sautier, C., Puy, G., Gidaris, S., Boulch, A., Bursuc, A., Marlet, R.: Image-to-lidar self-supervised distillation for autonomous driving data. In: CVPR, pp. 9891\u20139901 (2022)","DOI":"10.1109\/CVPR52688.2022.00966"},{"key":"26_CR39","unstructured":"Shi, P., Welle, M.C., Bj\u00f6rkman, M., Kragic, D.: Towards understanding the modality gap in CLIP. In: ICLR 2023 Workshop on Multimodal Representation Learning: Perks and Pitfalls (2023)"},{"key":"26_CR40","unstructured":"Tian, Y., Krishnan, D., Isola, P.: Contrastive representation distillation. arXiv preprint arXiv:1910.10699 (2019)"},{"key":"26_CR41","doi-asserted-by":"crossref","unstructured":"Wang, F., Liu, H.: Understanding the behaviour of contrastive loss. In: CVPR, pp. 2495\u20132504 (2021)","DOI":"10.1109\/CVPR46437.2021.00252"},{"key":"26_CR42","unstructured":"Wang, T., Isola, P.: Understanding contrastive representation learning through alignment and uniformity on the hypersphere. In: International Conference on Machine Learning, pp. 9929\u20139939. PMLR (2020)"},{"key":"26_CR43","unstructured":"Zagoruyko, S., Komodakis, N.: Paying more attention to attention: Improving the performance of convolutional neural networks via attention transfer. arXiv preprint arXiv:1612.03928 (2016)"},{"key":"26_CR44","doi-asserted-by":"crossref","unstructured":"Zhang, R., et al.: Pointclip: point cloud understanding by clip. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8552\u20138562 (2022)","DOI":"10.1109\/CVPR52688.2022.00836"},{"key":"26_CR45","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"696","DOI":"10.1007\/978-3-031-19815-1_40","volume-title":"European Conference on Computer Vision","author":"C Zhou","year":"2022","unstructured":"Zhou, C., Loy, C.C., Dai, B.: Extract free dense labels from clip. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13688, pp. 696\u2013712. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19815-1_40"},{"key":"26_CR46","doi-asserted-by":"crossref","unstructured":"Zhu, X., Zhang, R., He, B., Zeng, Z., Zhang, S., Gao, P.: Pointclip V2: adapting clip for powerful 3D open-world learning. arXiv preprint arXiv:2211.11682 (2022)","DOI":"10.1109\/ICCV51070.2023.00249"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73033-7_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:40:59Z","timestamp":1730335259000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73033-7_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,31]]},"ISBN":["9783031730320","9783031730337"],"references-count":46,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73033-7_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,31]]},"assertion":[{"value":"31 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}