{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T09:59:51Z","timestamp":1777888791946,"version":"3.51.4"},"reference-count":66,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iccv51701.2025.02474","type":"proceedings-article","created":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T19:45:49Z","timestamp":1777491949000},"page":"26653-26663","source":"Crossref","is-referenced-by-count":0,"title":["Temporal Overlapping Prediction: A Self-Supervised Pre-Training Method for LiDAR Moving Object Segmentation"],"prefix":"10.1109","author":[{"given":"Ziliang","family":"Miao","sequence":"first","affiliation":[{"name":"The University of Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Runjian","family":"Chen","sequence":"additional","affiliation":[{"name":"The University of Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yixi","family":"Cai","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Buwei","family":"He","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenquan","family":"Zhao","sequence":"additional","affiliation":[{"name":"Southern University of Science and Technology"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenqi","family":"Shao","sequence":"additional","affiliation":[{"name":"Shanghai AI Laboratory"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bo","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shanghai AI Laboratory"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fu","family":"Zhang","sequence":"additional","affiliation":[{"name":"The University of Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"14487","article-title":"Uno: Unsupervised occupancy fields for perception and forecasting","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Ben","year":"2024"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01288"},{"key":"ref3","article-title":"SemanticKITTI: A Dataset for Semantic Scene Understanding of LiDAR Sequences","volume-title":"Proc. of the IEEE\/CVF International Conf. on Computer Vision (ICCV)","author":"J","year":"2019"},{"key":"ref4","article-title":"ALSO: Automotive lidar selfsupervision by occupancy estimation","volume-title":"International Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Alexandre","year":"2023"},{"key":"ref5","volume-title":"Language models are few-shot learners","author":"Tom B","year":"2020"},{"key":"ref6","volume-title":"nuscenes: A multimodal dataset for autonomous driving","author":"Holger","year":"2019"},{"key":"ref7","article-title":"Pointgpt: Auto-regressively generative pretraining from point clouds","volume":"36","author":"Guangyan","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref8","volume-title":"Co^3: Cooperative unsupervised 3d representation learning for autonomous driving","author":"Runjian","year":"2022"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3093567"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3166544"},{"key":"ref11","volume-title":"Mf-mos: A motion-focused model for moving object segmentation","author":"Jintao","year":"2024"},{"key":"ref12","first-page":"3075","article-title":"4d spatio-temporal convnets: Minkowski convolutional neural networks","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","author":"Christopher","year":"2019"},{"key":"ref13","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Jacob","year":"2018"},{"key":"ref14","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"Alexey","year":"2020"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.122563"},{"key":"ref16","first-page":"5692","article-title":"Weakly supervised learning of rigid 3d scene flow","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Zan","year":"2021"},{"key":"ref17","first-page":"9729","article-title":"Momentum contrast for unsupervised visual representation learning","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Kaiming","year":"2020"},{"key":"ref18","first-page":"16000","article-title":"Masked autoencoders are scalable vision learners","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Kaiming","year":"2022"},{"key":"ref19","volume-title":"Deberta: Decoding-enhanced bert with disentangled attention","author":"Pengcheng","year":"2020"},{"key":"ref20","first-page":"350","article-title":"Masked autoencoder for self-supervised pre-training on lidar point clouds","volume-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision","author":"Georg","year":"2023"},{"key":"ref21","first-page":"16089","article-title":"Ponder: Point cloud pre-training via neural rendering","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","author":"Di","year":"2023"},{"key":"ref22","first-page":"6535","article-title":"Spatio-temporal self-supervised representation learning for 3d point clouds","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","author":"Siyuan","year":"2021"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52729.2023.00114"},{"issue":"(3)","key":"ref24","first-page":"8044","article-title":"Rvmos: Range-view moving object segmentation leveraged by semantic and motion features","volume":"7","author":"Jaeyeul","year":"2022","journal-title":"IEEE Robotics and Automation Letters"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00169"},{"key":"ref26","first-page":"3383","article-title":"Maeli: Masked autoencoder for large-scale lidar point clouds","volume-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision","author":"Georg","year":"2024"},{"key":"ref27","first-page":"3293","article-title":"Exploring geometry-aware contrast and clustering harmonization for self-supervised 3d object detection","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","author":"Hanxue","year":"2021"},{"key":"ref28","volume-title":"Helimos: A dataset for moving object segmentation in 3d point clouds from heterogeneous lidar sensors","author":"Hyungtae","year":"2024"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01468"},{"key":"ref30","volume-title":"Roberta: A robustly optimized bert pretraining approach","volume":"364","author":"Yinhan","year":"2019"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9341097"},{"key":"ref32","first-page":"1444","article-title":"Self-supervised point cloud prediction using 3d spatio-temporal convolutional networks","volume-title":"Conference on Robot Learning","author":"Benedikt","year":"2022"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3183245"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3292583"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3503250"},{"key":"ref36","article-title":"Occupancy-mae: Self-supervised pre-training largescale lidar point clouds with masked occupancy autoencoders","author":"Chen","year":"2023","journal-title":"IEEE Transactions on Intelligent Vehicles"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3142440"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00505"},{"key":"ref39","first-page":"604","article-title":"Masked autoencoders for point cloud self-supervised learning","volume-title":"European conference on computer vision","author":"Yatian","year":"2022"},{"key":"ref40","volume-title":"Improving language understanding by generative pre-training","author":"Alec","year":"2018"},{"key":"ref41","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"International Conference on Machine Learning","author":"Alec","year":"2021"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/3DV62453.2024.00017"},{"issue":"(3)","key":"ref43","first-page":"14051412","article-title":"An analytical lidar sensor model based on ray path information","volume":"2","author":"Alexander","year":"2017","journal-title":"IEEE Robotics and Automation Letters"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TIM.2024.3353835"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981210"},{"issue":"(2)","key":"ref46","first-page":"510","article-title":"Pointmoseg: Sparse tensor-based end-to-end moving-obstacle segmentation in 3-d lidar point clouds for autonomous driving","volume":"6","author":"Yuxiang","year":"2020","journal-title":"IEEE Robotics and Automation Letters"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01304"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/3DV50981.2020.00025"},{"key":"ref49","article-title":"Attention is all you need","author":"A","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10342277"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.320"},{"key":"ref52","first-page":"178","article-title":"T-mae: Temporal masked autoencoders for point cloud representation learning","volume-title":"European Conference on Computer Vision","author":"Weijie","year":"2024"},{"key":"ref53","first-page":"11","article-title":"Inverting the pose forecasting pipeline with spf2: Sequential pointcloud forecasting for sequential pose forecasting","volume-title":"Conference on Robot Learning","author":"Xinshuo","year":"2021"},{"key":"ref54","first-page":"549","article-title":"S2net: Stochastic sequential pointcloud forecasting","volume-title":"European Conference on Computer Vision","author":"Xinshuo","year":"2022"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-024-46400-x"},{"key":"ref56","first-page":"574","article-title":"Pointcontrast: Unsupervised pretraining for 3d point cloud understanding","volume-title":"Computer Vision-ECCV 2020: 16th European Conference","author":"Saining","year":"2020"},{"key":"ref57","first-page":"1344513454","article-title":"Mv-jar: Masked voxel jigsaw and reconstruction for lidar-based self-supervised pre-training","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Runsen","year":"2023"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00907"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01443"},{"key":"ref60","first-page":"19313","article-title":"Point-bert: Pre-training 3d point cloud transformers with masked point modeling","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","author":"Xumin","year":"2022"},{"key":"ref61","volume-title":"Learning unsupervised world models for autonomous driving via discrete diffusion","author":"Lunjun","year":"2023"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610278"},{"key":"ref63","first-page":"353","article-title":"SeFlow: A self-supervised scene flow method in autonomous driving","volume-title":"European Conference on Computer Vision (ECCV)","author":"Qingwen","year":"2024"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.52202\/068431-1962"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01460"},{"key":"ref66","volume-title":"Ponderv2: Pave the way for 3d foundataion model with a universal pre-training paradigm","author":"Haoyi","year":"2023"}],"event":{"name":"2025 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,10,19]]},"end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11443115\/11443287\/11446049.pdf?arnumber=11446049","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T04:58:03Z","timestamp":1777611483000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11446049\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":66,"URL":"https:\/\/doi.org\/10.1109\/iccv51701.2025.02474","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}