{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,22]],"date-time":"2025-11-22T11:37:29Z","timestamp":1763811449684,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,30]],"date-time":"2024-05-30T00:00:00Z","timestamp":1717027200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,30]]},"DOI":"10.1145\/3652583.3658074","type":"proceedings-article","created":{"date-parts":[[2024,6,7]],"date-time":"2024-06-07T06:30:40Z","timestamp":1717741840000},"page":"266-274","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Image-to-Point Registration via Cross-Modality Correspondence Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4844-1353","authenticated-orcid":false,"given":"Lin","family":"Bie","sequence":"first","affiliation":[{"name":"School of Software, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9720-826X","authenticated-orcid":false,"given":"Siqi","family":"Li","sequence":"additional","affiliation":[{"name":"School of Software, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4565-8036","authenticated-orcid":false,"given":"Kai","family":"Cheng","sequence":"additional","affiliation":[{"name":"Command Control College, Army Engineering University, Nanjing, China"}]}],"member":"320","published-online":{"date-parts":[[2024,6,7]]},"reference":[{"doi-asserted-by":"publisher","key":"e_1_3_2_1_1_1","DOI":"10.1109\/CVPRW53098.2021.00324"},{"key":"e_1_3_2_1_2_1","first-page":"6648","volume-title":"2020 25th International Conference on Pattern Recognition","author":"Hadachi Amnir","year":"2021","unstructured":"ShanWu, Amnir Hadachi, Damien Vivet, and Yadu Prabhakar. NetCalib: A novel approach for lidar-camera auto-calibration based on deep learning. In 2020 25th International Conference on Pattern Recognition, pages 6648--6655, 2021."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_3_1","DOI":"10.1109\/ICRA48891.2023.10161575"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_4_1","DOI":"10.1007\/s11263-008-0152-6"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_5_1","DOI":"10.1109\/ICRA.2019.8794415"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_6_1","DOI":"10.1109\/ICCV48922.2021.01570"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_7_1","DOI":"10.1145\/358669.358692"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_8_1","DOI":"10.1109\/CVPR46437.2021.01570"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_9_1","DOI":"10.1109\/TCSVT.2022.3208859"},{"key":"e_1_3_2_1_10_1","first-page":"652","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","author":"Qi Charles R","year":"2017","unstructured":"Charles R Qi, Hao Su, Kaichun Mo, and Leonidas J Guibas. Pointnet: Deep learning on point sets for 3d classification and segmentation. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pages 652--660, 2017."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_11_1","DOI":"10.1109\/CVPR.2016.90"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_12_1","DOI":"10.1109\/ICCV.2019.00651"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_13_1","DOI":"10.1109\/ICCV.2019.00362"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_14_1","DOI":"10.1109\/CVPR52688.2022.00280"},{"key":"e_1_3_2_1_15_1","first-page":"36","article-title":"Differentiable registration of images and lidar point clouds with voxelpointto-pixel matching","author":"Zhou Junsheng","year":"2024","unstructured":"Junsheng Zhou, Baorui Ma, Wenyuan Zhang, Yi Fang, Yu-Shen Liu, and Zhizhong Han. Differentiable registration of images and lidar point clouds with voxelpointto-pixel matching. In Advances in Neural Information Processing Systems, 2024, 36.","journal-title":"Advances in Neural Information Processing Systems"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_16_1","DOI":"10.5555\/2354409.2354978"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_17_1","DOI":"10.1109\/CVPR42600.2020.01164"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_18_1","DOI":"10.1109\/CVPR42600.2020.00592"},{"key":"e_1_3_2_1_19_1","first-page":"857","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","author":"Weng Xinshuo","year":"2019","unstructured":"Xinshuo Weng and Kris Kitani. Monocular 3d object detection with pseudo-lidar point cloud. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pages 857--866, 2019."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_20_1","DOI":"10.1109\/TIM.2023.3315416"},{"key":"e_1_3_2_1_21_1","article-title":"scene flow estimation on pseudo-lidar: Bridging the gap on estimating point motion","author":"Jiang Chaokang","year":"2022","unstructured":"Chaokang Jiang, Guangming Wang, Yanzi Miao, and Hesheng Wang. 3d scene flow estimation on pseudo-lidar: Bridging the gap on estimating point motion. IEEE Transactions on Industrial Informatics, 2022.","journal-title":"IEEE Transactions on Industrial Informatics"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_22_1","DOI":"10.1109\/CVPR.2018.00931"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_23_1","DOI":"10.1109\/CVPR46437.2021.01369"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_24_1","DOI":"10.1109\/CVPR.2018.00214"},{"key":"e_1_3_2_1_25_1","volume-title":"Dong Wook Ko, and Il Hong Suh. From big to small: Multi-scale local planar guidance for monocular depth estimation. arXiv preprint arXiv:1907.10326","author":"Lee Jin Han","year":"2019","unstructured":"Jin Han Lee, Myung-Kyu Han, Dong Wook Ko, and Il Hong Suh. From big to small: Multi-scale local planar guidance for monocular depth estimation. arXiv preprint arXiv:1907.10326, 2019."},{"key":"e_1_3_2_1_26_1","volume-title":"New crfs: Neural window fully-connected crfs for monocular depth estimation. arXiv preprint arXiv:2203.01502","author":"Yuan Weihao","year":"2022","unstructured":"Weihao Yuan, Xiaodong Gu, Zuozhuo Dai, Siyu Zhu, and Ping Tan. New crfs: Neural window fully-connected crfs for monocular depth estimation. arXiv preprint arXiv:2203.01502, 2022."},{"key":"e_1_3_2_1_27_1","article-title":"Sparse pseudo-lidar depth assisted monocular depth estimation","author":"Shao Shuwei","year":"2023","unstructured":"Shuwei Shao, Zhongcai Pei, Weihai Chen, Qiang Liu, Haosong Yue, and Zhengguo Li. Sparse pseudo-lidar depth assisted monocular depth estimation. IEEE Transactions on Intelligent Vehicles, 2023.","journal-title":"IEEE Transactions on Intelligent Vehicles"},{"key":"e_1_3_2_1_28_1","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929","author":"Dosovitskiy Alexey","year":"2020","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, et al. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929, 2020."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_29_1","DOI":"10.1109\/34.121791"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_30_1","DOI":"10.1109\/TPAMI.1987.4767965"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_31_1","DOI":"10.1109\/ICCV.2019.00905"},{"key":"e_1_3_2_1_32_1","first-page":"3075","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","author":"Choy Christopher","year":"2019","unstructured":"Christopher Choy, JunYoung Gwak, and Silvio Savarese. 4d spatio-temporal convnets: Minkowski convolutional neural networks. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pages 3075--3084, 2019."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_33_1","DOI":"10.1109\/CVPR42600.2020.00639"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_34_1","DOI":"10.1109\/CVPR52688.2022.01086"},{"key":"e_1_3_2_1_35_1","first-page":"34","article-title":"Reliable coarse-to-fine correspondences for robust point cloud registration","author":"Yu Hao","year":"2021","unstructured":"Hao Yu, Fu Li, Mahdi Saleh, Benjamin Busam, and Slobodan Ilic. CoFiNet: Reliable coarse-to-fine correspondences for robust point cloud registration. Advances in Neural Information Processing Systems, 34, 2021.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_36_1","volume-title":"Cfi2p: Coarse-to-fine crossmodal correspondence learning for image-to-point cloud registration. arXiv preprint arXiv:2307.07142","author":"Yao Gongxin","year":"2023","unstructured":"Gongxin Yao, Yixin Xuan, Yiwei Chen, and Yu Pan. Cfi2p: Coarse-to-fine crossmodal correspondence learning for image-to-point cloud registration. arXiv preprint arXiv:2307.07142, 2023."},{"key":"e_1_3_2_1_37_1","first-page":"30","article-title":"Attention is all you need","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. Attention is all you need. Advances in Neural Information Processing Systems, 30, 2017.","journal-title":"Advances in Neural Information Processing Systems"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_38_1","DOI":"10.1109\/CVPR46437.2021.01560"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_39_1","DOI":"10.1109\/CVPR46437.2021.00425"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_40_1","DOI":"10.1109\/CVPR42600.2020.00643"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_41_1","DOI":"10.1177\/0278364916679498"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_42_1","DOI":"10.1109\/ICCV.2019.00045"},{"key":"e_1_3_2_1_43_1","first-page":"607","volume-title":"Proceedings of the European Conference on Computer Vision","author":"Yew Zi Jian","year":"2018","unstructured":"Zi Jian Yew and Gim Hee Lee. 3DFeat-Net:Weakly supervised local 3d features for point cloud registration. In Proceedings of the European Conference on Computer Vision, pages 607--623, 2018."}],"event":{"sponsor":["SIGMM ACM Special Interest Group on Multimedia","SIGSOFT ACM Special Interest Group on Software Engineering"],"acronym":"ICMR '24","name":"ICMR '24: International Conference on Multimedia Retrieval","location":"Phuket Thailand"},"container-title":["Proceedings of the 2024 International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652583.3658074","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3652583.3658074","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T08:53:02Z","timestamp":1755766382000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652583.3658074"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,30]]},"references-count":43,"alternative-id":["10.1145\/3652583.3658074","10.1145\/3652583"],"URL":"https:\/\/doi.org\/10.1145\/3652583.3658074","relation":{},"subject":[],"published":{"date-parts":[[2024,5,30]]},"assertion":[{"value":"2024-06-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}