{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T03:29:28Z","timestamp":1767324568388,"version":"3.48.0"},"publisher-location":"Cham","reference-count":49,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032128393","type":"print"},{"value":"9783032128409","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-12840-9_32","type":"book-chapter","created":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T03:25:31Z","timestamp":1767324331000},"page":"502-517","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["CoProU-VO: Combining Projected Uncertainty for\u00a0End-to-End Unsupervised Monocular Visual Odometry"],"prefix":"10.1007","author":[{"given":"Jingchao","family":"Xie","sequence":"first","affiliation":[]},{"given":"Oussema","family":"Dhaouadi","sequence":"additional","affiliation":[]},{"given":"Weirong","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Johannes","family":"Meier","sequence":"additional","affiliation":[]},{"given":"Jacques","family":"Kaiser","sequence":"additional","affiliation":[]},{"given":"Daniel","family":"Cremers","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,2]]},"reference":[{"key":"32_CR1","doi-asserted-by":"crossref","unstructured":"Bangunharcana, A., Magd, A., Kim, K.S.: Dualrefine: self-supervised depth and pose estimation through iterative epipolar sampling and refinement toward equilibrium. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 726\u2013738 (2023)","DOI":"10.1109\/CVPR52729.2023.00077"},{"key":"32_CR2","doi-asserted-by":"crossref","unstructured":"Bian, J.W., et al.: Unsupervised scale-consistent depth learning from video. Int. J. Comput. Vis. (IJCV) (2021)","DOI":"10.1007\/s11263-021-01484-6"},{"key":"32_CR3","doi-asserted-by":"crossref","unstructured":"Caesar, H., et al.: Nuscenes: a multimodal dataset for autonomous driving. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.01164"},{"issue":"6","key":"32_CR4","doi-asserted-by":"publisher","first-page":"1874","DOI":"10.1109\/TRO.2021.3075644","volume":"37","author":"C Campos","year":"2021","unstructured":"Campos, C., Elvira, R., Rodr\u00edguez, J.J.G., Montiel, J.M., Tard\u00f3s, J.D.: Orb-slam3: an accurate open-source library for visual, visual-inertial, and multimap slam. IEEE Trans. Rob. 37(6), 1874\u20131890 (2021)","journal-title":"IEEE Trans. Rob."},{"key":"32_CR5","doi-asserted-by":"crossref","unstructured":"Caron, M., et al.: Emerging properties in self-supervised vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9650\u20139660 (2021)","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"32_CR6","doi-asserted-by":"crossref","unstructured":"Chen, C., Wang, B., Lu, C.X., Trigoni, N., Markham, A.: Deep learning for visual localization and mapping: a survey. IEEE Trans. Neural Netw. Learn. Syst. (2023)","DOI":"10.1109\/TNNLS.2023.3309809"},{"issue":"3","key":"32_CR7","doi-asserted-by":"publisher","first-page":"3157","DOI":"10.1109\/TITS.2022.3227917","volume":"24","author":"J Dai","year":"2022","unstructured":"Dai, J., Gong, X., Li, Y., Wang, J., Wei, M.: Self-supervised deep visual odometry based on geometric attention model. IEEE Trans. Intell. Transp. Syst. 24(3), 3157\u20133166 (2022)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"32_CR8","unstructured":"Darcet, T., Oquab, M., Mairal, J., Bojanowski, P.: Vision transformers need registers. arXiv preprint arXiv:2309.16588 (2023)"},{"key":"32_CR9","doi-asserted-by":"publisher","unstructured":"Dikov, G., Van\u00a0Vugt, J.: Variational depth networks: uncertainty-aware monocular self-supervised depth estimation. In: Karlinsky, L., Michaeli, T., Nishino, K. (eds.) Computer Vision \u2013 ECCV 2022 Workshops. ECCV 2022. LNCS, vol. 13808, pp. 43\u201360. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-25085-9_3","DOI":"10.1007\/978-3-031-25085-9_3"},{"key":"32_CR10","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16 $$\\times $$ 16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"issue":"3","key":"32_CR11","doi-asserted-by":"publisher","first-page":"611","DOI":"10.1109\/TPAMI.2017.2658577","volume":"40","author":"J Engel","year":"2017","unstructured":"Engel, J., Koltun, V., Cremers, D.: Direct sparse odometry. IEEE Trans. Pattern Anal. Mach. Intell. 40(3), 611\u2013625 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"32_CR12","doi-asserted-by":"publisher","unstructured":"Feng, Z., Yang, L., Jing, L., Wang, H., Tian, Y., Li, B.: Disentangling object motion and occlusion for unsupervised multi-frame monocular depth. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision \u2013 ECCV 2022. ECCV 2022. LNCS, vol. 13692, pp. 228\u2013244. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19824-3_14","DOI":"10.1007\/978-3-031-19824-3_14"},{"issue":"11","key":"32_CR13","doi-asserted-by":"publisher","first-page":"1231","DOI":"10.1177\/0278364913491297","volume":"32","author":"A Geiger","year":"2013","unstructured":"Geiger, A., Lenz, P., Stiller, C., Urtasun, R.: Vision meets robotics: the kitti dataset. Int. J. Robot. Res. 32(11), 1231\u20131237 (2013)","journal-title":"Int. J. Robot. Res."},{"key":"32_CR14","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? The kitti vision benchmark suite. In: Conference on Computer Vision and Pattern Recognition (CVPR) (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"32_CR15","doi-asserted-by":"crossref","unstructured":"Godard, C., Mac\u00a0Aodha, O., Firman, M., Brostow, G.J.: Digging into self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3828\u20133838 (2019)","DOI":"10.1109\/ICCV.2019.00393"},{"key":"32_CR16","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"32_CR17","unstructured":"Hinton, G., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)"},{"key":"32_CR18","doi-asserted-by":"crossref","unstructured":"Ilg, E., et al.: Uncertainty estimates and multi-hypotheses networks for optical flow. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 652\u2013667 (2018)","DOI":"10.1007\/978-3-030-01234-2_40"},{"key":"32_CR19","unstructured":"Jaderberg, M., Simonyan, K., Zisserman, A., et\u00a0al.: Spatial transformer networks. Adv. Neural Inf. Process. Syst. 28 (2015)"},{"key":"32_CR20","unstructured":"Kendall, A., Gal, Y.: What uncertainties do we need in bayesian deep learning for computer vision? Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"32_CR21","doi-asserted-by":"crossref","unstructured":"Klodt, M., Vedaldi, A.: Supervising the new with the old: learning sfm from sfm. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 698\u2013713 (2018)","DOI":"10.1007\/978-3-030-01249-6_43"},{"key":"32_CR22","doi-asserted-by":"publisher","unstructured":"Leroy, V., Cabon, Y., Revaud, J.: Grounding image matching in 3D with MASt3R. In: Leonardis, A., Ricci, E., Roth, S., Russakovsky, O., Sattler, T., Varol, G. (eds.) Computer Vision \u2013 ECCV 2024. ECCV 2024. LNCS, vol. 15130, pp. 71\u201391. Springer, Cham (2025). https:\/\/doi.org\/10.1007\/978-3-031-73220-1_5","DOI":"10.1007\/978-3-031-73220-1_5"},{"key":"32_CR23","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)"},{"key":"32_CR24","doi-asserted-by":"crossref","unstructured":"Marsal, R., Chabot, F., Loesch, A., Grolleau, W., Sahbi, H.: Monoprob: self-supervised monocular depth estimation with interpretable uncertainty. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 3637\u20133646 (2024)","DOI":"10.1109\/WACV57701.2024.00360"},{"issue":"5","key":"32_CR25","doi-asserted-by":"publisher","first-page":"1147","DOI":"10.1109\/TRO.2015.2463671","volume":"31","author":"R Mur-Artal","year":"2015","unstructured":"Mur-Artal, R., Montiel, J.M.M., Tardos, J.D.: Orb-slam: a versatile and accurate monocular slam system. IEEE Trans. Rob. 31(5), 1147\u20131163 (2015)","journal-title":"IEEE Trans. Rob."},{"issue":"5","key":"32_CR26","doi-asserted-by":"publisher","first-page":"1255","DOI":"10.1109\/TRO.2017.2705103","volume":"33","author":"R Mur-Artal","year":"2017","unstructured":"Mur-Artal, R., Tard\u00f3s, J.D.: Orb-slam2: an open-source slam system for monocular, stereo, and rgb-d cameras. IEEE Trans. Rob. 33(5), 1255\u20131262 (2017)","journal-title":"IEEE Trans. Rob."},{"key":"32_CR27","unstructured":"Oquab, M., et\u00a0al.: Dinov2: learning robust visual features without supervision. Trans. Mach. Learn. Res. J. 1\u201331 (2024)"},{"key":"32_CR28","doi-asserted-by":"crossref","unstructured":"Poggi, M., Aleotti, F., Tosi, F., Mattoccia, S.: On the uncertainty of self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3227\u20133237 (2020)","DOI":"10.1109\/CVPR42600.2020.00329"},{"key":"32_CR29","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PmLR (2021)"},{"key":"32_CR30","doi-asserted-by":"crossref","unstructured":"Ranftl, R., Bochkovskiy, A., Koltun, V.: Vision transformers for dense prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12179\u201312188 (2021)","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"32_CR31","doi-asserted-by":"crossref","unstructured":"Wang, C., Zhang, G., Zhou, W.: Self-supervised learning of monocular visual odometry and depth with uncertainty-aware scale consistency. In: 2024 IEEE International Conference on Robotics and Automation (ICRA), pp. 3984\u20133990. IEEE (2024)","DOI":"10.1109\/ICRA57147.2024.10610075"},{"key":"32_CR32","doi-asserted-by":"crossref","unstructured":"Wang, J., Chen, M., Karaev, N., Vedaldi, A., Rupprecht, C., Novotny, D.: Vggt: visual geometry grounded transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2025)","DOI":"10.1109\/CVPR52734.2025.00499"},{"key":"32_CR33","doi-asserted-by":"crossref","unstructured":"Wang, S., Leroy, V., Cabon, Y., Chidlovskii, B., Revaud, J.: Dust3r: geometric 3d vision made easy. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20697\u201320709 (2024)","DOI":"10.1109\/CVPR52733.2024.01956"},{"issue":"4","key":"32_CR34","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A.C., Sheikh, H.R., Simoncelli, E.P.: Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13(4), 600\u2013612 (2004)","journal-title":"IEEE Trans. Image Process."},{"key":"32_CR35","doi-asserted-by":"crossref","unstructured":"Wimbauer, F., Chen, W., Muhle, D., Rupprecht, C., Cremers, D.: Anycam: learning to recover camera poses and intrinsics from casual videos. In: Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR), pp. 16717\u201316727 (2025)","DOI":"10.1109\/CVPR52734.2025.01558"},{"key":"32_CR36","doi-asserted-by":"crossref","unstructured":"Yang, L., Kang, B., Huang, Z., Xu, X., Feng, J., Zhao, H.: Depth anything: unleashing the power of large-scale unlabeled data. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10371\u201310381 (2024)","DOI":"10.1109\/CVPR52733.2024.00987"},{"key":"32_CR37","first-page":"21875","volume":"37","author":"L Yang","year":"2024","unstructured":"Yang, L., et al.: Depth anything v2. Adv. Neural Inf. Process. Syst. 37, 21875\u201321911 (2024)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"32_CR38","doi-asserted-by":"crossref","unstructured":"Yang, N., Stumberg, L.V., Wang, R., Cremers, D.: D3VO: deep depth, deep pose and deep uncertainty for monocular visual odometry. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1281\u20131292 (2020)","DOI":"10.1109\/CVPR42600.2020.00136"},{"key":"32_CR39","doi-asserted-by":"crossref","unstructured":"Yang, N., Wang, R., Stuckler, J., Cremers, D.: Deep virtual stereo odometry: leveraging deep depth prediction for monocular direct sparse odometry. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 817\u2013833 (2018)","DOI":"10.1007\/978-3-030-01237-3_50"},{"key":"32_CR40","doi-asserted-by":"crossref","unstructured":"Yin, Z., Shi, J.: Geonet: unsupervised learning of dense depth, optical flow and camera pose. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1983\u20131992 (2018)","DOI":"10.1109\/CVPR.2018.00212"},{"issue":"4","key":"32_CR41","doi-asserted-by":"publisher","first-page":"289","DOI":"10.1007\/s40903-015-0032-7","volume":"1","author":"K Yousif","year":"2015","unstructured":"Yousif, K., Bab-Hadiashar, A., Hoseinnezhad, R.: An overview to visual odometry and visual slam: applications to mobile robotics. Intell. Ind. Syst. 1(4), 289\u2013311 (2015)","journal-title":"Intell. Ind. Syst."},{"key":"32_CR42","doi-asserted-by":"crossref","unstructured":"Zhan, H., Garg, R., Weerasekera, C.S., Li, K., Agarwal, H., Reid, I.: Unsupervised learning of monocular depth estimation and visual odometry with deep feature reconstruction. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 340\u2013349 (2018)","DOI":"10.1109\/CVPR.2018.00043"},{"key":"32_CR43","doi-asserted-by":"crossref","unstructured":"Zhan, H., Weerasekera, C.S., Bian, J.W., Reid, I.: Visual odometry revisited: what should be learnt? In: 2020 IEEE International Conference on Robotics and Automation (ICRA), pp. 4203\u20134210. IEEE (2020)","DOI":"10.1109\/ICRA40945.2020.9197374"},{"key":"32_CR44","unstructured":"Zhang, J., et al.: Monst3r: a simple approach for estimating geometry in the presence of motion. arXiv preprint arxiv:2410.03825 (2024)"},{"key":"32_CR45","doi-asserted-by":"crossref","unstructured":"Zhang, S., Zhang, J., Tao, D.: Towards scale consistent monocular visual odometry by learning from the virtual world. In: 2022 International Conference on Robotics and Automation (ICRA), pp. 5601\u20135607. IEEE (2022)","DOI":"10.1109\/ICRA46639.2022.9812347"},{"key":"32_CR46","doi-asserted-by":"publisher","unstructured":"Zhang, Z., Cole, F., Li, Z., Rubinstein, M., Snavely, N., Freeman, W.T.: Structure and motion from casual videos. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision \u2013 ECCV 2022. ECCV 2022. LNCS, vol. 13693, pp. 20\u201337. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19827-4_2","DOI":"10.1007\/978-3-031-19827-4_2"},{"key":"32_CR47","doi-asserted-by":"crossref","unstructured":"Zhou, K., et al.: Manydepth2: motion-aware self-supervised monocular depth estimation in dynamic scenes. IEEE Robot. Autom. Lett. (2025)","DOI":"10.1109\/LRA.2025.3568337"},{"key":"32_CR48","doi-asserted-by":"crossref","unstructured":"Zhou, T., Brown, M., Snavely, N., Lowe, D.G.: Unsupervised learning of depth and ego-motion from video. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1851\u20131858 (2017)","DOI":"10.1109\/CVPR.2017.700"},{"key":"32_CR49","doi-asserted-by":"publisher","unstructured":"Zou, Y., Ji, P., Tran, Q.H., Huang, J.B., Chandraker, M.: Learning monocular visual odometry via self-supervised long-term modeling. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.M. (eds.) Computer Vision \u2013 ECCV 2020. ECCV 2020. LNCS, vol. 12359, pp. 710\u2013727. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58568-6_42","DOI":"10.1007\/978-3-030-58568-6_42"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-12840-9_32","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T03:25:37Z","timestamp":1767324337000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-12840-9_32"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9783032128393","9783032128409"],"references-count":49,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-12840-9_32","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"2 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DAGM GCPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"DAGM German Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Freiburg","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"47","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dagm2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.dagm-gcpr.de\/year\/2025","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}