{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T15:33:05Z","timestamp":1775143985347,"version":"3.50.1"},"publisher-location":"Cham","reference-count":57,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030585280","type":"print"},{"value":"9783030585297","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58529-7_34","type":"book-chapter","created":{"date-parts":[[2020,11,12]],"date-time":"2020-11-12T09:06:09Z","timestamp":1605171969000},"page":"572-588","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":175,"title":["Feature-Metric Loss for Self-supervised Learning of Depth and Egomotion"],"prefix":"10.1007","author":[{"given":"Chang","family":"Shu","sequence":"first","affiliation":[]},{"given":"Kun","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Zhixiang","family":"Duan","sequence":"additional","affiliation":[]},{"given":"Kuiyuan","family":"Yang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,11,13]]},"reference":[{"key":"34_CR1","doi-asserted-by":"crossref","unstructured":"Andraghetti, L., et al.: Enhancing self-supervised monocular depth estimation with traditional visual odometry. arXiv:1908.03127 (2019)","DOI":"10.1109\/3DV.2019.00054"},{"key":"34_CR2","unstructured":"Bian, J.W., et al.: Unsupervised scale-consistent depth and ego-motion learning from monocular video. In: NeurIPS (2019)"},{"key":"34_CR3","unstructured":"Bian, J.W., Zhan, H., Wang, N., Chin, T.J., Shen, C., Reid, I.: Unsupervised depth learning in challenging indoor video: Weak rectification to rescue. arXiv:2006.02708 (2020)"},{"key":"34_CR4","doi-asserted-by":"crossref","unstructured":"Casser, V., Pirk, S., Mahjourian, R., Angelova, A.: Depth prediction without the sensors: leveraging structure for unsupervised learning from monocular videos. In: AAAI (2019)","DOI":"10.1609\/aaai.v33i01.33018001"},{"key":"34_CR5","doi-asserted-by":"crossref","unstructured":"Chen, Y., Schmid, C., Sminchisescu, C.: Self-supervised learning with geometric constraints in monocular video: connecting flow, depth, and camera. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00716"},{"key":"34_CR6","doi-asserted-by":"crossref","unstructured":"Cheng, X., Zhong, Y., Dai, Y., Ji, P., Li, H.: Noise-aware unsupervised deep lidar-stereo fusion. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00650"},{"key":"34_CR7","doi-asserted-by":"crossref","unstructured":"Deshpande, A., Rock, J., Forsyth, D.: Learning large-scale automatic image colorization. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.72"},{"key":"34_CR8","doi-asserted-by":"crossref","unstructured":"DeSouza, G.N., Kak, A.C.: Vision for mobile robot navigation: a survey. TPAMI (2002)","DOI":"10.1109\/34.982903"},{"key":"34_CR9","doi-asserted-by":"crossref","unstructured":"Doersch, C., Gupta, A., Efros, A.A.: Unsupervised visual representation learning by context prediction. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.167"},{"key":"34_CR10","unstructured":"Donahue, J., Kr\u00e4henb\u00fchl, P., Darrell, T.: Adversarial feature learning. arXiv preprint arXiv:1605.09782 (2016)"},{"key":"34_CR11","unstructured":"Eigen, D., Puhrsch, C., Fergus, R.: Depth map prediction from a single image using a multi-scale deep network. In: NeurIPS (2014)"},{"key":"34_CR12","doi-asserted-by":"crossref","unstructured":"Engel, J., Koltun, V., Cremers, D.: Direct sparse odometry. TPAMI (2017)","DOI":"10.1109\/TPAMI.2017.2658577"},{"key":"34_CR13","doi-asserted-by":"crossref","unstructured":"Fu, H., Gong, M., Wang, C., Batmanghelich, K., Tao, D.: Deep ordinal regression network for monocular depth estimation. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00214"},{"key":"34_CR14","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? The KITTI vision benchmark suite. In: CVPR (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"34_CR15","doi-asserted-by":"crossref","unstructured":"Godard, C., Mac Aodha, O., Brostow, G.: Digging into self-supervised monocular depth estimation. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00393"},{"key":"34_CR16","doi-asserted-by":"crossref","unstructured":"Godard, C., Mac Aodha, O., Brostow, G.J.: Unsupervised monocular depth estimation with left-right consistency. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.699"},{"key":"34_CR17","doi-asserted-by":"crossref","unstructured":"Gordon, A., Li, H., Jonschkowski, R., Angelova, A.: Depth from videos in the wild: unsupervised monocular depth learning from unknown cameras. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00907"},{"key":"34_CR18","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"34_CR19","doi-asserted-by":"crossref","unstructured":"Hirschmuller, H.: Stereo processing by semiglobal matching and mutual information. TPAMI (2007)","DOI":"10.1109\/TPAMI.2007.1166"},{"key":"34_CR20","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv:1412.6980 (2014)"},{"key":"34_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"577","DOI":"10.1007\/978-3-319-46493-0_35","volume-title":"Computer Vision \u2013 ECCV 2016","author":"G Larsson","year":"2016","unstructured":"Larsson, G., Maire, M., Shakhnarovich, G.: Learning representations for automatic colorization. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 577\u2013593. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_35"},{"key":"34_CR22","doi-asserted-by":"crossref","unstructured":"Larsson, G., Maire, M., Shakhnarovich, G.: Colorization as a proxy task for visual understanding. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.96"},{"key":"34_CR23","unstructured":"Lee, J.H., Han, M.K., Ko, D.W., Suh, I.H.: From big to small: multi-scale local planar guidance for monocular depth estimation. arXiv:1907.10326 (2019)"},{"key":"34_CR24","doi-asserted-by":"crossref","unstructured":"Li, R., Wang, S., Long, Z., Gu, D.: Undeepvo: Monocular visual odometry through unsupervised deep learning. In: ICRA (2018)","DOI":"10.1109\/ICRA.2018.8461251"},{"key":"34_CR25","doi-asserted-by":"crossref","unstructured":"Li, Y., Ushiku, Y., Harada, T.: Pose graph optimization for unsupervised monocular visual odometry. arXiv:1903.06315 (2019)","DOI":"10.1109\/ICRA.2019.8793706"},{"key":"34_CR26","unstructured":"Luo, C., et al.: Every pixel counts++: joint learning of geometry and motion with 3d holistic understanding. arXiv:1810.06125 (2018)"},{"key":"34_CR27","doi-asserted-by":"crossref","unstructured":"Mahjourian, R., Wicke, M., Angelova, A.: Unsupervised learning of depth and ego-motion from monocular video using 3D geometric constraints. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00594"},{"key":"34_CR28","doi-asserted-by":"crossref","unstructured":"Masci, J., Meier, U., Cire\u015fan, D., Schmidhuber, J.: Stacked convolutional auto-encoders for hierarchical feature extraction. In: ICANN (2011)","DOI":"10.1007\/978-3-642-21735-7_7"},{"key":"34_CR29","doi-asserted-by":"crossref","unstructured":"Mayer, N., et al.: A large dataset to train convolutional networks for disparity, optical flow, and scene flow estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4040\u20134048 (2016)","DOI":"10.1109\/CVPR.2016.438"},{"key":"34_CR30","doi-asserted-by":"crossref","unstructured":"Meng, Y., et al.: Signet: semantic instance aided unsupervised 3D geometry perception. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01004"},{"key":"34_CR31","doi-asserted-by":"crossref","unstructured":"Menze, M., Geiger, A.: Object scene flow for autonomous vehicles. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298925"},{"key":"34_CR32","unstructured":"Mescheder, L., Nowozin, S., Geiger, A.: Adversarial variational Bayes: unifying variational autoencoders and generative adversarial networks. In: ICML (2017)"},{"key":"34_CR33","unstructured":"Mur-Artal, R., Montiel, J.M.M., Tardos, J.D.: ORB-SLAM: a versatile and accurate monocular slam system. TR (2017)"},{"issue":"5","key":"34_CR34","doi-asserted-by":"publisher","first-page":"1147","DOI":"10.1109\/TRO.2015.2463671","volume":"31","author":"R Mur-Artal","year":"2015","unstructured":"Mur-Artal, R., Montiel, J.M.M., Tardos, J.D.: ORB-SLAM: a versatile and accurate monocular slam system. IEEE Trans. Rob. 31(5), 1147\u20131163 (2015)","journal-title":"IEEE Trans. Rob."},{"key":"34_CR35","doi-asserted-by":"crossref","unstructured":"Newcombe, R.A., Lovegrove, S.J., Davison, A.J.: DTAM: dense tracking and mapping in real-time. In: ICCV (2011)","DOI":"10.1109\/ICCV.2011.6126513"},{"key":"34_CR36","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1007\/978-3-319-46466-4_5","volume-title":"Computer Vision \u2013 ECCV 2016","author":"M Noroozi","year":"2016","unstructured":"Noroozi, M., Favaro, P.: Unsupervised learning of visual representations by solving jigsaw puzzles. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9910, pp. 69\u201384. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46466-4_5"},{"key":"34_CR37","doi-asserted-by":"crossref","unstructured":"Noroozi, M., Pirsiavash, H., Favaro, P.: Representation learning by learning to count. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.628"},{"key":"34_CR38","doi-asserted-by":"crossref","unstructured":"Noroozi, M., Vinjimoor, A., Favaro, P., Pirsiavash, H.: Boosting self-supervised learning via knowledge transfer. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00975"},{"key":"34_CR39","unstructured":"Paszke, A., et al.: Automatic differentiation in Pytorch. In: NeurIPS-W (2017)"},{"key":"34_CR40","doi-asserted-by":"crossref","unstructured":"Pillai, S., Ambrus, R., Gaidon, A.: SuperDepth: self-supervised, super-resolved monocular depth estimation. In: ICRA (2019)","DOI":"10.1109\/ICRA.2019.8793621"},{"key":"34_CR41","doi-asserted-by":"crossref","unstructured":"Pilzer, A., Lathuili\u00e8re, S., Sebe, N., Ricci, E.: Refine and distill: exploiting cycle-inconsistency and knowledge distillation for unsupervised monocular depth estimation. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01000"},{"key":"34_CR42","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1016\/j.robot.2017.03.019","volume":"93","author":"T Pire","year":"2017","unstructured":"Pire, T., Fischer, T., Castro, G., De Crist\u00f3foris, P., Civera, J., Berlles, J.J.: S-PTAM: stereo parallel tracking and mapping. Rob. Auton. Syst. 93, 27\u201342 (2017)","journal-title":"Rob. Auton. Syst."},{"key":"34_CR43","doi-asserted-by":"crossref","unstructured":"Ranjan, A., Jampani, V., Kim, K., Sun, D., Wulff, J., Black, M.J.: Competitive collaboration: joint unsupervised learning of depth, camera motion, optical flow and motion segmentation. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01252"},{"key":"34_CR44","doi-asserted-by":"crossref","unstructured":"Tosi, F., Aleotti, F., Poggi, M., Mattoccia, S.: Learning monocular depth estimation infusing traditional stereo knowledge. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01003"},{"key":"34_CR45","doi-asserted-by":"crossref","unstructured":"Vincent, P., Larochelle, H., Bengio, Y., Manzagol, P.A.: Extracting and composing robust features with denoising autoencoders. In: ICML (2008)","DOI":"10.1145\/1390156.1390294"},{"key":"34_CR46","doi-asserted-by":"crossref","unstructured":"Wang, C., Buenaposada, J.M., Zhu, R., Lucey, S.: Learning depth from monocular videos using direct methods. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00216"},{"key":"34_CR47","doi-asserted-by":"crossref","unstructured":"Watson, J., Firman, M., Brostow, G.J., Turmukhambetov, D.: Self-supervised monocular depth hints. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00225"},{"key":"34_CR48","doi-asserted-by":"crossref","unstructured":"Wong, A., Hong, B.W., Soatto, S.: Bilateral cyclic constraint and adaptive regularization for unsupervised monocular depth prediction. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00579"},{"key":"34_CR49","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"691","DOI":"10.1007\/978-3-030-11021-5_43","volume-title":"Computer Vision \u2013 ECCV 2018 Workshops","author":"Z Yang","year":"2019","unstructured":"Yang, Z., Wang, P., Wang, Y., Xu, W., Nevatia, R.: Every pixel counts: unsupervised geometry learning with holistic 3D motion understanding. In: Leal-Taix\u00e9, L., Roth, S. (eds.) ECCV 2018. LNCS, vol. 11133, pp. 691\u2013709. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-11021-5_43"},{"key":"34_CR50","doi-asserted-by":"crossref","unstructured":"Yang, Z., Wang, P., Wang, Y., Xu, W., Nevatia, R.: Lego: Learning edge with geometry all at once by watching videos. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00031"},{"key":"34_CR51","doi-asserted-by":"crossref","unstructured":"Yang, Z., Wang, P., Xu, W., Zhao, L., Nevatia, R.: Unsupervised learning of geometry with edge-aware depth-normal consistency. In: AAAI (2018)","DOI":"10.1609\/aaai.v32i1.12257"},{"key":"34_CR52","doi-asserted-by":"crossref","unstructured":"Yin, Z., Shi, J.: GeoNet: unsupervised learning of dense depth, optical flow and camera pose. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00212"},{"key":"34_CR53","doi-asserted-by":"crossref","unstructured":"Zhan, H., Garg, R., Weerasekera, C.S., Li, K., Agarwal, H., Reid, I.: Unsupervised learning of monocular depth estimation and visual odometry with deep feature reconstruction. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00043"},{"key":"34_CR54","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"649","DOI":"10.1007\/978-3-319-46487-9_40","volume-title":"Computer Vision \u2013 ECCV 2016","author":"R Zhang","year":"2016","unstructured":"Zhang, R., Isola, P., Efros, A.A.: Colorful image colorization. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9907, pp. 649\u2013666. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46487-9_40"},{"key":"34_CR55","doi-asserted-by":"crossref","unstructured":"Zhou, J., Wang, Y., Qin, K., Zeng, W.: Unsupervised high-resolution depth learning from videos with dual networks. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00697"},{"key":"34_CR56","doi-asserted-by":"crossref","unstructured":"Zhou, T., Brown, M., Snavely, N., Lowe, D.G.: Unsupervised learning of depth and ego-motion from video. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.700"},{"key":"34_CR57","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1007\/978-3-030-01228-1_3","volume-title":"Computer Vision \u2013 ECCV 2018","author":"Y Zou","year":"2018","unstructured":"Zou, Y., Luo, Z., Huang, J.-B.: DF-net: unsupervised joint learning of depth and flow using cross-task consistency. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11209, pp. 38\u201355. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01228-1_3"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58529-7_34","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,12]],"date-time":"2024-11-12T00:36:23Z","timestamp":1731371783000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58529-7_34"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030585280","9783030585297"],"references-count":57,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58529-7_34","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"13 November 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic. From the ECCV Workshops 249 full papers, 18 short papers, and 21 further contributions were published out of a total of 467 submissions.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}